diff --git a/.gitignore b/.gitignore
index b810167..6c0b001 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,8 @@ env/
 # Streamlit logs and dynamic caching
 .streamlit/
 .streamlit/config.toml
+.streamlit/secrets.toml
+
 
 # Linter profiles and caches
 .black
diff --git a/requirements.txt b/requirements.txt
index 60b9ddf..b5f8435 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,7 @@ pandas>=2.0.0
 plotly>=5.15.0
 requests>=2.31.0
 Pillow>=10.0.0
+PyGithub>=2.1.1
 
 # Code Quality & Linters
 black>=23.0.0
diff --git a/src/app.py b/src/app.py
index 96e12a8..80ee295 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,4 +1,7 @@
+import pandas as pd
 import streamlit as st
+
+from github_client import calculate_community_health
 
 # Initial page configuration
 st.set_page_config(
@@ -33,26 +36,40 @@ with tab_onboarding:
     st.header("Contributor Onboarding Hub")
     st.subheader("Lowering the barrier to entry")
 
-    st.info(
-        "Feature incoming: Dynamic aggregation of "
-        "'good first issue' and 'help wanted' labels."
-    )
+    # Fetch metrics only after st.set_page_config() has run: the fetch may emit
+    # Streamlit elements (spinner, rate-limit error), which older Streamlit
+    # releases reject when they precede the page configuration.
+    metrics = calculate_community_health("scanapi/scanapi")
 
-    # Visual example of metric cards (TTFR and TTM)
+    # Visual metrics extracted dynamically from the GitHub client module
     col1, col2 = st.columns(2)
     with col1:
         st.metric(
             label="Avg Time-to-First-Response (TTFR)",
-            value="⏳ Loading...",
-            delta="Target: < 24h",
+            value=(
+                f"{metrics['avg_ttfr_hours']:.2f} hrs"
+                if pd.notna(metrics["avg_ttfr_hours"])
+                else "N/A"
+            ),
+            # delta="Target: < 24h",
         )
     with col2:
         st.metric(
             label="Avg Time-to-Merge (TTM)",
-            value="⏳ Loading...",
-            delta="Target: < 48h",
+            value=(
+                f"{metrics['avg_ttm_hours']:.2f} hrs"
+                if pd.notna(metrics["avg_ttm_hours"])
+                else "N/A"
+            ),
+            # delta="Target: < 48h",
         )
 
+    # Render raw issues dataframe beneath metrics if data exists
+    if not metrics["issues_df"].empty:
+        st.subheader("📋 Active Issue Stream")
+        st.dataframe(metrics["issues_df"], use_container_width=True)
+
+
 with tab_leaderboard:
     st.header("Community Wall of Fame")
     st.subheader("Celebrating our active contributors")
diff --git a/src/github_client.py b/src/github_client.py
new file mode 100644
index 0000000..3f4017a
--- /dev/null
+++ b/src/github_client.py
@@ -0,0 +1,146 @@
+"""GitHub data access and community-health metrics for the Streamlit app."""
+
+import pandas as pd
+import streamlit as st
+from github import Auth, Github, RateLimitExceededException
+
+# Upper bound on issues/PRs inspected per refresh. Every inspected item can
+# cost an extra API request, so the cap protects the rate limit.
+MAX_ITEMS = 100
+
+
+def get_github_client() -> Github:
+    """Build a GitHub client, authenticated when a token is configured.
+
+    The token is read from the ``GITHUB_TOKEN`` entry of Streamlit secrets
+    (``.streamlit/secrets.toml``). Without a token the client is
+    unauthenticated and subject to significantly lower rate limits.
+    """
+    try:
+        token = st.secrets.get("GITHUB_TOKEN")
+    except FileNotFoundError:
+        # st.secrets raises when no secrets file exists at all; treat that
+        # like a missing token instead of crashing the app.
+        token = None
+
+    if token:
+        # Auth.Token replaces the deprecated positional-token constructor.
+        return Github(auth=Auth.Token(token))
+    return Github()
+
+
+def _first_comment_time(issue):
+    """Return the creation time of the first comment on *issue*, or None."""
+    # The comment count is part of the issue payload, so issues without
+    # comments need no additional API request.
+    if not issue.comments:
+        return None
+    first_comment = next(iter(issue.get_comments()), None)
+    return first_comment.created_at if first_comment else None
+
+
+@st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...")
+def fetch_raw_repo_data(repo_name: str) -> dict:
+    """Fetch raw issue and pull-request records for a repository.
+
+    Results are cached for 15 minutes to preserve the API rate limit, and only
+    the first ``MAX_ITEMS`` entries of the issue listing are inspected.
+
+    Args:
+        repo_name: Repository in ``owner/name`` form.
+
+    Returns:
+        ``{"issues": [...], "pulls": [...]}``, two lists of plain dicts. Both
+        lists are empty when the API rate limit has been reached.
+    """
+    client = get_github_client()
+    issues_data = []
+    pulls_data = []
+
+    try:
+        repo = client.get_repo(repo_name)
+
+        # The issue listing also contains pull requests; split them apart.
+        for issue in repo.get_issues(state="all")[:MAX_ITEMS]:
+            record = {
+                "id": issue.id,
+                "number": issue.number,
+                "title": issue.title,
+                "created_at": issue.created_at,
+                "closed_at": issue.closed_at,
+                "labels": [label.name for label in issue.labels],
+                "assignee": issue.assignee.login if issue.assignee else None,
+            }
+
+            if issue.pull_request:
+                # Merge details live on the pull request, not the issue.
+                pr = repo.get_pull(issue.number)
+                record["merged_at"] = pr.merged_at
+                record["is_merged"] = pr.merged
+                pulls_data.append(record)
+            else:
+                record["first_response_at"] = _first_comment_time(issue)
+                issues_data.append(record)
+    except RateLimitExceededException:
+        st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.")
+        return {"issues": [], "pulls": []}
+
+    return {"issues": issues_data, "pulls": pulls_data}
+
+
+def _hours_between(start, end):
+    """Return the elapsed time from *start* to *end* in hours, element-wise."""
+    return (end - start).dt.total_seconds() / 3600
+
+
+def _mean_or_none(hours):
+    """Return the mean of *hours* as a float, or None when it is undefined."""
+    mean = hours.mean()
+    return float(mean) if pd.notna(mean) else None
+
+
+def calculate_community_health(repo_name: str) -> dict:
+    """Compute Time-to-First-Response and Time-to-Merge for a repository.
+
+    Args:
+        repo_name: Repository in ``owner/name`` form.
+
+    Returns:
+        A dict with the keys ``issues_df`` and ``pulls_df`` (DataFrames built
+        from the raw records) and ``avg_ttfr_hours`` and ``avg_ttm_hours``
+        (mean hours as floats, or ``None`` when no value can be computed).
+    """
+    raw_data = fetch_raw_repo_data(repo_name)
+    df_issues = pd.DataFrame(raw_data["issues"])
+    df_pulls = pd.DataFrame(raw_data["pulls"])
+
+    # --- Time-to-First-Response (TTFR) in hours ---
+    avg_ttfr = None
+    if not df_issues.empty:
+        # utc=True keeps a column timezone-aware even when it holds only missing
+        # values; a naive column cannot be subtracted from an aware one.
+        df_issues["created_at"] = pd.to_datetime(df_issues["created_at"], utc=True)
+        df_issues["first_response_at"] = pd.to_datetime(
+            df_issues["first_response_at"], utc=True
+        )
+        df_issues["ttfr_hours"] = _hours_between(
+            df_issues["created_at"], df_issues["first_response_at"]
+        )
+        avg_ttfr = _mean_or_none(df_issues["ttfr_hours"])
+
+    # --- Time-to-Merge (TTM) in hours ---
+    avg_ttm = None
+    if not df_pulls.empty:
+        df_pulls["created_at"] = pd.to_datetime(df_pulls["created_at"], utc=True)
+        df_pulls["merged_at"] = pd.to_datetime(df_pulls["merged_at"], utc=True)
+        merged_prs = df_pulls[df_pulls["is_merged"].astype(bool)]
+        avg_ttm = _mean_or_none(
+            _hours_between(merged_prs["created_at"], merged_prs["merged_at"])
+        )
+
+    return {
+        "issues_df": df_issues,
+        "pulls_df": df_pulls,
+        "avg_ttfr_hours": avg_ttfr,
+        "avg_ttm_hours": avg_ttm,
+    }