From 46a355d1486563bd4f215ae9a09c6a7cacb2fd48 Mon Sep 17 00:00:00 2001 From: Ariana Cursino Date: Sat, 20 Jun 2026 13:35:32 -0300 Subject: [PATCH 1/2] feat: implement streamlit cached github client and active issue metrics stream --- .gitignore | 2 + requirements.txt | 1 + src/app.py | 25 ++++++++---- src/github_client.py | 97 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 9 deletions(-) create mode 100644 src/github_client.py diff --git a/.gitignore b/.gitignore index b810167..6c0b001 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,8 @@ env/ # Streamlit logs and dynamic caching .streamlit/ .streamlit/config.toml +.streamlit/secrets.toml + # Linter profiles and caches .black diff --git a/requirements.txt b/requirements.txt index 60b9ddf..b5f8435 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ pandas>=2.0.0 plotly>=5.15.0 requests>=2.31.0 Pillow>=10.0.0 +PyGithub>=2.1.1 # Code Quality & Linters black>=23.0.0 diff --git a/src/app.py b/src/app.py index 96e12a8..b046fe2 100644 --- a/src/app.py +++ b/src/app.py @@ -1,4 +1,9 @@ import streamlit as st +import pandas as pd # Added to handle missing or null metric values (pd.notna) +from github_client import calculate_community_health # Added to access your metric module + +# Automatically pull metrics for the main ScanAPI repository +metrics = calculate_community_health("scanapi/scanapi") # Initial page configuration st.set_page_config( @@ -33,26 +38,28 @@ with tab_onboarding: st.header("Contributor Onboarding Hub") st.subheader("Lowering the barrier to entry") - st.info( - "Feature incoming: Dynamic aggregation of " - "'good first issue' and 'help wanted' labels." - ) - # Visual example of metric cards (TTFR and TTM) + # Visual metrics extracted dynamically from the GitHub client module col1, col2 = st.columns(2) with col1: st.metric( label="Avg Time-to-First-Response (TTFR)", - value="⏳ Loading...", - delta="Target: < 24h", + value=f"{metrics['avg_ttfr_hours']:.2f} hrs" if pd.notna(metrics["avg_ttfr_hours"]) else "N/A", + #delta="Target: < 24h", ) with col2: st.metric( label="Avg Time-to-Merge (TTM)", - value="⏳ Loading...", - delta="Target: < 48h", + value=f"{metrics['avg_ttm_hours']:.2f} hrs" if pd.notna(metrics["avg_ttm_hours"]) else "N/A", + #delta="Target: < 48h", ) + # Render raw issues dataframe beneath metrics if data exists + if not metrics["issues_df"].empty: + st.subheader("📋 Active Issue Stream") + st.dataframe(metrics["issues_df"], use_container_width=True) + + with tab_leaderboard: st.header("Community Wall of Fame") st.subheader("Celebrating our active contributors") diff --git a/src/github_client.py b/src/github_client.py new file mode 100644 index 0000000..59aa163 --- /dev/null +++ b/src/github_client.py @@ -0,0 +1,97 @@ +import pandas as pd +from github import Github, RateLimitExceededException +import streamlit as st + +def get_github_client(): + """ + Fetches the token from secrets.toml and initializes the GitHub client. + Streamlit automatically searches within .streamlit/secrets.toml. + """ + token = st.secrets.get("GITHUB_TOKEN", None) + if token: + return Github(token) + return Github() # Fallback to unauthenticated client (significantly lower rate limits) + +@st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...") +def fetch_raw_repo_data(repo_name: str): + """ + Fetches raw issues and pull requests data from the specified repository. + Caches data for 15 minutes to preserve API rate limit allocations. + """ + g = get_github_client() + + try: + repo = g.get_repo(repo_name) + issues_data = [] + pulls_data = [] + + # Limiting to the 100 most recent items to prevent severe API degradation + for issue in repo.get_issues(state='all')[:100]: + base_info = { + "id": issue.id, + "number": issue.number, + "title": issue.title, + "created_at": issue.created_at, + "closed_at": issue.closed_at, + "labels": [label.name for label in issue.labels], + "assignee": issue.assignee.login if issue.assignee else None, + } + + if issue.pull_request: + # Process item as a Pull Request + pr = repo.get_pull(issue.number) + base_info["merged_at"] = pr.merged_at + base_info["is_merged"] = pr.merged + pulls_data.append(base_info) + else: + # Process item as a standard Issue (calculates baseline response timing) + comments = issue.get_comments() + if comments.totalCount > 0: + # Safely extract the created_at attribute from the very first comment item + first_comment_time = comments[0].created_at + else: + first_comment_time = None + base_info["first_response_at"] = first_comment_time + issues_data.append(base_info) + + return {"issues": issues_data, "pulls": pulls_data} + + except RateLimitExceededException: + st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.") + return {"issues": [], "pulls": []} + +def calculate_community_health(repo_name: str): + """ + Transforms raw dictionary arrays into structured DataFrames. + Calculates key performance metrics: Time-to-First-Response (TTFR) and Time-to-Merge (TTM). + """ + raw_data = fetch_raw_repo_data(repo_name) + + df_issues = pd.DataFrame(raw_data["issues"]) + df_pulls = pd.DataFrame(raw_data["pulls"]) + + # --- Time-to-First-Response (TTFR) Calculation in Hours --- + if not df_issues.empty: + df_issues['created_at'] = pd.to_datetime(df_issues['created_at']) + df_issues['first_response_at'] = pd.to_datetime(df_issues['first_response_at']) + df_issues['ttfr_hours'] = (df_issues['first_response_at'] - df_issues['created_at']).dt.total_seconds() / 3600 + avg_ttfr = df_issues['ttfr_hours'].mean() + else: + avg_ttfr = None + + # --- Time-to-Merge (TTM) Calculation in Hours --- + if not df_pulls.empty: + df_pulls['created_at'] = pd.to_datetime(df_pulls['created_at']) + df_pulls['merged_at'] = pd.to_datetime(df_pulls['merged_at']) + merged_prs = df_pulls[df_pulls['is_merged'] == True].copy() + merged_prs['ttm_hours'] = (merged_prs['merged_at'] - merged_prs['created_at']).dt.total_seconds() / 3600 + avg_ttm = merged_prs['ttm_hours'].mean() + else: + avg_ttm = None + + return { + "issues_df": df_issues, + "pulls_df": df_pulls, + "avg_ttfr_hours": avg_ttfr, + "avg_ttm_hours": avg_ttm + } From 90b21bc5402c4864029dcb48db394890af9bae45 Mon Sep 17 00:00:00 2001 From: Ariana Cursino Date: Sat, 20 Jun 2026 13:44:44 -0300 Subject: [PATCH 2/2] style: run black formatter to fix CI linting errors --- src/app.py | 20 +++++++++++++++----- src/github_client.py | 33 +++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/app.py b/src/app.py index b046fe2..80ee295 100644 --- a/src/app.py +++ b/src/app.py @@ -1,6 +1,8 @@ import streamlit as st import pandas as pd # Added to handle missing or null metric values (pd.notna) -from github_client import calculate_community_health # Added to access your metric module +from github_client import ( + calculate_community_health, +) # Added to access your metric module # Automatically pull metrics for the main ScanAPI repository metrics = calculate_community_health("scanapi/scanapi") @@ -44,14 +46,22 @@ with col1: st.metric( label="Avg Time-to-First-Response (TTFR)", - value=f"{metrics['avg_ttfr_hours']:.2f} hrs" if pd.notna(metrics["avg_ttfr_hours"]) else "N/A", - #delta="Target: < 24h", + value=( + f"{metrics['avg_ttfr_hours']:.2f} hrs" + if pd.notna(metrics["avg_ttfr_hours"]) + else "N/A" + ), + # delta="Target: < 24h", ) with col2: st.metric( label="Avg Time-to-Merge (TTM)", - value=f"{metrics['avg_ttm_hours']:.2f} hrs" if pd.notna(metrics["avg_ttm_hours"]) else "N/A", - #delta="Target: < 48h", + value=( + f"{metrics['avg_ttm_hours']:.2f} hrs" + if pd.notna(metrics["avg_ttm_hours"]) + else "N/A" + ), + # delta="Target: < 48h", ) # Render raw issues dataframe beneath metrics if data exists diff --git a/src/github_client.py b/src/github_client.py index 59aa163..3f4017a 100644 --- a/src/github_client.py +++ b/src/github_client.py @@ -2,6 +2,7 @@ from github import Github, RateLimitExceededException import streamlit as st + def get_github_client(): """ Fetches the token from secrets.toml and initializes the GitHub client. @@ -10,7 +11,10 @@ def get_github_client(): token = st.secrets.get("GITHUB_TOKEN", None) if token: return Github(token) - return Github() # Fallback to unauthenticated client (significantly lower rate limits) + return ( + Github() + ) # Fallback to unauthenticated client (significantly lower rate limits) + @st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...") def fetch_raw_repo_data(repo_name: str): @@ -26,7 +30,7 @@ def fetch_raw_repo_data(repo_name: str): pulls_data = [] # Limiting to the 100 most recent items to prevent severe API degradation - for issue in repo.get_issues(state='all')[:100]: + for issue in repo.get_issues(state="all")[:100]: base_info = { "id": issue.id, "number": issue.number, @@ -60,6 +64,7 @@ def fetch_raw_repo_data(repo_name: str): st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.") return {"issues": [], "pulls": []} + def calculate_community_health(repo_name: str): """ Transforms raw dictionary arrays into structured DataFrames. @@ -72,20 +77,24 @@ def calculate_community_health(repo_name: str): # --- Time-to-First-Response (TTFR) Calculation in Hours --- if not df_issues.empty: - df_issues['created_at'] = pd.to_datetime(df_issues['created_at']) - df_issues['first_response_at'] = pd.to_datetime(df_issues['first_response_at']) - df_issues['ttfr_hours'] = (df_issues['first_response_at'] - df_issues['created_at']).dt.total_seconds() / 3600 - avg_ttfr = df_issues['ttfr_hours'].mean() + df_issues["created_at"] = pd.to_datetime(df_issues["created_at"]) + df_issues["first_response_at"] = pd.to_datetime(df_issues["first_response_at"]) + df_issues["ttfr_hours"] = ( + df_issues["first_response_at"] - df_issues["created_at"] + ).dt.total_seconds() / 3600 + avg_ttfr = df_issues["ttfr_hours"].mean() else: avg_ttfr = None # --- Time-to-Merge (TTM) Calculation in Hours --- if not df_pulls.empty: - df_pulls['created_at'] = pd.to_datetime(df_pulls['created_at']) - df_pulls['merged_at'] = pd.to_datetime(df_pulls['merged_at']) - merged_prs = df_pulls[df_pulls['is_merged'] == True].copy() - merged_prs['ttm_hours'] = (merged_prs['merged_at'] - merged_prs['created_at']).dt.total_seconds() / 3600 - avg_ttm = merged_prs['ttm_hours'].mean() + df_pulls["created_at"] = pd.to_datetime(df_pulls["created_at"]) + df_pulls["merged_at"] = pd.to_datetime(df_pulls["merged_at"]) + merged_prs = df_pulls[df_pulls["is_merged"] == True].copy() + merged_prs["ttm_hours"] = ( + merged_prs["merged_at"] - merged_prs["created_at"] + ).dt.total_seconds() / 3600 + avg_ttm = merged_prs["ttm_hours"].mean() else: avg_ttm = None @@ -93,5 +102,5 @@ def calculate_community_health(repo_name: str): "issues_df": df_issues, "pulls_df": df_pulls, "avg_ttfr_hours": avg_ttfr, - "avg_ttm_hours": avg_ttm + "avg_ttm_hours": avg_ttm, }