From 46a355d1486563bd4f215ae9a09c6a7cacb2fd48 Mon Sep 17 00:00:00 2001
From: Ariana Cursino <arcursino@gmail.com>
Date: Sat, 20 Jun 2026 13:35:32 -0300
Subject: [PATCH 1/2] feat: implement streamlit cached github client and active
 issue metrics stream

---
 .gitignore           |  2 +
 requirements.txt     |  1 +
 src/app.py           | 25 ++++++++----
 src/github_client.py | 97 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 9 deletions(-)
 create mode 100644 src/github_client.py

diff --git a/.gitignore b/.gitignore
index b810167..6c0b001 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,8 @@ env/
 # Streamlit logs and dynamic caching
 .streamlit/
 .streamlit/config.toml
+.streamlit/secrets.toml
+
 
 # Linter profiles and caches
 .black
diff --git a/requirements.txt b/requirements.txt
index 60b9ddf..b5f8435 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,7 @@ pandas>=2.0.0
 plotly>=5.15.0
 requests>=2.31.0
 Pillow>=10.0.0
+PyGithub>=2.1.1
 
 # Code Quality & Linters
 black>=23.0.0
diff --git a/src/app.py b/src/app.py
index 96e12a8..b046fe2 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,4 +1,9 @@
 import streamlit as st
+import pandas as pd  # Added to handle missing or null metric values (pd.notna)
+from github_client import calculate_community_health  # Added to access your metric module
+
+# Automatically pull metrics for the main ScanAPI repository
+metrics = calculate_community_health("scanapi/scanapi")
 
 # Initial page configuration
 st.set_page_config(
@@ -33,26 +38,28 @@
 with tab_onboarding:
     st.header("Contributor Onboarding Hub")
     st.subheader("Lowering the barrier to entry")
-    st.info(
-        "Feature incoming: Dynamic aggregation of "
-        "'good first issue' and 'help wanted' labels."
-    )
 
-    # Visual example of metric cards (TTFR and TTM)
+    # Visual metrics extracted dynamically from the GitHub client module
     col1, col2 = st.columns(2)
     with col1:
         st.metric(
             label="Avg Time-to-First-Response (TTFR)",
-            value="⏳ Loading...",
-            delta="Target: < 24h",
+            value=f"{metrics['avg_ttfr_hours']:.2f} hrs" if pd.notna(metrics["avg_ttfr_hours"]) else "N/A",
+            #delta="Target: < 24h",
         )
     with col2:
         st.metric(
             label="Avg Time-to-Merge (TTM)",
-            value="⏳ Loading...",
-            delta="Target: < 48h",
+            value=f"{metrics['avg_ttm_hours']:.2f} hrs" if pd.notna(metrics["avg_ttm_hours"]) else "N/A",
+            #delta="Target: < 48h",
         )
 
+    # Render raw issues dataframe beneath metrics if data exists
+    if not metrics["issues_df"].empty:
+        st.subheader("📋 Active Issue Stream")
+        st.dataframe(metrics["issues_df"], use_container_width=True)
+
+
 with tab_leaderboard:
     st.header("Community Wall of Fame")
     st.subheader("Celebrating our active contributors")
diff --git a/src/github_client.py b/src/github_client.py
new file mode 100644
index 0000000..59aa163
--- /dev/null
+++ b/src/github_client.py
@@ -0,0 +1,97 @@
+import pandas as pd
+from github import Github, RateLimitExceededException
+import streamlit as st
+
+def get_github_client():
+    """
+    Fetches the token from secrets.toml and initializes the GitHub client.
+    Streamlit automatically searches within .streamlit/secrets.toml.
+    """
+    token = st.secrets.get("GITHUB_TOKEN", None)
+    if token:
+        return Github(token)
+    return Github()  # Fallback to unauthenticated client (significantly lower rate limits)
+
+@st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...")
+def fetch_raw_repo_data(repo_name: str):
+    """
+    Fetches raw issues and pull requests data from the specified repository.
+    Caches data for 15 minutes to preserve API rate limit allocations.
+    """
+    g = get_github_client()
+
+    try:
+        repo = g.get_repo(repo_name)
+        issues_data = []
+        pulls_data = []
+
+        # Limiting to the 100 most recent items to prevent severe API degradation
+        for issue in repo.get_issues(state='all')[:100]:
+            base_info = {
+                "id": issue.id,
+                "number": issue.number,
+                "title": issue.title,
+                "created_at": issue.created_at,
+                "closed_at": issue.closed_at,
+                "labels": [label.name for label in issue.labels],
+                "assignee": issue.assignee.login if issue.assignee else None,
+            }
+
+            if issue.pull_request:
+                # Process item as a Pull Request
+                pr = repo.get_pull(issue.number)
+                base_info["merged_at"] = pr.merged_at
+                base_info["is_merged"] = pr.merged
+                pulls_data.append(base_info)
+            else:
+                # Process item as a standard Issue (calculates baseline response timing)
+                # The issue payload already carries its comment count, so issues
+                # without comments cost no extra API request.
+                first_comment = None
+                if issue.comments:
+                    first_comment = next(iter(issue.get_comments()), None)
+                first_response_at = first_comment.created_at if first_comment else None
+                base_info["first_response_at"] = first_response_at
+                issues_data.append(base_info)
+
+        return {"issues": issues_data, "pulls": pulls_data}
+
+    except RateLimitExceededException:
+        st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.")
+        return {"issues": [], "pulls": []}
+
+def calculate_community_health(repo_name: str):
+    """
+    Transforms raw dictionary arrays into structured DataFrames.
+    Calculates key performance metrics: Time-to-First-Response (TTFR) and Time-to-Merge (TTM).
+    """
+    raw_data = fetch_raw_repo_data(repo_name)
+
+    df_issues = pd.DataFrame(raw_data["issues"])
+    df_pulls = pd.DataFrame(raw_data["pulls"])
+
+    # --- Time-to-First-Response (TTFR) Calculation in Hours ---
+    if not df_issues.empty:
+        df_issues['created_at'] = pd.to_datetime(df_issues['created_at'])
+        df_issues['first_response_at'] = pd.to_datetime(df_issues['first_response_at'])
+        df_issues['ttfr_hours'] = (df_issues['first_response_at'] - df_issues['created_at']).dt.total_seconds() / 3600
+        avg_ttfr = df_issues['ttfr_hours'].mean()
+    else:
+        avg_ttfr = None
+
+    # --- Time-to-Merge (TTM) Calculation in Hours ---
+    if not df_pulls.empty:
+        df_pulls['created_at'] = pd.to_datetime(df_pulls['created_at'])
+        df_pulls['merged_at'] = pd.to_datetime(df_pulls['merged_at'])
+        merged_prs = df_pulls[df_pulls['is_merged'] == True].copy()
+        merged_prs['ttm_hours'] = (merged_prs['merged_at'] - merged_prs['created_at']).dt.total_seconds() / 3600
+        avg_ttm = merged_prs['ttm_hours'].mean()
+    else:
+        avg_ttm = None
+
+    return {
+        "issues_df": df_issues,
+        "pulls_df": df_pulls,
+        "avg_ttfr_hours": avg_ttfr,
+        "avg_ttm_hours": avg_ttm
+    }

From 90b21bc5402c4864029dcb48db394890af9bae45 Mon Sep 17 00:00:00 2001
From: Ariana Cursino <arcursino@gmail.com>
Date: Sat, 20 Jun 2026 13:44:44 -0300
Subject: [PATCH 2/2] style: run black formatter to fix CI linting errors

---
 src/app.py           | 20 +++++++++++++++-----
 src/github_client.py | 33 +++++++++++++++++++++------------
 2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/src/app.py b/src/app.py
index b046fe2..80ee295 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,6 +1,8 @@
 import streamlit as st
 import pandas as pd  # Added to handle missing or null metric values (pd.notna)
-from github_client import calculate_community_health  # Added to access your metric module
+from github_client import (
+    calculate_community_health,
+)  # Supplies the TTFR/TTM metrics displayed in the onboarding tab
 
 # Automatically pull metrics for the main ScanAPI repository
 metrics = calculate_community_health("scanapi/scanapi")
@@ -44,14 +46,22 @@
     with col1:
         st.metric(
             label="Avg Time-to-First-Response (TTFR)",
-            value=f"{metrics['avg_ttfr_hours']:.2f} hrs" if pd.notna(metrics["avg_ttfr_hours"]) else "N/A",
-            #delta="Target: < 24h",
+            value=(
+                f"{metrics['avg_ttfr_hours']:.2f} hrs"
+                if pd.notna(metrics["avg_ttfr_hours"])
+                else "N/A"
+            ),
+            # delta="Target: < 24h",
         )
     with col2:
         st.metric(
             label="Avg Time-to-Merge (TTM)",
-            value=f"{metrics['avg_ttm_hours']:.2f} hrs" if pd.notna(metrics["avg_ttm_hours"]) else "N/A",
-            #delta="Target: < 48h",
+            value=(
+                f"{metrics['avg_ttm_hours']:.2f} hrs"
+                if pd.notna(metrics["avg_ttm_hours"])
+                else "N/A"
+            ),
+            # delta="Target: < 48h",
         )
 
     # Render raw issues dataframe beneath metrics if data exists
diff --git a/src/github_client.py b/src/github_client.py
index 59aa163..3f4017a 100644
--- a/src/github_client.py
+++ b/src/github_client.py
@@ -2,6 +2,7 @@
 from github import Github, RateLimitExceededException
 import streamlit as st
 
+
 def get_github_client():
     """
     Fetches the token from secrets.toml and initializes the GitHub client.
@@ -10,7 +11,10 @@ def get_github_client():
     token = st.secrets.get("GITHUB_TOKEN", None)
     if token:
         return Github(token)
-    return Github()  # Fallback to unauthenticated client (significantly lower rate limits)
+    # Fallback to an unauthenticated client, which is subject to significantly
+    # lower GitHub API rate limits.
+    return Github()
+
 
 @st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...")
 def fetch_raw_repo_data(repo_name: str):
@@ -26,7 +30,7 @@ def fetch_raw_repo_data(repo_name: str):
         pulls_data = []
 
         # Limiting to the 100 most recent items to prevent severe API degradation
-        for issue in repo.get_issues(state='all')[:100]:
+        for issue in repo.get_issues(state="all")[:100]:
             base_info = {
                 "id": issue.id,
                 "number": issue.number,
@@ -60,6 +64,7 @@ def fetch_raw_repo_data(repo_name: str):
         st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.")
         return {"issues": [], "pulls": []}
 
+
 def calculate_community_health(repo_name: str):
     """
     Transforms raw dictionary arrays into structured DataFrames.
@@ -72,20 +77,24 @@ def calculate_community_health(repo_name: str):
 
     # --- Time-to-First-Response (TTFR) Calculation in Hours ---
     if not df_issues.empty:
-        df_issues['created_at'] = pd.to_datetime(df_issues['created_at'])
-        df_issues['first_response_at'] = pd.to_datetime(df_issues['first_response_at'])
-        df_issues['ttfr_hours'] = (df_issues['first_response_at'] - df_issues['created_at']).dt.total_seconds() / 3600
-        avg_ttfr = df_issues['ttfr_hours'].mean()
+        # utc=True: an all-None column would otherwise be tz-naive and fail to subtract
+        for col in ("created_at", "first_response_at"):
+            df_issues[col] = pd.to_datetime(df_issues[col], utc=True)
+        elapsed = df_issues["first_response_at"] - df_issues["created_at"]
+        df_issues["ttfr_hours"] = elapsed.dt.total_seconds() / 3600
+        avg_ttfr = df_issues["ttfr_hours"].mean()
     else:
         avg_ttfr = None
 
     # --- Time-to-Merge (TTM) Calculation in Hours ---
     if not df_pulls.empty:
-        df_pulls['created_at'] = pd.to_datetime(df_pulls['created_at'])
-        df_pulls['merged_at'] = pd.to_datetime(df_pulls['merged_at'])
-        merged_prs = df_pulls[df_pulls['is_merged'] == True].copy()
-        merged_prs['ttm_hours'] = (merged_prs['merged_at'] - merged_prs['created_at']).dt.total_seconds() / 3600
-        avg_ttm = merged_prs['ttm_hours'].mean()
+        # utc=True: an all-None column would otherwise be tz-naive and fail to subtract
+        for col in ("created_at", "merged_at"):
+            df_pulls[col] = pd.to_datetime(df_pulls[col], utc=True)
+        merged_prs = df_pulls[df_pulls["is_merged"].astype(bool)].copy()
+        elapsed = merged_prs["merged_at"] - merged_prs["created_at"]
+        merged_prs["ttm_hours"] = elapsed.dt.total_seconds() / 3600
+        avg_ttm = merged_prs["ttm_hours"].mean()
     else:
         avg_ttm = None
 
@@ -93,5 +102,5 @@ def calculate_community_health(repo_name: str):
         "issues_df": df_issues,
         "pulls_df": df_pulls,
         "avg_ttfr_hours": avg_ttfr,
-        "avg_ttm_hours": avg_ttm
+        "avg_ttm_hours": avg_ttm,
     }