scanapi · arcursino · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -12,6 +12,8 @@ env/
 # Streamlit logs and dynamic caching
 .streamlit/
 .streamlit/config.toml
+.streamlit/secrets.toml
+
 
 # Linter profiles and caches
 .black

diff --git a/requirements.txt b/requirements.txt
@@ -3,6 +3,7 @@ pandas>=2.0.0
 plotly>=5.15.0
 requests>=2.31.0
 Pillow>=10.0.0
+PyGithub>=2.1.1
 
 # Code Quality & Linters
 black>=23.0.0

diff --git a/src/app.py b/src/app.py
@@ -1,4 +1,11 @@
 import streamlit as st
+import pandas as pd  # Added to handle missing or null metric values (pd.notna)
+from github_client import (
+    calculate_community_health,
+)  # Added to access your metric module
+
+# Automatically pull metrics for the main ScanAPI repository.
+# NOTE(review): this runs before st.set_page_config() below, and the fetch
+# can emit Streamlit elements (the cache spinner, the rate-limit error).
+# Streamlit versions that require set_page_config to be the first Streamlit
+# command would reject this ordering -- confirm against the version in use.
+metrics = calculate_community_health("scanapi/scanapi")
 
 # Initial page configuration
 st.set_page_config(
@@ -33,26 +40,36 @@
 with tab_onboarding:
     st.header("Contributor Onboarding Hub")
     st.subheader("Lowering the barrier to entry")
-    st.info(
-        "Feature incoming: Dynamic aggregation of "
-        "'good first issue' and 'help wanted' labels."
-    )
 
-    # Visual example of metric cards (TTFR and TTM)
+    # Metric cards fed by calculate_community_health(); an average that is
+    # missing (None/NaN, i.e. nothing to average) is rendered as "N/A".
     col1, col2 = st.columns(2)
     with col1:
         st.metric(
             label="Avg Time-to-First-Response (TTFR)",
-            value="⏳ Loading...",
-            delta="Target: < 24h",
+            value=(
+                f"{metrics['avg_ttfr_hours']:.2f} hrs"
+                if pd.notna(metrics["avg_ttfr_hours"])
+                else "N/A"
+            ),
+            # delta="Target: < 24h",
         )
     with col2:
         st.metric(
             label="Avg Time-to-Merge (TTM)",
-            value="⏳ Loading...",
-            delta="Target: < 48h",
+            value=(
+                f"{metrics['avg_ttm_hours']:.2f} hrs"
+                if pd.notna(metrics["avg_ttm_hours"])
+                else "N/A"
+            ),
+            # delta="Target: < 48h",
         )
 
+    # Render raw issues dataframe beneath metrics if data exists
+    if not metrics["issues_df"].empty:
+        st.subheader("📋 Active Issue Stream")
+        st.dataframe(metrics["issues_df"], use_container_width=True)
+
+
 with tab_leaderboard:
     st.header("Community Wall of Fame")
     st.subheader("Celebrating our active contributors")

diff --git a/src/github_client.py b/src/github_client.py
@@ -0,0 +1,106 @@
+import pandas as pd
+from github import Github, RateLimitExceededException
+import streamlit as st
+
+
+def get_github_client():
+    """
+    Build a PyGithub client, authenticated when a token is configured.
+
+    Reads ``GITHUB_TOKEN`` from Streamlit secrets, which Streamlit looks up
+    in ``.streamlit/secrets.toml``. When no secrets file exists, or the key
+    is missing or empty, an unauthenticated client is returned instead
+    (significantly lower rate limits).
+
+    Returns:
+        A ``github.Github`` instance.
+    """
+    # Imported locally so this function carries its own extra dependency.
+    from github import Auth
+
+    try:
+        token = st.secrets.get("GITHUB_TOKEN", None)
+    except FileNotFoundError:
+        # Touching st.secrets without any secrets.toml raises rather than
+        # returning the default; treat that the same as "no token set".
+        token = None
+
+    if token:
+        # Passing the token positionally is deprecated in PyGithub 2.x.
+        return Github(auth=Auth.Token(token))
+    return Github()
+
+
+@st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...")
+def fetch_raw_repo_data(repo_name: str):
+    """
+    Fetch raw issue and pull-request records for a repository.
+
+    Only the first 100 items returned by ``get_issues(state="all")`` are
+    inspected, and the result is cached for 15 minutes (``ttl=900``) to
+    preserve API rate limit allocations.
+
+    Args:
+        repo_name: Repository identifier handed to ``get_repo``, e.g.
+            ``"scanapi/scanapi"``.
+
+    Returns:
+        A dict with two lists of plain dicts. ``"issues"`` entries carry
+        ``first_response_at`` (creation time of the first comment, or
+        ``None``); ``"pulls"`` entries carry ``merged_at`` and ``is_merged``.
+        Both lists are empty when the GitHub rate limit is exceeded.
+    """
+    g = get_github_client()
+
+    try:
+        repo = g.get_repo(repo_name)
+        issues_data = []
+        pulls_data = []
+
+        # Limiting to 100 items to prevent severe API degradation
+        for issue in repo.get_issues(state="all")[:100]:
+            base_info = {
+                "id": issue.id,
+                "number": issue.number,
+                "title": issue.title,
+                "created_at": issue.created_at,
+                "closed_at": issue.closed_at,
+                "labels": [label.name for label in issue.labels],
+                "assignee": issue.assignee.login if issue.assignee else None,
+            }
+
+            if issue.pull_request:
+                # The issue listing also yields pull requests; merge details
+                # are read from the pull request object itself.
+                pr = repo.get_pull(issue.number)
+                base_info["merged_at"] = pr.merged_at
+                base_info["is_merged"] = pr.merged
+                pulls_data.append(base_info)
+            else:
+                # issue.comments is the comment count delivered with the
+                # issue, so issues without comments cost no extra request
+                # (asking the comment list for totalCount would).
+                if issue.comments > 0:
+                    first_comment_time = issue.get_comments()[0].created_at
+                else:
+                    first_comment_time = None
+                base_info["first_response_at"] = first_comment_time
+                issues_data.append(base_info)
+
+        return {"issues": issues_data, "pulls": pulls_data}
+
+    except RateLimitExceededException:
+        # NOTE(review): this fallback is the function's return value, so it
+        # is presumably cached for the TTL too -- confirm that is intended.
+        st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.")
+        return {"issues": [], "pulls": []}
+
+
+def calculate_community_health(repo_name: str):
+    """
+    Compute community-health metrics for a repository.
+
+    Turns the raw records from ``fetch_raw_repo_data`` into DataFrames and
+    derives two averages, both expressed in hours:
+
+    * Time-to-First-Response (TTFR): first comment time minus issue creation
+      time; issues without a comment do not contribute to the mean.
+    * Time-to-Merge (TTM): merge time minus creation time, over merged pull
+      requests only.
+
+    Args:
+        repo_name: Repository identifier passed to ``fetch_raw_repo_data``.
+
+    Returns:
+        A dict with ``issues_df`` (gaining a ``ttfr_hours`` column when it is
+        non-empty), ``pulls_df``, ``avg_ttfr_hours`` and ``avg_ttm_hours``.
+        An average is ``None`` when its DataFrame is empty and NaN when there
+        is nothing to average.
+    """
+    raw_data = fetch_raw_repo_data(repo_name)
+
+    df_issues = pd.DataFrame(raw_data["issues"])
+    df_pulls = pd.DataFrame(raw_data["pulls"])
+
+    # utc=True keeps every timestamp column timezone-aware, even one holding
+    # only missing values. Without it such a column is parsed as tz-naive and
+    # subtracting tz-aware creation times from it raises TypeError.
+
+    # --- Time-to-First-Response (TTFR) Calculation in Hours ---
+    if not df_issues.empty:
+        df_issues["created_at"] = pd.to_datetime(df_issues["created_at"], utc=True)
+        df_issues["first_response_at"] = pd.to_datetime(
+            df_issues["first_response_at"], utc=True
+        )
+        df_issues["ttfr_hours"] = (
+            df_issues["first_response_at"] - df_issues["created_at"]
+        ).dt.total_seconds() / 3600
+        avg_ttfr = df_issues["ttfr_hours"].mean()
+    else:
+        avg_ttfr = None
+
+    # --- Time-to-Merge (TTM) Calculation in Hours ---
+    if not df_pulls.empty:
+        df_pulls["created_at"] = pd.to_datetime(df_pulls["created_at"], utc=True)
+        df_pulls["merged_at"] = pd.to_datetime(df_pulls["merged_at"], utc=True)
+        # Work on a copy so the helper column is not added to pulls_df.
+        merged_prs = df_pulls[df_pulls["is_merged"].astype(bool)].copy()
+        merged_prs["ttm_hours"] = (
+            merged_prs["merged_at"] - merged_prs["created_at"]
+        ).dt.total_seconds() / 3600
+        avg_ttm = merged_prs["ttm_hours"].mean()
+    else:
+        avg_ttm = None
+
+    return {
+        "issues_df": df_issues,
+        "pulls_df": df_pulls,
+        "avg_ttfr_hours": avg_ttfr,
+        "avg_ttm_hours": avg_ttm,
+    }