Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ env/
# Streamlit logs and dynamic caching
.streamlit/
.streamlit/config.toml
.streamlit/secrets.toml


# Linter profiles and caches
.black
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pandas>=2.0.0
plotly>=5.15.0
requests>=2.31.0
Pillow>=10.0.0
PyGithub>=2.1.1

# Code Quality & Linters
black>=23.0.0
Expand Down
35 changes: 26 additions & 9 deletions src/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import streamlit as st
import pandas as pd # Added to handle missing or null metric values (pd.notna)
from github_client import (
calculate_community_health,
) # Added to access your metric module

# Automatically pull metrics for the main ScanAPI repository
metrics = calculate_community_health("scanapi/scanapi")

# Initial page configuration
st.set_page_config(
Expand Down Expand Up @@ -33,26 +40,36 @@
with tab_onboarding:
st.header("Contributor Onboarding Hub")
st.subheader("Lowering the barrier to entry")
st.info(
"Feature incoming: Dynamic aggregation of "
"'good first issue' and 'help wanted' labels."
)

# Visual example of metric cards (TTFR and TTM)
# Visual metrics extracted dynamically from the GitHub client module
col1, col2 = st.columns(2)
with col1:
st.metric(
label="Avg Time-to-First-Response (TTFR)",
value="⏳ Loading...",
delta="Target: < 24h",
value=(
f"{metrics['avg_ttfr_hours']:.2f} hrs"
if pd.notna(metrics["avg_ttfr_hours"])
else "N/A"
),
# delta="Target: < 24h",
)
with col2:
st.metric(
label="Avg Time-to-Merge (TTM)",
value="⏳ Loading...",
delta="Target: < 48h",
value=(
f"{metrics['avg_ttm_hours']:.2f} hrs"
if pd.notna(metrics["avg_ttm_hours"])
else "N/A"
),
# delta="Target: < 48h",
)

# Render raw issues dataframe beneath metrics if data exists
if not metrics["issues_df"].empty:
st.subheader("📋 Active Issue Stream")
st.dataframe(metrics["issues_df"], use_container_width=True)


with tab_leaderboard:
st.header("Community Wall of Fame")
st.subheader("Celebrating our active contributors")
Expand Down
106 changes: 106 additions & 0 deletions src/github_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import pandas as pd
from github import Github, RateLimitExceededException
import streamlit as st


def get_github_client():
    """
    Build a PyGithub client, authenticated when a token is configured.

    The token is read from the ``GITHUB_TOKEN`` entry of Streamlit's secrets
    (Streamlit looks for them in ``.streamlit/secrets.toml``). When no token
    is available -- including when no secrets file exists at all -- an
    unauthenticated client is returned instead.

    Returns:
        Github: An authenticated client if a token was found, otherwise an
        anonymous one (subject to significantly lower API rate limits).
    """
    try:
        token = st.secrets.get("GITHUB_TOKEN", None)
    except FileNotFoundError:
        # st.secrets raises (FileNotFoundError, or a subclass of it in newer
        # Streamlit releases) when no secrets file is present, instead of
        # returning the default. Treat that as "no token configured" so the
        # anonymous fallback below is actually reachable.
        token = None

    if token:
        # Imported locally so this function does not depend on a change to
        # the module-level imports. Passing ``auth=`` replaces the positional
        # token form, which PyGithub 2.x deprecates.
        from github import Auth

        return Github(auth=Auth.Token(token))

    # Fallback to unauthenticated client (significantly lower rate limits)
    return Github()


@st.cache_data(ttl=900, show_spinner="Fetching GitHub community metrics...")
def fetch_raw_repo_data(repo_name: str):
    """
    Fetch raw issue and pull-request records for a repository.

    The result is cached by Streamlit for 15 minutes (``ttl=900``) to
    preserve API rate limit allocations.

    Args:
        repo_name: Repository name as passed to ``Github.get_repo``
            (e.g. ``"owner/repo"``).

    Returns:
        dict: ``{"issues": [...], "pulls": [...]}``. Every record carries
        ``id``, ``number``, ``title``, ``created_at``, ``closed_at``,
        ``labels`` and ``assignee``. Pull-request records additionally carry
        ``merged_at`` and ``is_merged``; issue records additionally carry
        ``first_response_at`` (``None`` when the issue has no comments).
        If the GitHub rate limit is hit, an error is shown in the Streamlit
        UI and both lists are returned empty.
    """
    g = get_github_client()

    try:
        repo = g.get_repo(repo_name)
        issues_data = []
        pulls_data = []

        # Limiting to the 100 most recent items to prevent severe API degradation
        for issue in repo.get_issues(state="all")[:100]:
            base_info = {
                "id": issue.id,
                "number": issue.number,
                "title": issue.title,
                "created_at": issue.created_at,
                "closed_at": issue.closed_at,
                "labels": [label.name for label in issue.labels],
                "assignee": issue.assignee.login if issue.assignee else None,
            }

            if issue.pull_request:
                # The issues endpoint also returns pull requests; merge
                # details are only available on the pull-request object.
                pr = repo.get_pull(issue.number)
                base_info["merged_at"] = pr.merged_at
                base_info["is_merged"] = pr.merged
                pulls_data.append(base_info)
            else:
                # ``issue.comments`` is the comment count already contained
                # in the issue payload, so uncommented issues cost no extra
                # request and commented ones cost a single one.
                first_comment = None
                if issue.comments > 0:
                    # next(..., None) tolerates a stale count (e.g. the only
                    # comment was deleted after the issue list was fetched).
                    first_comment = next(iter(issue.get_comments()), None)

                base_info["first_response_at"] = (
                    first_comment.created_at if first_comment is not None else None
                )
                issues_data.append(base_info)

        return {"issues": issues_data, "pulls": pulls_data}

    except RateLimitExceededException:
        st.error("💥 GitHub API Rate limit reached! Serving empty fallback arrays.")
        return {"issues": [], "pulls": []}


def calculate_community_health(repo_name: str):
    """
    Turn the raw repository records into DataFrames and summary metrics.

    Computes the average Time-to-First-Response (TTFR) over issues and the
    average Time-to-Merge (TTM) over merged pull requests, both in hours.

    Args:
        repo_name: Repository name forwarded to ``fetch_raw_repo_data``.

    Returns:
        dict: with the keys

        * ``issues_df`` -- issues DataFrame; when non-empty it has parsed
          ``created_at`` / ``first_response_at`` columns and an added
          ``ttfr_hours`` column.
        * ``pulls_df`` -- pull-requests DataFrame; when non-empty it has
          parsed ``created_at`` / ``merged_at`` columns.
        * ``avg_ttfr_hours`` -- mean TTFR in hours, or ``None`` when no
          issue has a response.
        * ``avg_ttm_hours`` -- mean TTM in hours, or ``None`` when there is
          no merged pull request.
    """
    raw_data = fetch_raw_repo_data(repo_name)

    df_issues = pd.DataFrame(raw_data["issues"])
    df_pulls = pd.DataFrame(raw_data["pulls"])

    avg_ttfr = None
    avg_ttm = None

    # --- Time-to-First-Response (TTFR) Calculation in Hours ---
    if not df_issues.empty:
        # utc=True keeps both columns timezone-aware even when one of them is
        # entirely null. Without it an all-null column parses as tz-naive and
        # the subtraction below raises TypeError.
        df_issues["created_at"] = pd.to_datetime(df_issues["created_at"], utc=True)
        df_issues["first_response_at"] = pd.to_datetime(
            df_issues["first_response_at"], utc=True
        )
        df_issues["ttfr_hours"] = (
            df_issues["first_response_at"] - df_issues["created_at"]
        ).dt.total_seconds() / 3600
        # mean() skips unanswered issues (NaN) and is NaN when none answered.
        ttfr_mean = df_issues["ttfr_hours"].mean()
        if pd.notna(ttfr_mean):
            avg_ttfr = float(ttfr_mean)

    # --- Time-to-Merge (TTM) Calculation in Hours ---
    if not df_pulls.empty:
        df_pulls["created_at"] = pd.to_datetime(df_pulls["created_at"], utc=True)
        df_pulls["merged_at"] = pd.to_datetime(df_pulls["merged_at"], utc=True)
        # Work on a copy so the helper column is not added to pulls_df.
        merged_prs = df_pulls[df_pulls["is_merged"].astype(bool)].copy()
        merged_prs["ttm_hours"] = (
            merged_prs["merged_at"] - merged_prs["created_at"]
        ).dt.total_seconds() / 3600
        ttm_mean = merged_prs["ttm_hours"].mean()
        if pd.notna(ttm_mean):
            avg_ttm = float(ttm_mean)

    return {
        "issues_df": df_issues,
        "pulls_df": df_pulls,
        "avg_ttfr_hours": avg_ttfr,
        "avg_ttm_hours": avg_ttm,
    }
Loading