Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions scripts/container-startup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ python manage.py clearsessions

# Add custom commands here

# Run a small AI review summary batch after startup so that summary generation doesn't block health checks.
# Skipped when OPENROUTER_API_KEY is unset/empty or AI_SUMMARY_STARTUP_LIMIT (default 5) is not greater than 0.
if [ -n "${OPENROUTER_API_KEY:-}" ] && [ "${AI_SUMMARY_STARTUP_LIMIT:-5}" -gt 0 ]; then
# The subshell is backgrounded with `&`, so this script moves on to starting the server without waiting for it.
(
# Delay the batch (default 30 seconds) before doing any work.
sleep "${AI_SUMMARY_STARTUP_DELAY_SECONDS:-30}"
echo "Starting background AI summary generation..."

# Build the command as an array so every option and its value stays a separate word when expanded.
summary_cmd=(
python manage.py generate_ai_summaries
--limit "${AI_SUMMARY_STARTUP_LIMIT:-5}"
--min-reviews "${AI_SUMMARY_STARTUP_MIN_REVIEWS:-5}"
--max-reviews "${AI_SUMMARY_STARTUP_MAX_REVIEWS:-12}"
--missing-only # generate only new summaries
)

# A failing batch is only reported; the `||` branch turns the failure into a log line.
"${summary_cmd[@]}" || echo "Background AI summary generation failed."
) &
fi

echo 'Starting Django Server...'
exec gunicorn tcf_core.wsgi:application --bind 0.0.0.0:80 --log-level "info" --timeout 120
5 changes: 5 additions & 0 deletions tcf_core/settings/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pylint: disable=fixme
"""Base Django settings for tcf_core project."""
# pylint: disable=duplicate-code
import os

import environ
Expand All @@ -17,6 +18,10 @@
env_file = os.path.join(BASE_DIR, ".env")
environ.Env.read_env(env_file)

# AI review summary configuration
OPENROUTER_API_KEY = env.str("OPENROUTER_API_KEY", default="")
OPENROUTER_MODEL = env.str("OPENROUTER_MODEL", default="openrouter/auto")

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = env.str("SECRET_KEY")

Expand Down
1 change: 1 addition & 0 deletions tcf_website/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,4 @@ class ClubCategoryAdmin(admin.ModelAdmin):
admin.site.register(SectionTime, SectionTimeAdmin)
admin.site.register(Club, ClubAdmin)
admin.site.register(ClubCategory, ClubCategoryAdmin)
admin.site.register(ReviewLLMSummary)
229 changes: 229 additions & 0 deletions tcf_website/management/commands/generate_ai_summaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
"""Manual generator for AI review summaries."""

import time
from typing import Any

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.db.models import Count, Exists, Max, OuterRef

from ...models import Course, Instructor, Review, ReviewLLMSummary
from .generate_ai_summaries_helpers import generate_review_summary


# pylint: disable=too-many-branches,too-many-locals,no-member


class Command(BaseCommand):
"""Command to manually generate AI summaries for course/instructor pairs.

Usage examples:
# Help command:
docker compose exec web python manage.py generate_ai_summaries --help

# Dry run: show the top pairs by written-review count (default --limit is 15; no LLM calls)
python manage.py generate_ai_summaries --dry-run
OR
docker compose exec web python manage.py generate_ai_summaries --dry-run

# Generate for top 30 course / instructor pairs with at least 3 written reviews
python manage.py generate_ai_summaries --limit 30 --min-reviews 3
OR
docker compose exec web python manage.py generate_ai_summaries --limit 30 --min-reviews 3

# Generate for top 20 course / instructor pairs missing a summary
python manage.py generate_ai_summaries --limit 20 --missing-only
OR
docker compose exec web python manage.py generate_ai_summaries --limit 20 --missing-only

# Generate for a specific course/instructor pair
python manage.py generate_ai_summaries --course-id 1 --instructor-id 4019
OR
docker compose exec web python manage.py generate_ai_summaries
--course-id 1 --instructor-id 4019
"""

help = "Generate AI summaries manually for course/instructor pairs."
Comment thread
coderabbitai[bot] marked this conversation as resolved.

def add_arguments(self, parser):
    """Register the command-line options understood by this command.

    The count-style options (``--limit``, ``--min-reviews`` and
    ``--max-reviews``) reject negative values at parse time, so a typo such
    as ``--limit -1`` is reported as a usage error instead of surfacing
    later as a failure while the selected pairs are being processed.

    Args:
        parser: The argparse-compatible parser supplied by Django.
    """

    def non_negative_int(raw):
        """Convert ``raw`` to an int and refuse values below zero.

        argparse converts the ``ValueError`` into a usage error that names
        this function (for example "invalid non_negative_int value").
        """
        value = int(raw)
        if value < 0:
            raise ValueError(f"expected a non-negative integer, got {value}")
        return value

    parser.add_argument(
        "--limit",
        type=non_negative_int,
        default=15,
        help="Number of top course/instructor pairs by review count to summarize.",
    )
    parser.add_argument(
        "--course-id",
        type=int,
        help="Specific course id to summarize (must be used with --instructor-id).",
    )
    parser.add_argument(
        "--instructor-id",
        type=int,
        help="Specific instructor id to summarize (must be used with --course-id).",
    )
    parser.add_argument(
        "--min-reviews",
        type=non_negative_int,
        default=5,
        help="Minimum number of written reviews required to generate a summary.",
    )
    parser.add_argument(
        "--max-reviews",
        type=non_negative_int,
        default=25,
        help="Use only the top N most recent written reviews in the prompt (default: 25).",
    )
    parser.add_argument(
        "--missing-only",
        action="store_true",
        help="Only include pairs without an existing summary.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show which pairs would be processed without calling the model.",
    )

def handle(
    self, *args: Any, **options: Any
):  # pylint: disable=unused-argument,too-many-locals
    """Select course/instructor pairs and generate (or preview) their summaries.

    Pairs come either from an explicit ``--course-id``/``--instructor-id``
    combination or from the pairs with the most visible, non-toxic written
    reviews. Each generated summary is stored with ``update_or_create`` on
    ``ReviewLLMSummary``.

    Args:
        *args: Unused positional arguments passed by Django.
        **options: Parsed command-line options (see ``add_arguments``).

    Raises:
        CommandError: If the API key is missing for a real (non dry-run)
            run, if only one of the two ids is given, or if ``limit`` is
            negative.
    """
    course_id = options.get("course_id")
    instructor_id = options.get("instructor_id")
    limit = options.get("limit")
    min_reviews = options.get("min_reviews")
    max_reviews = options.get("max_reviews")
    missing_only = options.get("missing_only")
    dry_run = options.get("dry_run")

    # A dry run never calls the model, so only real runs need credentials.
    if not dry_run and not getattr(settings, "OPENROUTER_API_KEY", ""):
        raise CommandError("OPENROUTER_API_KEY is not configured.")

    # Compare against None rather than truthiness so an id of 0 still counts
    # as "provided".
    if (course_id is None) != (instructor_id is None):
        raise CommandError(
            "Provide both --course-id and --instructor-id together, or neither."
        )

    # The queryset below is sliced with ``limit``; reject a negative value
    # here with a clear message.
    if limit is not None and limit < 0:
        raise CommandError("--limit must be a non-negative integer.")

    # Base queryset: visible, non-toxic, with text
    base_reviews = Review.objects.filter(
        hidden=False, toxicity_rating__lt=settings.TOXICITY_THRESHOLD
    ).exclude(text="")

    pairs: list[tuple[int, int, int, int]] = []

    if course_id is not None:
        if (
            missing_only
            and ReviewLLMSummary.objects.filter(
                course_id=course_id, instructor_id=instructor_id, club__isnull=True
            ).exists()
        ):
            self.stdout.write(
                self.style.WARNING(
                    f"Skipping course {course_id} / instructor {instructor_id}: "
                    "summary already exists."
                )
            )
            return
        pair_reviews = base_reviews.filter(
            course_id=course_id, instructor_id=instructor_id
        )
        review_count = pair_reviews.count()
        if review_count < min_reviews:
            msg = (
                f"Skipping course {course_id} / instructor {instructor_id}: "
                f"only {review_count} reviews."
            )
            self.stdout.write(self.style.WARNING(msg))
            return
        # Highest review id for the pair, or 0 when there are no reviews.
        latest_id = (
            pair_reviews.order_by("-id").values_list("id", flat=True).first() or 0
        )
        pairs = [(course_id, instructor_id, review_count, latest_id)]
    else:
        agg = (
            base_reviews.values("course_id", "instructor_id")
            .annotate(review_count=Count("id"), last_id=Max("id"))
            .filter(review_count__gte=min_reviews)
            .order_by("-review_count")
        )
        if missing_only:
            existing_summaries = ReviewLLMSummary.objects.filter(
                course_id=OuterRef("course_id"),
                instructor_id=OuterRef("instructor_id"),
                club__isnull=True,
            )
            agg = agg.annotate(has_summary=Exists(existing_summaries)).filter(
                has_summary=False
            )
        pairs = [
            (
                row["course_id"],
                row["instructor_id"],
                row["review_count"],
                row["last_id"],
            )
            for row in agg[:limit]
        ]

    if not pairs:
        self.stdout.write(self.style.WARNING("No pairs matched criteria."))
        return

    self.stdout.write(f"Processing {len(pairs)} pair(s)...")

    # Set once the first model request has been issued, so every later
    # request is preceded by a pause, including after a failed request.
    request_made = False

    for course_id, instructor_id, review_count, latest_id in pairs:
        try:
            course = Course.objects.get(id=course_id)
            instructor = Instructor.objects.get(id=instructor_id)
        except (Course.DoesNotExist, Instructor.DoesNotExist):
            # Report and move on instead of aborting the whole batch.
            self.stdout.write(
                self.style.ERROR(
                    f"Skipping course_id={course_id} / instructor_id={instructor_id}: "
                    "course or instructor not found."
                )
            )
            continue

        if dry_run:
            self.stdout.write(
                f"[DRY RUN] Would summarize {course.code()} / {instructor.full_name} "
                f"(course_id={course_id}, instructor_id={instructor_id}, "
                f"{review_count} reviews)"
            )
            continue

        if request_made:
            time.sleep(4.0)  # avoids rate limits
        request_made = True

        qs = base_reviews.filter(course=course, instructor=instructor).order_by(
            "-created"
        )
        summary_text, error, model_used = generate_review_summary(
            course, instructor, qs, max_reviews=max_reviews
        )
        if not summary_text:
            self.stdout.write(
                self.style.ERROR(
                    f"Failed for {course.code()} / {instructor.full_name}: {error}"
                )
            )
            continue

        # Only up to ``max_reviews`` reviews are handed to the generator, so
        # the stored count is capped accordingly.
        source_review_count = (
            review_count
            if max_reviews is None
            else min(review_count, max_reviews)
        )

        ReviewLLMSummary.objects.update_or_create(
            course=course,
            instructor=instructor,
            club=None,
            defaults={
                "summary_text": summary_text,
                "source_review_count": source_review_count,
                "last_review_id": latest_id,
                "source_metadata": {},
                "model_id": model_used or getattr(settings, "OPENROUTER_MODEL", ""),
            },
        )

        self.stdout.write(
            self.style.SUCCESS(
                f"Saved summary for {course.code()} / {instructor.full_name} "
                f"({review_count} reviews) using {model_used or 'unknown model'}"
            )
        )
Loading
Loading