diff --git a/scripts/container-startup.sh b/scripts/container-startup.sh index 6ce4d11ef..5cdf67cb3 100755 --- a/scripts/container-startup.sh +++ b/scripts/container-startup.sh @@ -15,6 +15,23 @@ python manage.py clearsessions # Add custom commands here +# Run a small AI reviews summary batch after startup so summary generation does not block health checks. +if [ -n "${OPENROUTER_API_KEY:-}" ] && [ "${AI_SUMMARY_STARTUP_LIMIT:-5}" -gt 0 ]; then + ( + sleep "${AI_SUMMARY_STARTUP_DELAY_SECONDS:-30}" + echo "Starting background AI summary generation..." + + summary_cmd=( + python manage.py generate_ai_summaries + --limit "${AI_SUMMARY_STARTUP_LIMIT:-5}" + --min-reviews "${AI_SUMMARY_STARTUP_MIN_REVIEWS:-5}" + --max-reviews "${AI_SUMMARY_STARTUP_MAX_REVIEWS:-12}" + --missing-only # generate only new summaries + ) + + "${summary_cmd[@]}" || echo "Background AI summary generation failed." + ) & +fi echo 'Starting Django Server...' exec gunicorn tcf_core.wsgi:application --bind 0.0.0.0:80 --log-level "info" --timeout 120 diff --git a/tcf_core/settings/base.py b/tcf_core/settings/base.py index 31ed5d863..ae7736eec 100644 --- a/tcf_core/settings/base.py +++ b/tcf_core/settings/base.py @@ -1,5 +1,6 @@ # pylint: disable=fixme """Base Django settings for tcf_core project.""" +# pylint: disable=duplicate-code import os import environ @@ -17,6 +18,10 @@ env_file = os.path.join(BASE_DIR, ".env") environ.Env.read_env(env_file) +# AI review summary configuration +OPENROUTER_API_KEY = env.str("OPENROUTER_API_KEY", default="") +OPENROUTER_MODEL = env.str("OPENROUTER_MODEL", default="openrouter/auto") + # SECURITY WARNING: keep the secret key used in production secret! 
SECRET_KEY = env.str("SECRET_KEY") diff --git a/tcf_website/admin.py b/tcf_website/admin.py index 1209aad51..dcb22a4af 100644 --- a/tcf_website/admin.py +++ b/tcf_website/admin.py @@ -138,3 +138,4 @@ class ClubCategoryAdmin(admin.ModelAdmin): admin.site.register(SectionTime, SectionTimeAdmin) admin.site.register(Club, ClubAdmin) admin.site.register(ClubCategory, ClubCategoryAdmin) +admin.site.register(ReviewLLMSummary) diff --git a/tcf_website/management/commands/generate_ai_summaries.py b/tcf_website/management/commands/generate_ai_summaries.py new file mode 100644 index 000000000..d85542f3c --- /dev/null +++ b/tcf_website/management/commands/generate_ai_summaries.py @@ -0,0 +1,229 @@ +"""Manual generator for AI review summaries.""" + +import time +from typing import Any + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.db.models import Count, Exists, Max, OuterRef + +from ...models import Course, Instructor, Review, ReviewLLMSummary +from .generate_ai_summaries_helpers import generate_review_summary + + +# pylint: disable=too-many-branches,too-many-locals,no-member + + +class Command(BaseCommand): + """Command to manually generate AI summaries for course/instructor pairs. 
+ + Usage examples: + # Help command: + docker compose exec web python manage.py generate_ai_summaries --help + + # Dry run: show the top 15 pairs (default --limit) by written-review count (no LLM calls) + python manage.py generate_ai_summaries --dry-run + OR + docker compose exec web python manage.py generate_ai_summaries --dry-run + + # Generate for top 30 course / instructor pairs with at least 3 written reviews + python manage.py generate_ai_summaries --limit 30 --min-reviews 3 + OR + docker compose exec web python manage.py generate_ai_summaries --limit 30 --min-reviews 3 + + # Generate for top 20 course / instructor pairs missing a summary + python manage.py generate_ai_summaries --limit 20 --missing-only + OR + docker compose exec web python manage.py generate_ai_summaries --limit 20 --missing-only + + # Generate for a specific course/instructor pair + python manage.py generate_ai_summaries --course-id 1 --instructor-id 4019 + docker compose exec web python manage.py generate_ai_summaries + --course-id 1 --instructor-id 4019 + """ + + help = "Generate AI summaries manually for course/instructor pairs." 
+ + def add_arguments(self, parser): + parser.add_argument( + "--limit", + type=int, + default=15, + help="Number of top course/instructor pairs by review count to summarize.", + ) + parser.add_argument( + "--course-id", + type=int, + help="Specific course id to summarize (must be used with --instructor-id).", + ) + parser.add_argument( + "--instructor-id", + type=int, + help="Specific instructor id to summarize (must be used with --course-id).", + ) + parser.add_argument( + "--min-reviews", + type=int, + default=5, + help="Minimum number of written reviews required to generate a summary.", + ) + parser.add_argument( + "--max-reviews", + type=int, + default=25, + help="Use only the top N most recent written reviews in the prompt (default: 25).", + ) + parser.add_argument( + "--missing-only", + action="store_true", + help="Only include pairs without an existing summary.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show which pairs would be processed without calling the model.", + ) + + def handle( + self, *args: Any, **options: Any + ): # pylint: disable=unused-argument,too-many-locals + if not getattr(settings, "OPENROUTER_API_KEY", ""): + raise CommandError("OPENROUTER_API_KEY is not configured.") + + course_id = options.get("course_id") + instructor_id = options.get("instructor_id") + limit = options.get("limit") + min_reviews = options.get("min_reviews") + max_reviews = options.get("max_reviews") + missing_only = options.get("missing_only") + dry_run = options.get("dry_run") + + if bool(course_id) ^ bool(instructor_id): + raise CommandError( + "Provide both --course-id and --instructor-id together, or neither." 
+ ) + + # Base queryset: visible, non-toxic, with text + base_reviews = Review.objects.filter( + hidden=False, toxicity_rating__lt=settings.TOXICITY_THRESHOLD + ).exclude(text="") + + pairs: list[tuple[int, int, int, int]] = [] + + if course_id and instructor_id: + if ( + missing_only + and ReviewLLMSummary.objects.filter( + course_id=course_id, instructor_id=instructor_id, club__isnull=True + ).exists() + ): + self.stdout.write( + self.style.WARNING( + f"Skipping course {course_id} / instructor {instructor_id}: " + "summary already exists." + ) + ) + return + review_count = base_reviews.filter( + course_id=course_id, instructor_id=instructor_id + ).count() + if review_count < min_reviews: + msg = ( + f"Skipping course {course_id} / instructor {instructor_id}: " + f"only {review_count} reviews." + ) + self.stdout.write(self.style.WARNING(msg)) + return + latest_id = ( + base_reviews.filter(course_id=course_id, instructor_id=instructor_id) + .order_by("-id") + .values_list("id", flat=True) + .first() + or 0 + ) + pairs = [(course_id, instructor_id, review_count, latest_id)] + else: + agg = ( + base_reviews.values("course_id", "instructor_id") + .annotate(review_count=Count("id"), last_id=Max("id")) + .filter(review_count__gte=min_reviews) + .order_by("-review_count") + ) + if missing_only: + existing_summaries = ReviewLLMSummary.objects.filter( + course_id=OuterRef("course_id"), + instructor_id=OuterRef("instructor_id"), + club__isnull=True, + ) + agg = agg.annotate(has_summary=Exists(existing_summaries)).filter( + has_summary=False + ) + agg = agg[:limit] + pairs = [ + ( + row["course_id"], + row["instructor_id"], + row["review_count"], + row["last_id"], + ) + for row in agg + ] + + if not pairs: + self.stdout.write(self.style.WARNING("No pairs matched criteria.")) + return + + self.stdout.write(f"Processing {len(pairs)} pair(s)...") + + for idx, (course_id, instructor_id, review_count, latest_id) in enumerate( + pairs + ): + course = 
Course.objects.get(id=course_id) + instructor = Instructor.objects.get(id=instructor_id) + qs = base_reviews.filter(course=course, instructor=instructor).order_by( + "-created" + ) + + if dry_run: + self.stdout.write( + f"[DRY RUN] Would summarize {course.code()} / {instructor.full_name} " + f"(course_id={course_id}, instructor_id={instructor_id}, " + f"{review_count} reviews)" + ) + continue + + summary_text, error, model_used = generate_review_summary( + course, instructor, qs, max_reviews=max_reviews + ) + if not summary_text: + self.stdout.write( + self.style.ERROR( + f"Failed for {course.code()} / {instructor.full_name}: {error}" + ) + ) + continue + + ReviewLLMSummary.objects.update_or_create( + course=course, + instructor=instructor, + club=None, + defaults={ + "summary_text": summary_text, + "source_review_count": min(review_count, max_reviews) + if max_reviews is not None + else review_count, + "last_review_id": latest_id, + "source_metadata": {}, + "model_id": model_used or getattr(settings, "OPENROUTER_MODEL", ""), + }, + ) + + self.stdout.write( + self.style.SUCCESS( + f"Saved summary for {course.code()} / {instructor.full_name} " + f"({review_count} reviews) using {model_used or 'unknown model'}" + ) + ) + + if not dry_run and idx < len(pairs) - 1: + time.sleep(4.0) # avoids rate limits diff --git a/tcf_website/management/commands/generate_ai_summaries_helpers.py b/tcf_website/management/commands/generate_ai_summaries_helpers.py new file mode 100644 index 000000000..d3faa2ca1 --- /dev/null +++ b/tcf_website/management/commands/generate_ai_summaries_helpers.py @@ -0,0 +1,155 @@ +"""Helpers for generating AI summaries of course/instructor reviews.""" + +from __future__ import annotations + +from typing import Iterable, Tuple + +import requests +from django.conf import settings + +from ...models import Review + +SUMMARY_THRESHOLD = 5 +MAX_REVIEWS_FOR_PROMPT = 20 +MAX_REVIEW_CHARS = 500 + + +def _build_prompt(course_code: str, instructor_name: str, 
reviews: Iterable[Review]): + """Return chat messages for the LLM request.""" + bullets = [] + for review in reviews: + text = (review.text or "").strip() + if not text: + continue + + truncated_text = text[:MAX_REVIEW_CHARS] + ratings = ( + f"instructor {review.instructor_rating}/5, " + f"enjoyability {review.enjoyability}/5, " + f"recommend {review.recommendability}/5, " + f"difficulty {review.difficulty}/5" + ) + bullets.append(f"- Ratings ({ratings}): {truncated_text}") + + joined = "\n".join(bullets) + user_prompt = ( + f"Course: {course_code}\n" + f"Instructor: {instructor_name}\n\n" + "Write a concise 4-6 sentence summary of student reviews. " + "Capture overall sentiment, teaching style, difficulty and workload " + "patterns, and actionable advice for future students. Be neutral, " + "specific, and avoid hedging. If themes conflict, state both. " + "Do not invent details.\n\n" + f"Reviews:\n{joined}" + ) + + return [ + { + "role": "system", + "content": ( + "You summarize university course reviews into clear, honest guidance. " + "Do not preface the summary with headings, labels, or " + "restating the course/instructor; " + "just provide the summary sentences. Keep it 4-6 sentences." 
+ ), + }, + {"role": "user", "content": user_prompt}, + ] + + +def _call_openrouter( + model_name: str, messages +): # pylint: disable=too-many-locals + headers = { + "Authorization": f"Bearer {getattr(settings, 'OPENROUTER_API_KEY', '')}", + "Content-Type": "application/json", + # OpenRouter recommends these headers for usage attribution + "HTTP-Referer": getattr(settings, "OPENROUTER_REFERER", ""), + "Referer": getattr(settings, "OPENROUTER_REFERER", ""), + "X-Title": getattr(settings, "OPENROUTER_TITLE", ""), + } + payload = { + "model": model_name, + "messages": messages, + } + + response = requests.post( + "https://openrouter.ai/api/v1/chat/completions", + headers=headers, + json=payload, + timeout=20, + ) + request_id = ( + response.headers.get("x-request-id") + or response.headers.get("x-openrouter-request-id") + or response.headers.get("x-ratelimit-request-id") + ) + try: + data = response.json() + except ValueError: + data = None + + if not response.ok: + error_info = (data or {}).get("error") or {} + error_msg = ( + error_info.get("message") or response.reason or "Unknown API error" + ) + request_id_str = f", request_id={request_id}" if request_id else "" + return None, f"{response.status_code}: {error_msg}{request_id_str}" + + if data is None: + text_snippet = (response.text or "")[:300].replace("\n", " ").strip() + details = f"status {response.status_code}" + if request_id: + details += f", request_id={request_id}" + if text_snippet: + details += f", body='{text_snippet}'" + return None, f"Invalid JSON response from OpenRouter ({details})." + choices = data.get("choices") or [] + content = "" + if choices: + first = choices[0] or {} + content = ( + first.get("message", {}).get("content") or first.get("text") or "" + ).strip() + if content: + return content, None + + error_info = data.get("error") or {} + error_msg = error_info.get("message") or "Received empty summary from OpenRouter." 
+ keys = sorted(data.keys()) if isinstance(data, dict) else [] + details = f"status {response.status_code}" + if request_id: + details += f", request_id={request_id}" + if keys: + details += f", response_keys={keys}" + return None, f"{error_msg} ({details})." + + +def generate_review_summary( + course, instructor, reviews_qs, max_reviews: int = MAX_REVIEWS_FOR_PROMPT +) -> Tuple[str | None, str | None, str | None]: + """Call OpenRouter to create a summary. Returns (summary, error, model_used).""" + + api_key = getattr(settings, "OPENROUTER_API_KEY", "") + if not api_key: + return None, "OpenRouter API key is missing.", None + + max_reviews = max( + 1, min(max_reviews or MAX_REVIEWS_FOR_PROMPT, MAX_REVIEWS_FOR_PROMPT) + ) + reviews = list(reviews_qs.order_by("-created")[:max_reviews]) # Most recent first + reviews = [r for r in reviews if (r.text or "").strip()] + if not reviews: + return None, "No reviews with text to summarize.", None + + messages = _build_prompt(course.code(), instructor.full_name, reviews) + + model_name = getattr(settings, "OPENROUTER_MODEL", "openrouter/auto") + try: + content, error = _call_openrouter(model_name, messages) + if content: + return content, None, model_name + return None, error, model_name + except Exception as exc: # pylint: disable=broad-except + return None, str(exc), model_name diff --git a/tcf_website/migrations/0024_reviewllmsummary.py b/tcf_website/migrations/0024_reviewllmsummary.py new file mode 100644 index 000000000..8d7ab5a07 --- /dev/null +++ b/tcf_website/migrations/0024_reviewllmsummary.py @@ -0,0 +1,79 @@ +# Generated by Django 4.2.26 on 2025-12-06 23:59 + +from django.db import migrations, models +import django.db.models.deletion +import django.db.models.expressions + + +class Migration(migrations.Migration): + + dependencies = [ + ("tcf_website", "0023_remove_sectionenrollment_section_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="ReviewLLMSummary", + fields=[ + ( + "id", + 
models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("summary_text", models.TextField()), + ("model_id", models.CharField(default="openrouter/auto", max_length=255)), + ("source_review_count", models.PositiveIntegerField(default=0)), + ("last_review_id", models.IntegerField(default=0)), + ("source_metadata", models.JSONField(default=dict)), + ( + "created_at", + models.DateTimeField(auto_now_add=True, db_column="created_at"), + ), + ( + "updated_at", + models.DateTimeField(auto_now=True, db_column="updated_at"), + ), + ( + "course", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="tcf_website.course", + ), + ), + ( + "instructor", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="tcf_website.instructor", + ), + ), + ( + "club", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + to="tcf_website.club", + ), + ), + ], + options={ + "constraints": [ + models.UniqueConstraint( + condition=django.db.models.expressions.Q(("club__isnull", True)), + fields=("course", "instructor"), + name="unique_summary_per_course_instructor", + ), + models.UniqueConstraint( + condition=django.db.models.expressions.Q(("club__isnull", False)), + fields=("club",), + name="unique_summary_per_club", + ), + ], + }, + ), + ] diff --git a/tcf_website/models/__init__.py b/tcf_website/models/__init__.py index c6e44d132..ff0a77442 100644 --- a/tcf_website/models/__init__.py +++ b/tcf_website/models/__init__.py @@ -17,6 +17,7 @@ Instructor, Question, Review, + ReviewLLMSummary, Schedule, ScheduledCourse, School, diff --git a/tcf_website/models/models.py b/tcf_website/models/models.py index 73f4fcee3..c8c1fce47 100644 --- a/tcf_website/models/models.py +++ b/tcf_website/models/models.py @@ -1418,6 +1418,38 @@ class Meta: ] +class ReviewLLMSummary(models.Model): + """AI-generated summaries for course/instructor page""" + + club = 
models.ForeignKey(Club, on_delete=models.CASCADE, null=True, blank=True) + course = models.ForeignKey(Course, on_delete=models.CASCADE) + instructor = models.ForeignKey(Instructor, on_delete=models.CASCADE) + summary_text = models.TextField() + model_id = models.CharField(max_length=255, default="openrouter/auto") + source_review_count = models.PositiveIntegerField(default=0) + last_review_id = models.IntegerField(default=0) + source_metadata = models.JSONField(default=dict) + created_at = models.DateTimeField(auto_now_add=True, db_column="created_at") + updated_at = models.DateTimeField(auto_now=True, db_column="updated_at") + + def __str__(self): + return f"Summary for {self.course} / {self.instructor}" + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["course", "instructor"], + name="unique_summary_per_course_instructor", + condition=models.Q(club__isnull=True), + ), + models.UniqueConstraint( + fields=["club"], + name="unique_summary_per_club", + condition=models.Q(club__isnull=False), + ), + ] + + class Question(models.Model): """Question model. Belongs to a User. 
@@ -1912,7 +1944,8 @@ def average_schedule_difficulty(self): result = ( ScheduledCourse.objects.filter(schedule=self) .annotate( - course_id=models.F("section__course_id"), # Reference to the course + # Reference to the course + course_id=models.F("section__course_id"), related_instructor_id=models.F( "instructor_id" ), # Reference to the instructor diff --git a/tcf_website/static/course/course_professor.css b/tcf_website/static/course/course_professor.css index d77c52fe7..205fb2a1d 100644 --- a/tcf_website/static/course/course_professor.css +++ b/tcf_website/static/course/course_professor.css @@ -26,6 +26,20 @@ row-gap: 10px; } +.ai-summary-box { + background: #fff; + border: 1px solid #e6e9f2; + border-radius: 8px; + padding: 16px 18px; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.04); + margin-bottom: 40px; +} + +.ai-summary-box .badge-secondary { + background-color: #007bff; + color: #fff; +} + .times-info-container { width: 100%; display: flex; diff --git a/tcf_website/templates/course/course_professor.html b/tcf_website/templates/course/course_professor.html index c17ae8aeb..960095e1a 100644 --- a/tcf_website/templates/course/course_professor.html +++ b/tcf_website/templates/course/course_professor.html @@ -283,6 +283,19 @@
{{ review_summary_text|linebreaksbr }}
++ AI generated based on {{ review_summary_source_count }} review{% if review_summary_source_count != 1 %}s{% endif %}. +
+