From 56eabac1e4223274763fc4663281840a23ee3a0d Mon Sep 17 00:00:00 2001 From: Phong Le Date: Sun, 1 Feb 2026 14:25:33 -0500 Subject: [PATCH 01/23] AI reviews summary feature --- tcf_core/settings/base.py | 9 + .../commands/generate_ai_summaries.py | 176 ++++++++++++++++++ .../migrations/0024_reviewllmsummary.py | 73 ++++++++ tcf_website/models/__init__.py | 1 + tcf_website/models/models.py | 32 ++++ tcf_website/services/review_summary.py | 127 +++++++++++++ .../static/course/course_professor.css | 8 + .../templates/course/course_professor.html | 23 +++ tcf_website/views/browse.py | 34 ++++ 9 files changed, 483 insertions(+) create mode 100644 tcf_website/management/commands/generate_ai_summaries.py create mode 100644 tcf_website/migrations/0024_reviewllmsummary.py create mode 100644 tcf_website/services/review_summary.py diff --git a/tcf_core/settings/base.py b/tcf_core/settings/base.py index 31ed5d863..c89a13970 100644 --- a/tcf_core/settings/base.py +++ b/tcf_core/settings/base.py @@ -17,6 +17,15 @@ env_file = os.path.join(BASE_DIR, ".env") environ.Env.read_env(env_file) +# AI review summary configuration +OPENROUTER_API_KEY = env.str("OPENROUTER_API_KEY", default="") +OPENROUTER_MODEL = env.str("OPENROUTER_MODEL", default="openrouter/auto") +OPENROUTER_REFERER = env.str( + "OPENROUTER_REFERER", default="https://thecourseforum.com" +) +OPENROUTER_TITLE = env.str("OPENROUTER_TITLE", default="theCourseForum AI Summary") +OPENROUTER_FALLBACK_MODELS = env.list("OPENROUTER_FALLBACK_MODELS", default=[]) + # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = env.str("SECRET_KEY") diff --git a/tcf_website/management/commands/generate_ai_summaries.py b/tcf_website/management/commands/generate_ai_summaries.py new file mode 100644 index 000000000..3c9f48e4d --- /dev/null +++ b/tcf_website/management/commands/generate_ai_summaries.py @@ -0,0 +1,176 @@ +"""command to manually generate AI summaries for course/instructor pairs. 
+If an AI summary exists for a course/instructor pair, it is displayed on that pair's page. +Prerequisite: set the OpenRouter key (`OPENROUTER_API_KEY`) and model name (`OPENROUTER_MODEL`) in the .env file. + +Usage examples: + # Help command: + docker compose exec web python manage.py generate_ai_summaries --help + + # Dry run: show top 15 pairs (the default --limit) by written-review count (no LLM calls) + python manage.py generate_ai_summaries --dry-run + OR + docker compose exec web python manage.py generate_ai_summaries --dry-run + + # Generate for top 30 course / instructor pairs with at least 3 written reviews + python manage.py generate_ai_summaries --limit 30 --min-reviews 3 + OR + docker compose exec web python manage.py generate_ai_summaries --limit 30 --min-reviews 3 + + # Generate for a specific course/instructor pair + python manage.py generate_ai_summaries --course-id 1 --instructor-id 4019 + OR + docker compose exec web python manage.py generate_ai_summaries --course-id 1 --instructor-id 4019 +""" + +from collections import defaultdict +from typing import Any + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.db.models import Count, Max, Q + +from ...models import Course, Instructor, Review, ReviewLLMSummary +from ...services.review_summary import generate_review_summary + + +class Command(BaseCommand): + help = "Generate AI summaries manually for course/instructor pairs." 
# Methods of the ``Command`` management-command class (generate_ai_summaries).

def add_arguments(self, parser):
    """Register the command-line options accepted by this command."""
    parser.add_argument(
        "--limit",
        type=int,
        default=15,
        help="Number of top course/instructor pairs by review count to summarize.",
    )
    parser.add_argument(
        "--course-id",
        type=int,
        help="Specific course id to summarize (must be used with --instructor-id).",
    )
    parser.add_argument(
        "--instructor-id",
        type=int,
        help="Specific instructor id to summarize (must be used with --course-id).",
    )
    parser.add_argument(
        "--min-reviews",
        type=int,
        default=1,
        help="Minimum number of written reviews required to generate a summary.",
    )
    parser.add_argument(
        "--max-reviews",
        type=int,
        # Matches the fallback used in handle(); the previously advertised 25
        # never took effect because the summary service caps the prompt at 20.
        default=20,
        help=(
            "Use only the top N most recent written reviews in the prompt "
            "(default: 20; the summary service enforces its own upper bound)."
        ),
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show which pairs would be processed without calling the model.",
    )


def handle(self, *args: Any, **options: Any):
    """Generate (or preview) AI summaries for course/instructor pairs.

    Summarizes either the single pair given by ``--course-id`` and
    ``--instructor-id`` or the top ``--limit`` pairs ranked by number of
    visible, non-toxic, written reviews. Each successful summary is upserted
    into ``ReviewLLMSummary``; a failure for one pair is reported and the
    loop moves on to the next pair.

    Raises:
        CommandError: if only one of the two id options is supplied, if
            ``--limit`` is negative, or if a non-dry-run is requested while
            ``OPENROUTER_API_KEY`` is not configured.
    """
    course_id = options.get("course_id")
    instructor_id = options.get("instructor_id")
    limit = options.get("limit")
    min_reviews = options.get("min_reviews")
    max_reviews = options.get("max_reviews") or 20
    dry_run = options.get("dry_run")

    # A dry run never calls the model, so it must not require credentials.
    if not dry_run and not getattr(settings, "OPENROUTER_API_KEY", ""):
        raise CommandError("OPENROUTER_API_KEY is not configured.")

    # Compare against None so an explicit id of 0 still counts as "provided".
    if (course_id is None) != (instructor_id is None):
        raise CommandError(
            "Provide both --course-id and --instructor-id together, or neither."
        )

    # Querysets reject negative slice bounds with an opaque error; fail early.
    if limit is not None and limit < 0:
        raise CommandError("--limit must not be negative.")

    # Base queryset: visible, non-toxic, with text
    base_reviews = Review.objects.filter(
        hidden=False, toxicity_rating__lt=settings.TOXICITY_THRESHOLD
    ).exclude(text="")

    pairs: list[tuple[int, int, int, int]] = []

    if course_id is not None:
        # One aggregate query yields both the count and the newest review id.
        stats = base_reviews.filter(
            course_id=course_id, instructor_id=instructor_id
        ).aggregate(review_count=Count("id"), last_id=Max("id"))
        review_count = stats["review_count"]
        if review_count < min_reviews:
            self.stdout.write(
                self.style.WARNING(
                    f"Skipping course {course_id} / instructor {instructor_id}: only {review_count} reviews."
                )
            )
            return
        pairs = [(course_id, instructor_id, review_count, stats["last_id"] or 0)]
    else:
        ranked = (
            base_reviews.values("course_id", "instructor_id")
            .annotate(review_count=Count("id"), last_id=Max("id"))
            .filter(review_count__gte=min_reviews)
            # Secondary keys make the cut-off deterministic when counts tie.
            .order_by("-review_count", "course_id", "instructor_id")[:limit]
        )
        pairs = [
            (row["course_id"], row["instructor_id"], row["review_count"], row["last_id"])
            for row in ranked
        ]

    if not pairs:
        self.stdout.write(self.style.WARNING("No pairs matched criteria."))
        return

    self.stdout.write(f"Processing {len(pairs)} pair(s)...")

    # Fetch all referenced rows in two queries instead of two queries per pair.
    # NOTE(review): grouped rows may carry a null course/instructor if reviews
    # can exist without them (e.g. club reviews) - such pairs are skipped below.
    courses = Course.objects.in_bulk(
        list({pair[0] for pair in pairs if pair[0] is not None})
    )
    instructors = Instructor.objects.in_bulk(
        list({pair[1] for pair in pairs if pair[1] is not None})
    )

    for pair_course_id, pair_instructor_id, review_count, latest_id in pairs:
        course = courses.get(pair_course_id)
        instructor = instructors.get(pair_instructor_id)
        if course is None or instructor is None:
            # Report and keep going rather than aborting the whole batch.
            self.stdout.write(
                self.style.ERROR(
                    f"Skipping course {pair_course_id} / instructor {pair_instructor_id}: "
                    "course or instructor not found."
                )
            )
            continue

        if dry_run:
            self.stdout.write(
                f"[DRY RUN] Would summarize {course.code()} / {instructor.full_name} "
                f"({review_count} reviews)"
            )
            continue

        reviews = base_reviews.filter(course=course, instructor=instructor).order_by(
            "-created"
        )
        summary_text, error, model_used = generate_review_summary(
            course, instructor, reviews, max_reviews=max_reviews
        )
        if not summary_text:
            self.stdout.write(
                self.style.ERROR(
                    f"Failed for {course.code()} / {instructor.full_name}: {error}"
                )
            )
            continue

        # club=None restricts the lookup to the course/instructor summary row,
        # which is the only row the partial unique constraint guarantees to be
        # unique; club-scoped rows are never matched or overwritten.
        ReviewLLMSummary.objects.update_or_create(
            course=course,
            instructor=instructor,
            club=None,
            defaults={
                "summary_text": summary_text,
                "source_review_count": review_count,
                "last_review_id": latest_id,
                "source_metadata": {},
                "model_id": model_used or getattr(settings, "OPENROUTER_MODEL", ""),
            },
        )

        self.stdout.write(
            self.style.SUCCESS(
                f"Saved summary for {course.code()} / {instructor.full_name} "
                f"({review_count} reviews) using {model_used or 'unknown model'}"
            )
        )
# Generated by Django 4.2.26 on 2025-12-06 23:59
# File: tcf_website/migrations/0024_reviewllmsummary.py

from django.db import migrations, models
import django.db.models.deletion
import django.db.models.expressions


class Migration(migrations.Migration):
    """Create the ``ReviewLLMSummary`` table used to store AI-generated review summaries."""

    # Runs after the preceding migration of the same app.
    dependencies = [
        ("tcf_website", "0023_remove_sectionenrollment_section_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="ReviewLLMSummary",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Summary payload and bookkeeping columns.
                ("summary_text", models.TextField()),
                ("model_id", models.CharField(default="openrouter/auto", max_length=255)),
                ("source_review_count", models.PositiveIntegerField(default=0)),
                ("last_review_id", models.IntegerField(default=0)),
                ("source_metadata", models.JSONField(default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                # course and instructor are required; rows are deleted with them.
                (
                    "course",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="tcf_website.course",
                    ),
                ),
                (
                    "instructor",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="tcf_website.instructor",
                    ),
                ),
                # club is optional (nullable); rows are deleted with the club.
                (
                    "club",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="tcf_website.club",
                    ),
                ),
            ],
            options={
                "constraints": [
                    # Among rows without a club: one row per (course, instructor).
                    models.UniqueConstraint(
                        condition=django.db.models.expressions.Q(("club__isnull", True)),
                        fields=("course", "instructor"),
                        name="unique_summary_per_course_instructor",
                    ),
                    # Among rows with a club: one row per club.
                    models.UniqueConstraint(
                        condition=django.db.models.expressions.Q(("club__isnull", False)),
                        fields=("club",),
                        name="unique_summary_per_club",
                    ),
                ],
            },
        ),
    ]
# File: tcf_website/models/models.py
# This patch also adds ReviewLLMSummary to the import list in
# tcf_website/models/__init__.py.
class ReviewLLMSummary(models.Model):
    """AI-generated summary of reviews, stored per course/instructor pair.

    A row may optionally reference a club. Uniqueness is enforced separately
    for rows without a club (one per course/instructor pair) and rows with a
    club (one per club) - see ``Meta.constraints``.
    """

    # Optional club reference; null for course/instructor summaries.
    club = models.ForeignKey(Club, on_delete=models.CASCADE, null=True, blank=True)
    # NOTE(review): course and instructor are non-nullable, so a club-scoped
    # row would still need both - confirm this is intended.
    course = models.ForeignKey(Course, on_delete=models.CASCADE)
    instructor = models.ForeignKey(Instructor, on_delete=models.CASCADE)
    # The generated summary text.
    summary_text = models.TextField()
    # Name of the model that produced the summary (set by the generate_ai_summaries command).
    model_id = models.CharField(max_length=255, default="openrouter/auto")
    # Number of written reviews counted for the pair when the summary was generated.
    source_review_count = models.PositiveIntegerField(default=0)
    # Highest review id among those reviews at generation time (0 if none).
    last_review_id = models.IntegerField(default=0)
    # Free-form metadata; the generation command currently stores an empty dict.
    source_metadata = models.JSONField(default=dict)
    created_at = models.DateTimeField(auto_now_add=True, db_column="created_at")
    updated_at = models.DateTimeField(auto_now=True, db_column="updated_at")

    def __str__(self) -> str:
        return f"Summary for {self.course} / {self.instructor}"

    class Meta:
        constraints = [
            # Rows without a club: at most one per (course, instructor).
            models.UniqueConstraint(
                fields=["course", "instructor"],
                name="unique_summary_per_course_instructor",
                condition=models.Q(club__isnull=True),
            ),
            # Rows with a club: at most one per club.
            models.UniqueConstraint(
                fields=["club"],
                name="unique_summary_per_club",
                condition=models.Q(club__isnull=False),
            ),
        ]
diff --git a/tcf_website/services/review_summary.py b/tcf_website/services/review_summary.py new file mode 100644 index 000000000..395a88fc8 --- /dev/null +++ b/tcf_website/services/review_summary.py @@ -0,0 +1,127 @@ +"""Helpers for generating AI summaries of course/instructor reviews.""" + +from __future__ import annotations + +from typing import Iterable, Tuple + +import requests +from django.conf import settings + +from ..models import Review + +SUMMARY_THRESHOLD = 5 +MAX_REVIEWS_FOR_PROMPT = 20 +MAX_REVIEW_CHARS = 500 + + +def _build_prompt(course_code: str, instructor_name: str, reviews: Iterable[Review]): + """Return chat messages for the LLM request.""" + bullets = [] + for review in reviews: + text = (review.text or "").strip() + if not text: + continue + + truncated_text = text[:MAX_REVIEW_CHARS] + bullets.append( + f"- Ratings (instructor {review.instructor_rating}/5, enjoyability {review.enjoyability}/5, recommend {review.recommendability}/5, difficulty {review.difficulty}/5): {truncated_text}" + ) + + joined = "\n".join(bullets) + user_prompt = ( + f"Course: {course_code}\n" + f"Instructor: {instructor_name}\n\n" + "Write a concise 4-6 sentence summary of student reviews. Capture overall sentiment, teaching style, difficulty and workload patterns, and actionable advice for future students. " + "Be neutral, specific, and avoid hedging. If themes conflict, state both. Do not invent details.\n\n" + f"Reviews:\n{joined}" + ) + + return [ + { + "role": "system", + "content": ( + "You summarize university course reviews into clear, honest guidance. " + "Do not preface the summary with headings, labels, or restating the course/instructor; " + "just provide the summary sentences. Keep it 4-6 sentences." 
+ ), + }, + {"role": "user", "content": user_prompt}, + ] + + +def _call_openrouter(model_name: str, messages): + headers = { + "Authorization": f"Bearer {getattr(settings, 'OPENROUTER_API_KEY', '')}", + "Content-Type": "application/json", + # OpenRouter recommends these headers for usage attribution + "HTTP-Referer": getattr(settings, "OPENROUTER_REFERER", ""), + "Referer": getattr(settings, "OPENROUTER_REFERER", ""), + "X-Title": getattr(settings, "OPENROUTER_TITLE", ""), + } + payload = { + "model": model_name, + "messages": messages, + "max_tokens": 220, + "temperature": 0.3, + } + + response = requests.post( + "https://openrouter.ai/api/v1/chat/completions", + headers=headers, + json=payload, + timeout=20, + ) + response.raise_for_status() + data = response.json() + choices = data.get("choices") or [] + content = "" + if choices: + first = choices[0] or {} + content = ( + first.get("message", {}).get("content") + or first.get("text") + or "" + ).strip() + if content: + return content, None + + error_info = data.get("error") or {} + error_msg = error_info.get("message") or "Received empty summary from OpenRouter." + return None, error_msg + + +def generate_review_summary( + course, instructor, reviews_qs, max_reviews: int = MAX_REVIEWS_FOR_PROMPT +) -> Tuple[str | None, str | None, str | None]: + """Call OpenRouter to create a summary. 
Returns (summary, error, model_used).""" + + api_key = getattr(settings, "OPENROUTER_API_KEY", "") + if not api_key: + return None, "OpenRouter API key is missing.", None + + max_reviews = max(1, min(max_reviews or MAX_REVIEWS_FOR_PROMPT, MAX_REVIEWS_FOR_PROMPT)) + reviews = list( + reviews_qs.order_by("-created")[:max_reviews] + ) # Most recent first + reviews = [r for r in reviews if (r.text or "").strip()] + if not reviews: + return None, "No reviews with text to summarize.", None + + messages = _build_prompt(course.code(), instructor.full_name, reviews) + + primary_model = getattr(settings, "OPENROUTER_MODEL", "openrouter/auto") + fallback_models = getattr(settings, "OPENROUTER_FALLBACK_MODELS", []) or [] + models_to_try = [primary_model] + [m for m in fallback_models if m] + + last_error = None + for model_name in models_to_try: + try: + content, error = _call_openrouter(model_name, messages) + if content: + return content, None, model_name + last_error = error + except Exception as exc: # pylint: disable=broad-except + last_error = str(exc) + continue + + return None, last_error or "No summary generated.", models_to_try[-1] if models_to_try else None diff --git a/tcf_website/static/course/course_professor.css b/tcf_website/static/course/course_professor.css index d77c52fe7..4ca572524 100644 --- a/tcf_website/static/course/course_professor.css +++ b/tcf_website/static/course/course_professor.css @@ -26,6 +26,14 @@ row-gap: 10px; } +.ai-summary-box { + background: #fff; + border: 1px solid #e6e9f2; + border-radius: 8px; + padding: 16px 18px; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.04); +} + .times-info-container { width: 100%; display: flex; diff --git a/tcf_website/templates/course/course_professor.html b/tcf_website/templates/course/course_professor.html index c17ae8aeb..5895947fc 100644 --- a/tcf_website/templates/course/course_professor.html +++ b/tcf_website/templates/course/course_professor.html @@ -283,6 +283,29 @@

{{ course.title }}

}); +
+
+

AI Review Summary

+ Beta +
+ {% if review_summary_text %} +

{{ review_summary_text|linebreaksbr }}

+

+ AI generated based on {{ review_summary_source_count }} review{% if review_summary_source_count != 1 %}s{% endif %}. +

+ {% elif review_summary_state == "no_reviews" %} +

No written reviews yet. Add yours to unlock a summary.

+ {% elif review_summary_state == "fallback" %} +

{{ review_summary_text|default:"" }}

+

Showing last saved summary. Error: {{ review_summary_error }}

+ {% elif review_summary_state == "error" %} +

We couldn't load the AI summary right now. Please try again later.

+ {% if review_summary_error %}

{{ review_summary_error }}

{% endif %} + {% else %} +

AI summary not available yet.

+ {% endif %} +
+