// Parsed contents of category_summaries.json; null until the first load has settled.
let categorySummariesCache = null;
// In-flight (or settled) load of category_summaries.json, shared by concurrent callers
// so that rapid category switches do not each trigger their own request.
let categorySummariesRequest = null;

/**
 * Fetch and parse category_summaries.json.
 * Never rejects: a missing file, a network error, invalid JSON or a payload that is
 * not a JSON object all resolve to an empty object (the file may simply not have
 * been generated yet).
 *
 * @returns {Promise<Object>} summaries keyed by category name
 */
async function loadCategorySummaries() {
    try {
        const res = await fetch('category_summaries.json', { cache: 'no-store' });
        if (!res.ok) {
            return {};
        }
        const data = await res.json();
        // Valid JSON can still be null, an array or a primitive; only a plain object is usable.
        const isPlainObject = data !== null && typeof data === 'object' && !Array.isArray(data);
        return isPlainObject ? data : {};
    } catch (_e) {
        return {};
    }
}

/**
 * Load category_summaries.json once and return the entry for catName, or null.
 * Fetch errors are ignored on purpose; the result of the first load (even an empty
 * one) is kept for the lifetime of the page.
 *
 * @param {string} catName - untranslated category name used as the lookup key
 * @returns {Promise<Object|null>} the stored entry for catName, or null when absent
 */
async function fetchCategorySummary(catName) {
    if (categorySummariesCache === null) {
        if (categorySummariesRequest === null) {
            categorySummariesRequest = loadCategorySummaries();
        }
        categorySummariesCache = await categorySummariesRequest;
    }
    // Own-property check so names such as "constructor" do not resolve to prototype members.
    const hasEntry = Object.prototype.hasOwnProperty.call(categorySummariesCache, catName);
    return hasEntry ? (categorySummariesCache[catName] || null) : null;
}
b/category_summaries.json new file mode 100644 index 000000000..564aba12d --- /dev/null +++ b/category_summaries.json @@ -0,0 +1,249 @@ +{ + "Global policy": { + "summary": "Recent coverage in Global policy (10 articles from 5 sources) includes \"Presidentti Stubb valtiovierailulle Intiaan\", \"Safeguarding the “Sovereignty of Mind” in the Age of AI\", \"Mitä haluat tietää Iranin sodasta? Kysy – me etsimme vastauksia\".", + "timestamp": "2026-03-02T19:14:05.993Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Presidentti Stubb valtiovierailulle Intiaan", + "link": "https://yle.fi/a/74-20213189?origin=rss" + }, + { + "title": "Safeguarding the “Sovereignty of Mind” in the Age of AI", + "link": "https://www.global-solutions-initiative.org/article/test/" + }, + { + "title": "Mitä haluat tietää Iranin sodasta? Kysy – me etsimme vastauksia", + "link": "https://yle.fi/a/74-20213069?origin=rss" + } + ] + }, + "Human Rights + Democracy": { + "summary": "Recent coverage in Human Rights + Democracy (10 articles from 5 sources) includes \"Zimbabwe: Authorities must investigate brutal attack on constitutional lawyer and political activists\", \"Pakistan: Investigation urgently needed after killings during Iran protests \", \"Borna Naeimi, a Baha’i Citizen, Arrested in Kerman\".", + "timestamp": "2026-03-02T19:12:09.999Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Zimbabwe: Authorities must investigate brutal attack on constitutional lawyer and political activists", + "link": "https://www.amnesty.org/en/latest/news/2026/03/zimbabwe-authorities-must-investigate-brutal-attack-on-constitutional-lawyer-and-political-activists/" + }, + { + "title": "Pakistan: Investigation urgently needed after killings during Iran protests ", + "link": "https://www.amnesty.org/en/latest/news/2026/03/pakistan-investigation-urgently-needed-after-killings-during-iran-protests/" + }, + { + "title": "Borna Naeimi, a Baha’i Citizen, Arrested in Kerman", + "link": 
"https://www.en-hrana.org/borna-naeimi-a-bahai-citizen-arrested-in-kerman/" + } + ] + }, + "Perspectives": { + "summary": "Recent coverage in Perspectives (10 articles from 3 sources) includes \"Miksi Trump vei Yhdysvallat sotaan, jota amerikkalaiset eivät halua? – Viestit sodan perusteluista ovat hämmentäviä\", \"Mikä show! Suomen tulevat euro­viisu­edustajat juhlivat konfetti­sateessa\", \"Trumpin tullit leikkasivat Suomen vientiä – EU voisi kovistella paremman sopimuksen, EK:n johtaja sanoo\".", + "timestamp": "2026-03-02T19:12:11.006Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Miksi Trump vei Yhdysvallat sotaan, jota amerikkalaiset eivät halua? – Viestit sodan perusteluista ovat hämmentäviä", + "link": "https://suomenkuvalehti.fi/paajutut/miksi-trump-vei-yhdysvallat-sotaan-jota-amerikkalaiset-eivat-halua-viestit-sodan-perusteluista-ovat-hammentavia/" + }, + { + "title": "Mikä show! Suomen tulevat euro­viisu­edustajat juhlivat konfetti­sateessa", + "link": "https://suomenkuvalehti.fi/uutisviikko/mika-show-suomen-euroviisuedustajat-juhlivat-konfettisateessa/" + }, + { + "title": "Trumpin tullit leikkasivat Suomen vientiä – EU voisi kovistella paremman sopimuksen, EK:n johtaja sanoo", + "link": "https://suomenkuvalehti.fi/uutisviikko/trumpin-tullit-leikkasivat-suomen-vientia-eu-voisi-kovistella-paremman-sopimuksen-ekn-johtaja-sanoo/" + } + ] + }, + "Digital policy + rights": { + "summary": "Recent coverage in Digital policy + rights (10 articles from 7 sources) includes \"Rakennusala etsii ratkaisuja datan liikkumiseen – Varasto-hankkeen työpaja avasi keskustelun digitalisaation käytännön askelista \", \"National Book Tour for Cindy Cohn’s Memoir, ‘Privacy’s Defender’\", \"Putin’s Legacy: Nations of Widows and Orphans\".", + "timestamp": "2026-03-02T19:12:12.517Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Rakennusala etsii ratkaisuja datan liikkumiseen – Varasto-hankkeen työpaja avasi keskustelun digitalisaation käytännön askelista 
", + "link": "https://tieke.fi/rakennusala-etsii-ratkaisuja-datan-liikkumiseen-varasto-hankkeen-tyopaja-avasi-keskustelun-digitalisaation-kaytannon-askelista/" + }, + { + "title": "National Book Tour for Cindy Cohn’s Memoir, ‘Privacy’s Defender’", + "link": "https://www.eff.org/press/releases/national-book-tour-cindy-cohns-memoir-privacys-defender" + }, + { + "title": "Putin’s Legacy: Nations of Widows and Orphans", + "link": "https://cepa.org/article/putins-legacy-nations-of-widows-and-orphans/" + } + ] + }, + "(Open) Science + Research": { + "summary": "Recent coverage in (Open) Science + Research (10 articles from 5 sources) includes \"Yhteiskunnallisen turvallisuuden hub aloittaa toimintansa\", \"Siitepölykausi on alkanut Suomessa\", \"Reasearch visit: Carbon observation exchanges in China\".", + "timestamp": "2026-03-02T19:12:12.517Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Yhteiskunnallisen turvallisuuden hub aloittaa toimintansa", + "link": "https://www.helsinki.fi/fi/valtiotieteellinen-tiedekunta/ajankohtaista/yhteiskunnallisen-turvallisuuden-hub-aloittaa-toimintansa" + }, + { + "title": "Siitepölykausi on alkanut Suomessa", + "link": "https://www.utu.fi/fi/ajankohtaista/mediatiedote/siitepolykausi-on-alkanut-suomessa" + }, + { + "title": "Reasearch visit: Carbon observation exchanges in China", + "link": "https://www.helsinki.fi/en/researchgroups/micrometeorology/micrometeorology-news/reasearch-visit-carbon-observation-exchanges-in-china" + } + ] + }, + "Politics + Government": { + "summary": "Recent coverage in Politics + Government (10 articles from 5 sources) includes \"Commission Unveils Updated Learning Opportunities for Customs and Tax Professionals\", \"‘Hope for Indigenous peoples’: An unexpected birth could bring this Amazon tribe back from the brink\", \"Flood of AI-generated ICE videos risks undermining trust in real footage, experts warn\".", + "timestamp": "2026-03-02T19:12:14.530Z", + "articleCount": 10, + "topArticles": [ + 
{ + "title": "Commission Unveils Updated Learning Opportunities for Customs and Tax Professionals", + "link": "https://taxation-customs.ec.europa.eu/news/commission-unveils-updated-learning-opportunities-customs-and-tax-professionals-2026-03-02_en" + }, + { + "title": "‘Hope for Indigenous peoples’: An unexpected birth could bring this Amazon tribe back from the brink", + "link": "http://www.euronews.com/my-europe/2026/03/02/hope-for-indigenous-peoples-an-unexpected-birth-could-bring-this-amazon-tribe-back-from-th" + }, + { + "title": "Flood of AI-generated ICE videos risks undermining trust in real footage, experts warn", + "link": "http://www.euronews.com/my-europe/2026/03/02/flood-of-ai-generated-ice-videos-risks-undermining-trust-in-real-footage-experts-warn" + } + ] + }, + "(Open) Data + Technology": { + "summary": "Recent coverage in (Open) Data + Technology (10 articles from 4 sources) includes \"‘The digital colonization of flyover states’: how datacenters are tearing small-town America apart\", \"Stardew Valley at 10: the anticapitalist game that cures burnout and inspires queer art\", \"I’m on the Meta Oversight Board. We need AI protections now | Suzanne Nossel\".", + "timestamp": "2026-03-02T19:12:14.531Z", + "articleCount": 10, + "topArticles": [ + { + "title": "‘The digital colonization of flyover states’: how datacenters are tearing small-town America apart", + "link": "https://www.theguardian.com/us-news/2026/mar/02/amazon-data-centers-small-towns" + }, + { + "title": "Stardew Valley at 10: the anticapitalist game that cures burnout and inspires queer art", + "link": "https://www.theguardian.com/games/2026/mar/02/stardew-valley-at-10-the-anticapitalist-game-that-cures-burnout-and-inspires-queer-art" + }, + { + "title": "I’m on the Meta Oversight Board. 
We need AI protections now | Suzanne Nossel", + "link": "https://www.theguardian.com/commentisfree/2026/mar/02/meta-oversight-board-ai" + } + ] + }, + "Cultural policy": { + "summary": "Recent coverage in Cultural policy (10 articles from 4 sources) includes \"Libraries Advancing Community Engagement in Africa and Europe\", \"Europe Day 2026: Add Your Event to the Shared European Agenda\", \"Tervetuloa seuraamaan Kotuksen juhlaseminaaria!\".", + "timestamp": "2026-03-02T19:12:15.035Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Libraries Advancing Community Engagement in Africa and Europe", + "link": "https://culturalfoundation.eu/stories/libraries-advancing-community-engagement-in-africa-and-europe/" + }, + { + "title": "Europe Day 2026: Add Your Event to the Shared European Agenda", + "link": "https://culturalfoundation.eu/stories/europe-day-2026-add-your-event-to-the-shared-european-agenda/" + }, + { + "title": "Tervetuloa seuraamaan Kotuksen juhlaseminaaria!", + "link": "https://kotus.fi/tervetuloa-seuraamaan-juhlaseminaaria/" + } + ] + }, + "Culture": { + "summary": "Recent coverage in Culture (10 articles from 8 sources) includes \"MERITA Platform announces 44 young chamber music ensembles from across Europe in the frame of MERITAcubed\", \"Hae EDUFI-harjoitteluun Tukholmaan!\", \"Sök EDUFI-praktik hos oss för hösten 2026!\".", + "timestamp": "2026-03-02T19:12:15.537Z", + "articleCount": 10, + "topArticles": [ + { + "title": "MERITA Platform announces 44 young chamber music ensembles from across Europe in the frame of MERITAcubed", + "link": "https://www.europanostra.org/merita-platform-announces-44-young-chamber-music-ensembles-from-across-europe-in-the-frame-of-meritacubed/" + }, + { + "title": "Hae EDUFI-harjoitteluun Tukholmaan!", + "link": "https://finlandsinstitutet.se/fi/hae-edufi-harjoitteluun-tukholmaan/" + }, + { + "title": "Sök EDUFI-praktik hos oss för hösten 2026!", + "link": 
"https://finlandsinstitutet.se/sok-praktikplats-for-hosten-2026/" + } + ] + }, + "Sustainability + Foresight": { + "summary": "Recent coverage in Sustainability + Foresight (10 articles from 4 sources) includes \"Analysis: Half of nations meet UN deadline for nature-loss reporting\", \"Kääntyykö Béla Bartók haudassaan oopperansa uustulkinnasta?\", \"Laaja sidosryhmäjoukko osallistui ympäristöministeriön strategiatyöhön liittyvään nykytilakyselyyn\".", + "timestamp": "2026-03-02T19:12:17.045Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Analysis: Half of nations meet UN deadline for nature-loss reporting", + "link": "https://www.carbonbrief.org/analysis-half-of-nations-meet-un-deadline-for-nature-loss-reporting/" + }, + { + "title": "Kääntyykö Béla Bartók haudassaan oopperansa uustulkinnasta?", + "link": "https://www.verdelehti.fi/2026/03/02/kaantyyko-bela-bartok-haudassaan-oopperansa-uustulkinnasta/" + }, + { + "title": "Laaja sidosryhmäjoukko osallistui ympäristöministeriön strategiatyöhön liittyvään nykytilakyselyyn", + "link": "https://ym.fi/-/laaja-sidosryhmajoukko-osallistui-ymparistoministerion-strategiatyohon-liittyvaan-nykytilakyselyyn" + } + ] + }, + "Digital cultural heritage": { + "summary": "Recent coverage in Digital cultural heritage (10 articles from 6 sources) includes \"ENCATC Education and Research Sessions: Call for proposals\", \"Celebrating “Humans of AI”: A Journey Into Public-Interest Technology\", \"Behold the First Realistic Depiction of the Human Face (Circa 25,000 BCE)\".", + "timestamp": "2026-03-02T19:12:17.046Z", + "articleCount": 10, + "topArticles": [ + { + "title": "ENCATC Education and Research Sessions: Call for proposals", + "link": "https://www.echoes-eccch.eu/2026-encatc-education-and-research-sessions/" + }, + { + "title": "Celebrating “Humans of AI”: A Journey Into Public-Interest Technology", + "link": 
"https://www.internetarchive.eu/2026/03/02/celebrating-humans-of-ai-a-journey-into-public-interest-technology/" + }, + { + "title": "Behold the First Realistic Depiction of the Human Face (Circa 25,000 BCE)", + "link": "https://www.openculture.com/2026/03/behold-the-first-realistic-depiction-of-the-human-face-circa-25000-bce.html" + } + ] + }, + "Journalism + investigation": { + "summary": "Recent coverage in Journalism + investigation (10 articles from 5 sources) includes \"March 2026 Office Hours\", \"ProPublica Sues Education Department for Withholding Records About Discrimination in Schools\", \"Russia’s Information Grip on Ukraine’s Occupied Territories\".", + "timestamp": "2026-03-02T19:12:17.046Z", + "articleCount": 10, + "topArticles": [ + { + "title": "March 2026 Office Hours", + "link": "https://www.datarescueproject.org/march26-officehours/" + }, + { + "title": "ProPublica Sues Education Department for Withholding Records About Discrimination in Schools", + "link": "https://www.propublica.org/article/education-department-civil-rights-office-foia-lawsuit" + }, + { + "title": "Russia’s Information Grip on Ukraine’s Occupied Territories", + "link": "https://euvsdisinfo.eu/russias-information-grip-on-ukraines-occupied-territories/" + } + ] + }, + "Digital skills + Citizen science": { + "summary": "Recent coverage in Digital skills + Citizen science (10 articles from 3 sources) includes \"Pool-Strategic 2025 – Sustaining Free Knowledge in Tshiluba and Kikongo\", \"A pause, not an end: Reflecting on WikiAfrica Hour’s journey in 2025\", \"Sesotho: A tale of two different orthographies\".", + "timestamp": "2026-03-02T19:12:17.046Z", + "articleCount": 10, + "topArticles": [ + { + "title": "Pool-Strategic 2025 – Sustaining Free Knowledge in Tshiluba and Kikongo", + "link": "https://diff.wikimedia.org/2026/02/28/pool-strategic-2025-sustaining-free-knowledge-in-tshiluba-and-kikongo/" + }, + { + "title": "A pause, not an end: Reflecting on WikiAfrica Hour’s journey 
in 2025", + "link": "https://diff.wikimedia.org/2026/02/28/a-pause-not-an-end-reflecting-on-wikiafrica-hours-journey-in-2025/" + }, + { + "title": "Sesotho: A tale of two different orthographies", + "link": "https://diff.wikimedia.org/2026/02/27/sesotho-a-tale-of-two-different-orthographies/" + } + ] + } +} \ No newline at end of file diff --git a/generate_summaries.js b/generate_summaries.js new file mode 100644 index 000000000..a8c37e4b1 --- /dev/null +++ b/generate_summaries.js @@ -0,0 +1,343 @@ +/** + * generate_summaries.js + * + * On-demand AI category summary generator. + * + * Usage: + * node generate_summaries.js # Generate all categories + * node generate_summaries.js "Category Name" # Generate a single category + * node generate_summaries.js --force # Bypass cache and regenerate + * node generate_summaries.js --server # Start optional HTTP API server + * + * Environment variables: + * OPENAI_API_KEY – If set, uses OpenAI GPT to produce AI summaries. + * If unset, falls back to a lightweight extractive summary. + * SUMMARIES_PORT – Port for the optional HTTP server (default: 3001). 
/**
 * Read the summaries cache from CACHE_FILE.
 *
 * @returns {Object} Cache keyed by category name. An empty object is returned when
 *   the file is missing, unreadable, not valid JSON, or valid JSON that is not an
 *   object (null, array, primitive) — callers index the result by category name.
 */
function loadCache() {
  let raw;
  try {
    // Read directly instead of existsSync + read: avoids a check-then-use race.
    raw = fs.readFileSync(CACHE_FILE, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.warn(`[CACHE] Could not read ${CACHE_FILE}: ${err.message}`);
    }
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.warn(`[CACHE] Ignoring ${CACHE_FILE}: expected a JSON object.`);
  } catch (err) {
    console.warn(`[CACHE] Ignoring ${CACHE_FILE}: ${err.message}`);
  }
  return {};
}

/**
 * Persist the summaries cache to CACHE_FILE.
 * The JSON is written to a sibling temp file and renamed over the target so that a
 * concurrent reader never sees a half-written file.
 *
 * @param {Object} cache - Cache object keyed by category name.
 * @throws {Error} If the file cannot be written or renamed.
 */
function saveCache(cache) {
  const tmpFile = `${CACHE_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(cache, null, 2));
    fs.renameSync(tmpFile, CACHE_FILE);
  } catch (err) {
    // Best-effort cleanup of the temp file; the original error is what matters.
    try {
      fs.unlinkSync(tmpFile);
    } catch (_e) {
      /* temp file was never created or is already gone */
    }
    throw err;
  }
}

/**
 * Decide whether a cache entry is recent enough to be reused.
 *
 * @param {Object|null|undefined} entry - Cache entry with an ISO `timestamp` string.
 * @returns {boolean} true only when the timestamp parses, is not in the future, and
 *   is younger than CACHE_MAX_AGE_MS.
 */
function isCacheFresh(entry) {
  if (!entry || !entry.timestamp) return false;
  const writtenAt = new Date(entry.timestamp).getTime();
  if (Number.isNaN(writtenAt)) return false;
  const ageMs = Date.now() - writtenAt;
  // A negative age means a corrupt or clock-skewed timestamp; regenerate rather than trust it.
  return ageMs >= 0 && ageMs < CACHE_MAX_AGE_MS;
}
+ */ +async function crawlArticleContent(url) { + try { + const response = await axios.get(url, { + timeout: CRAWL_TIMEOUT_MS, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) OpenMag-Robot-v1' + } + }); + const $ = cheerio.load(response.data); + // Remove non-content elements + $('script, style, nav, footer, header, aside, form, iframe').remove(); + // Prefer semantic content containers + const text = ($('article').first().text() || + $('main').first().text() || + $('[class*="content"]').first().text() || + $('body').text()) + .replace(/\s+/g, ' ') + .trim(); + return text.substring(0, 2000) || null; + } catch (_e) { + return null; + } +} + +// --------------------------------------------------------------------------- +// Summary generation +// --------------------------------------------------------------------------- + +/** + * Lightweight extractive fallback when no OpenAI key is available. + */ +function extractiveSummary(categoryName, articles) { + const top3 = articles.slice(0, 3); + const titles = top3.map(a => `"${a.title}"`).join(', '); + const sourceCount = new Set(articles.map(a => a.source)).size; + return ( + `Recent coverage in ${categoryName} (${articles.length} articles from ` + + `${sourceCount} source${sourceCount !== 1 ? 's' : ''}) includes ${titles}.` + ); +} + +/** + * Generate a summary using the OpenAI Chat API. + * Throws on API errors so the caller can fall back gracefully. + */ +async function openAISummary(categoryName, articles) { + const articlesText = articles + .map((a, i) => `${i + 1}. "${a.title}" (${a.source})\n${a.content || ''}`) + .join('\n\n'); + + const prompt = + `The following are recent news articles from the "${categoryName}" category.\n` + + `Provide a concise 2-3 sentence summary of the main themes and topics covered. 
` + + `Reference the top 3 most significant articles by title.\n\n` + + `${articlesText}\n\nSummary:`; + + const response = await axios.post( + 'https://api.openai.com/v1/chat/completions', + { + model: process.env.OPENAI_MODEL || 'gpt-3.5-turbo', + messages: [{ role: 'user', content: prompt }], + max_tokens: 300, + temperature: 0.5 + }, + { + headers: { + Authorization: `Bearer ${process.env.OPENAI_API_KEY}`, + 'Content-Type': 'application/json' + }, + timeout: 30000 + } + ); + + return response.data.choices[0].message.content.trim(); +} + +/** + * Choose AI or extractive summarisation based on API key availability. + */ +async function generateSummary(categoryName, articles) { + if (process.env.OPENAI_API_KEY) { + return openAISummary(categoryName, articles); + } + return extractiveSummary(categoryName, articles); +} + +// --------------------------------------------------------------------------- +// Core logic +// --------------------------------------------------------------------------- + +/** + * Main entry point for generating summaries. + * + * @param {string|null} targetCategory – If set, only process this category. + * @param {boolean} force – Bypass cache and regenerate. + * @returns {{ generated: number, cached: number }} + */ +async function generateCategorySummaries(targetCategory = null, force = false) { + if (!fs.existsSync(DATA_FILE)) { + throw new Error('data.json not found. 
Run fetch_news.js first.'); + } + + const articles = JSON.parse(fs.readFileSync(DATA_FILE, 'utf8')); + const cache = loadCache(); + + // Group articles by category + const byCategory = {}; + articles.forEach(art => { + const cat = art.sheetCategory || 'General'; + if (!byCategory[cat]) byCategory[cat] = []; + byCategory[cat].push(art); + }); + + // Determine which categories to process + let categories = Object.keys(byCategory); + if (targetCategory) { + if (!byCategory[targetCategory]) { + const available = categories.join(', '); + throw new Error( + `Category "${targetCategory}" not found in data.json. ` + + `Available categories: ${available}` + ); + } + categories = [targetCategory]; + } + + let generated = 0; + let cached = 0; + + for (const catName of categories) { + // Check cache freshness + if (!force && isCacheFresh(cache[catName])) { + console.log(`[CACHED] ${catName}`); + cached++; + continue; + } + + console.log(`[GENERATING] ${catName}...`); + + // Top 10 most recent articles for the category + const catArticles = byCategory[catName] + .sort((a, b) => new Date(b.pubDate) - new Date(a.pubDate)) + .slice(0, MAX_ARTICLES_PER_CATEGORY); + + // Enrich articles that lack meaningful content + const enriched = []; + for (const art of catArticles) { + const hasContent = + art.content && + art.content.trim().length > 100 && + art.content !== EMPTY_CONTENT_MARKER; + + let content = hasContent ? 
art.content : null; + + if (!content && art.link) { + console.log(` Crawling: ${art.link.substring(0, 70)}...`); + content = await crawlArticleContent(art.link); + // Throttle requests to external sources + await new Promise(r => setTimeout(r, THROTTLE_MS)); + } + + enriched.push({ + title: art.title || '', + source: art.sourceTitle || '', + content: content || art.title || '' + }); + } + + try { + const summary = await generateSummary(catName, enriched); + const topArticles = catArticles.slice(0, 3).map(a => ({ + title: a.title || 'Untitled', + link: a.link || '' + })); + + cache[catName] = { + summary, + timestamp: new Date().toISOString(), + articleCount: catArticles.length, + topArticles + }; + + saveCache(cache); + console.log(`[DONE] ${catName}: ${summary.substring(0, 80)}...`); + generated++; + } catch (err) { + console.error(`[ERROR] ${catName}: ${err.message}`); + } + } + + console.log(`\nFinished. Generated: ${generated}, Cached: ${cached}`); + return { generated, cached }; +} + +// --------------------------------------------------------------------------- +// Optional HTTP server (POST /api/summaries/refresh, GET /api/summaries) +// --------------------------------------------------------------------------- + +function startServer(port) { + const server = http.createServer(async (req, res) => { + if (req.method === 'POST' && req.url === '/api/summaries/refresh') { + let body = ''; + req.on('data', chunk => { body += chunk; }); + req.on('end', async () => { + let params = {}; + try { params = JSON.parse(body || '{}'); } catch (_e) { /* ignore */ } + const { category = null, force = false } = params; + try { + const result = await generateCategorySummaries(category, force); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, ...result })); + } catch (err) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: err.message })); + } + }); + } else 
if (req.method === 'GET' && req.url === '/api/summaries') { + const cache = loadCache(); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(cache)); + } else { + res.writeHead(404); + res.end(); + } + }); + + server.listen(port, () => { + console.log(`Summaries server listening on port ${port}`); + console.log(' POST /api/summaries/refresh – Generate summaries on demand'); + console.log(' GET /api/summaries – Read cached summaries'); + }); +} + +// --------------------------------------------------------------------------- +// CLI entry point +// --------------------------------------------------------------------------- + +// Only run CLI logic when this file is executed directly (not required as module) +if (require.main === module) { + const args = process.argv.slice(2); + const force = args.includes('--force'); + const serverMode = args.includes('--server'); + const targetCategory = args.find(a => !a.startsWith('--')) || null; + + if (serverMode) { + const port = parseInt(process.env.SUMMARIES_PORT || '3001', 10); + startServer(port); + } else { + generateCategorySummaries(targetCategory, force) + .then(() => process.exit(process.exitCode || 0)) + .catch(err => { + console.error('Fatal error:', err.message); + process.exit(1); + }); + } +} + +// --------------------------------------------------------------------------- +// Module exports (for programmatic use and testing) +// --------------------------------------------------------------------------- + +module.exports = { + generateCategorySummaries, + loadCache, + saveCache, + isCacheFresh, + crawlArticleContent, + generateSummary, + extractiveSummary +};