diff --git a/src/analyze.ts b/src/analyze.ts index 7c60e6d..727499a 100644 --- a/src/analyze.ts +++ b/src/analyze.ts @@ -21,6 +21,8 @@ type ParsedChunk = { findings: Finding[]; }; +const URL_RE = /https?:\/\/\S+/g; + /** * The LLM stage. This mirrors step (1) of the paper's attack — "extract * identity-relevant features" — but points it at a consenting subject and @@ -478,10 +480,11 @@ ${SCHEMA_HINT}`; const knownProofUrls = new Set(platformProfiles.map((p) => p.profileUrl)); for (const f of parsed.findings ?? []) { for (const e of f.evidence ?? []) { - if (typeof e?.quote === "string") { - for (const m of e.quote.matchAll(/https?:\/\/\S+/g)) { - knownProofUrls.add(m[0].replace(/[),.;!?]+$/, "")); - } + const quote = e?.quote; + if (typeof quote !== "string") continue; + URL_RE.lastIndex = 0; + for (const m of quote.matchAll(URL_RE)) { + knownProofUrls.add(m[0].replace(/[),.;!?]+$/, "")); } } } @@ -492,13 +495,22 @@ ${SCHEMA_HINT}`; } } - const span = - allItems.length > 0 - ? { - firstUtc: Math.min(...allItems.map((i) => i.createdUtc)), - lastUtc: Math.max(...allItems.map((i) => i.createdUtc)), - } - : undefined; + let span: + | { + firstUtc: number; + lastUtc: number; + } + | undefined; + if (allItems.length > 0) { + let firstUtc = allItems[0].createdUtc; + let lastUtc = allItems[0].createdUtc; + for (let i = 1; i < allItems.length; i += 1) { + const createdUtc = allItems[i].createdUtc; + if (createdUtc < firstUtc) firstUtc = createdUtc; + if (createdUtc > lastUtc) lastUtc = createdUtc; + } + span = { firstUtc, lastUtc }; + } // Deterministic identifier extraction. 
Runs over every item body plus the // model's evidence quotes, so anything regex-detectable lands in the diff --git a/src/extract.ts b/src/extract.ts index b25bb8c..b191616 100644 --- a/src/extract.ts +++ b/src/extract.ts @@ -117,8 +117,9 @@ export function extractSocialHandles(text: string): SocialHandle[] { const seen = new Set(); const out: SocialHandle[] = []; for (const { platform, pattern, reject } of SOCIAL_PATTERNS) { - const re = new RegExp(pattern.source, pattern.flags); - for (const m of text.matchAll(re)) { + // `matchAll` begins scanning at the shared regex's current lastIndex (it iterates an internal clone and never advances the original), so reset before reuse in case other code moved it. + pattern.lastIndex = 0; + for (const m of text.matchAll(pattern)) { // For mastodon the user is in m[2], otherwise m[1]. const handle = platform === "mastodon" ? `${m[2]}@${m[1]}` : m[1]; const baseHandle = platform === "mastodon" ? m[2] : m[1];