diff --git a/src/agents/backChannel/backChannelInsightsGenerator.ts b/src/agents/backChannel/backChannelInsightsGenerator.ts index e057fc55..8b252603 100644 --- a/src/agents/backChannel/backChannelInsightsGenerator.ts +++ b/src/agents/backChannel/backChannelInsightsGenerator.ts @@ -7,8 +7,8 @@ import transcript from '../helpers/transcript.js' import logger from '../../config/logger.js' import responseFormatSchemas from '../helpers/responseFormatSchemas.js' import { getStructuredResponseChain } from '../helpers/llmChain.js' -import { BackChannelAgentResponse } from './backChannel.types.js' -import filterHallucinations from './hallucinations.js' +import { BackChannelAgentResponse, Insight } from './backChannel.types.js' +import filterHallucinations from '../helpers/hallucinations.js' import { AgentResponse } from '../../types/index.types.js' export const backChannelLLMTemplates = { @@ -282,7 +282,12 @@ export async function processParticipantMessages(messages, startTime, endTime) { timestamp: { start: startTime.getTime(), end: endTime.getTime() }, // filter insights with hallucinated comments - could not be stopped with prompt - yet! ;) // and sometimes real comments will be modified by the LLM (e.g. modifying punctuation or fixing typos) - insights: await filterHallucinations(llmResponse.results, messages) + insights: (llmResponse.results as Insight[]).filter((result) => { + const claims = result.comments.filter((c) => c.user).map((c) => ({ participant: c.user!, text: c.text })) + const verified = filterHallucinations(claims, messages) + if (!verified) logger.info(`Dropping insight with unverified claims: ${JSON.stringify(result, null, 2)}`) + return verified + }) } if (!response.insights?.length) return [] diff --git a/src/agents/backChannel/hallucinations.ts b/src/agents/backChannel/hallucinations.ts deleted file mode 100644 index f78a7ee3..00000000 --- a/src/agents/backChannel/hallucinations.ts +++ /dev/null @@ -1,41 +0,0 @@ -import * as fuzzball from 'fuzzball' -import logger from '../../config/logger.js' - -const matchThreshold = 70 - -export default async function filterHallucinations(responses, messages) { - return responses.filter((response) => { - const hallucinated = response.comments.some((comment) => { - // Check if we can find a user match at threshold % or higher - const bestUserMatch = messages.reduce((best, m) => { - const score = fuzzball.ratio(comment.user, m.pseudonym) - return score > (best?.score || 0) ? { message: m, score } : best - }, null) - - if (!bestUserMatch || bestUserMatch.score < matchThreshold) { - logger.info( - `Could not find user match (>=${matchThreshold}%) for: "${comment.user}" (best: ${bestUserMatch?.score || 0}%)` - ) - return true // Mark as hallucinated - } - - // Check if we can find a text match at threshold % or higher - const bestTextMatch = messages.reduce((best, m) => { - const score = fuzzball.partial_ratio(comment.text, m.bodyType === 'json' ? m.body.text : m.body) - return score > (best?.score || 0) ? { message: m, score } : best - }, null) - - if (!bestTextMatch || bestTextMatch.score < matchThreshold) { - logger.info( - `Could not find text match (>=${matchThreshold}%) for: "${comment.text}" (best: ${bestTextMatch?.score || 0}%)` - ) - return true // Mark as hallucinated - } - return false // Not hallucinated - }) - if (hallucinated) { - logger.info(`Hallucinated comment: ${JSON.stringify(response, null, 2)}`) - } - return !hallucinated - }) -} diff --git a/src/agents/engagement/engagementAgent.ts b/src/agents/engagement/engagementAgent.ts index 3fe7eb01..0d10a294 100644 --- a/src/agents/engagement/engagementAgent.ts +++ b/src/agents/engagement/engagementAgent.ts @@ -1,16 +1,16 @@ import verify from '../helpers/verify.js' -import { AgentMessageActions, ConversationHistory } from '../../types/index.types.js' +import { AgentMessageActions, ConversationHistory, IChannel } from '../../types/index.types.js' import { defaultLLMModel, defaultLLMPlatform } from '../helpers/getModelChat.js' import logger from '../../config/logger.js' import getConversationHistory from '../helpers/getConversationHistory.js' import { - detectInterventionOpportunity, + detectPublicInterventionOpportunity, getInterventionAnalysisSchema, buildInterventionTypeSection, USER_TEMPLATE, interventionLlmTemplateVars } from '../helpers/interventionHandler.js' -import { InterventionType } from '../helpers/interventionTypes.js' +import { InterventionType, InterventionAnalysis } from '../helpers/interventionTypes.js' /** * Default examples for engagement intervention types @@ -163,7 +163,7 @@ export default verify({ endTime: conversationHistory.end }) - const interventionAnalysis = await detectInterventionOpportunity.call( + const interventionAnalysis = await detectPublicInterventionOpportunity.call( this, sharedChatHistory, getEngagementSystemPrompt(this.agentConfig?.personality), @@ -183,12 +183,10 @@ export default verify({ if (interventionAnalysis.sharedChatMessage) { return [ { + ...interventionAnalysis, visible: true, message: interventionAnalysis.sharedChatMessage, - channels: this.conversation.channels.filter((c) => c.name === 'chat'), - context: `Intervention Type: ${interventionAnalysis.interventionType}\nReasoning: ${ - interventionAnalysis.reasoning - }\nPattern: ${interventionAnalysis.detectedPattern || 'N/A'}` + channels: this.conversation.channels.filter((c: IChannel) => c.name === 'chat') } ] } @@ -196,6 +194,36 @@ export default verify({ return [] }, + formatTraceInput(conversationHistory: ConversationHistory) { + return { + transcript: conversationHistory.messages.map((m) => ({ + role: m.fromAgent ? 'agent' : 'participant', + pseudonym: m.pseudonym, + text: m.bodyType === 'json' ? (m.body as { text?: string })?.text : m.body, + createdAt: m.createdAt + })) + } + }, + + formatTraceOutput(responses: InterventionAnalysis[]) { + if (responses.length === 0) return { interventionType: 'NONE', messageSent: null } + const r = responses[0] + return { + interventionType: r.interventionType, + reasoning: r.reasoning, + confidenceScore: r.confidenceScore, + detectedPattern: r.detectedPattern, + messageSent: r.sharedChatMessage + } + }, + + getTraceMetadata(_conversationHistory: ConversationHistory, _userMessage: unknown, responses: InterventionAnalysis[]) { + return { + topic: this.conversation.name, + context: responses[0]?.context + } + }, + async start() { return true }, diff --git a/src/agents/eventAssistant/CHECKIN_HANDLER.md b/src/agents/eventAssistant/CHECKIN_HANDLER.md new file mode 100644 index 00000000..ecaf929e --- /dev/null +++ b/src/agents/eventAssistant/CHECKIN_HANDLER.md @@ -0,0 +1,154 @@ +# Private Check-in Handler + +## What It Is + +A component of the Event Assistant that runs periodically during live events, scanning each participant's private DM history and deciding whether to proactively reach out. Unlike the Event Assistant's Q&A mode (which responds to what participants ask), the check-in handler acts on patterns — things the participant hasn't said directly but that their messages reveal over time. + +Every message a participant sends in DMs receives a direct Q&A response. The check-in handler is for what those responses can't cover: the accumulation of hesitation across a conversation, the social isolation of feeling like the only doubter in the room, or the silence of someone who got left behind by a fast-moving section. + +## Check-in Types + +The handler chooses from among these types, or sends nothing. Most cycles produce nothing — silence is the default. + +- **SOCIAL_REASSURANCE** — Detects a *pattern* of hesitation, self-minimization, or dissent across a participant's own messages and names it warmly. The goal is to normalize their way of showing up and reduce self-censorship. Requires at least 3 participant messages showing the pattern — never fires on fewer, since the Q&A response already addressed each individual message. + +- **NOT_ALONE** — Sends when other participants are privately expressing the same doubt or hesitation as this one. Tells them their reaction is shared, not unique, without naming anyone or revealing specifics. Requires verified cross-participant evidence (`sourceMessages`). Can trigger on a single message from the target if the cross-participant signal is clear. + +- **INTEREST_BRIDGE** — Lets a participant know others are privately asking about the same topic. About shared curiosity, not shared anxiety. Requires the participant to have sent at least one DM (so there's a topic to bridge to) and cross-participant evidence of shared interest. + +- **TRANSCRIPT_HOOK** — After a dense or fast-moving transcript section, reaches out to a participant who has been quiet during that section. Names a specific topic and removes the burden of question formation. Never sends a generic "things moved fast" message. Determined by a single shared LLM evaluation per cycle — not repeated per participant. + +- **NONE** — No message needed. The most common outcome. + +## Why SOCIAL_REASSURANCE and NOT_ALONE Are Distinct + +Both are about reassurance, but they differ in trigger, evidence, and message: + +| | SOCIAL_REASSURANCE | NOT_ALONE | +|---|---|---| +| **Trigger** | Participant's own pattern across 3+ messages | Cross-participant shared doubt/hesitation | +| **Evidence required** | Pattern in this participant's DM history | Verified `sourceMessages` from other participants | +| **Can fire on single message?** | No | Yes, if cross-participant signal is strong | +| **Message addresses** | Their own recurring pattern | That they are not alone in the room | +| **`sourceMessages` used?** | Never | Always | + +## Eligibility Gating + +Before the LLM is called for a participant, each type is checked for eligibility. If no types are eligible, the LLM call is skipped entirely. + +| Type | Eligible when | +|---|---| +| SOCIAL_REASSURANCE | Participant has sent ≥3 DMs | +| NOT_ALONE | Participant has sent ≥1 DM, and at least one other participant has sent a DM | +| INTEREST_BRIDGE | Participant has sent ≥1 DM, and at least one other participant has sent a DM | +| TRANSCRIPT_HOOK | Transcript density LLM evaluation (shared, runs once per cycle) returns dense=true, AND participant has been quiet during the density window | + +Only eligible types appear in the system prompt and schema — the LLM never sees types it can't use. + +## Shared Evaluations + +Some check-in types require expensive determinations that would be wasteful to repeat per participant. These run once before the participant loop via `evaluateShared`, and results are passed to each participant's `isEligible` check. + +Currently: **TRANSCRIPT_HOOK** runs a single LLM call to determine whether the recent transcript was dense or fast-moving, and which topic was covered. This result is reused across all 50 participants in the cycle. + +This pattern extends naturally to future types that need shared context. + +## Hallucination Prevention + +Any message that implies others share a sentiment (NOT_ALONE, INTEREST_BRIDGE) requires the LLM to populate `sourceMessages` — an array of `{participant, text}` pairs. Before sending, the handler verifies each cited pair against actual messages using fuzzy matching (≥70% on both pseudonym and text). If verification fails, the message is suppressed. This mirrors the backChannel hallucination filter and prevents fabricated social proof. + +`sourceMessages` is a server-side audit field — it is never shown to the recipient. + +## Configuration (`agentConfig`) + +| Property | Default | Description | +|---|---|---| +| `checkinScanInterval` | `3` (minutes) | How often the handler runs and the transcript density window | +| `minInterval` | `10` (minutes) | Minimum time between check-ins to the same participant | + +`checkinScanInterval` controls three things in sync: the timer period, the transcript lookback window for density evaluation, and the quiet window used to determine whether a participant has been silent during the dense section. + +## SOCIAL_REASSURANCE: Signals + +The handler looks for accumulation of these signals across at least 3 messages: + +- Repeatedly apologizing for or minimizing their own contribution before making it + - *"Sorry if this is obvious, but..."* + - *"I'm probably wrong but..."* + - *"Not sure this is worth asking, but..."* + +- Asking whether they're the only one feeling a certain way + - *"Is it just me or does this feel underdeveloped?"* + - *"Am I the only one who finds this idealistic?"* + - *"Does anyone else feel like this only works in certain industries?"* + +- Keeping their own experience at arm's length, describing it in the third person + - *"Some people might feel..."* + - *"I could imagine someone thinking..."* + +- Going quiet or pulling back after a point of friction or pushback + - Confident dissent followed by progressive retreat: *"I disagree"* → *"maybe I'm wrong about that"* → *"never mind, probably not worth getting into"* + +- Stacking multiple qualifiers in a single message in a way that suggests it almost didn't get sent + - *"I'm not sure, maybe, probably, I could be wrong, but..."* + +## SOCIAL_REASSURANCE: Example Messages + +*(Pattern of self-minimization)* +> "The questions you keep almost not sending are usually the most worth asking." + +*(Recurring dissent or skepticism)* +> "Keeping a running doubt alive across a whole conversation usually means you're onto something. That's worth staying with." + +*(Pulling back after friction)* +> "If something landed sideways, you don't have to frame it as a question — one word is enough." + +*(General pattern of hedging)* +> "Nothing here requires certainty. Uncertainty usually means you're paying close attention." + +## NOT_ALONE: Example Messages + +> "A few others are privately sitting with similar questions — you're in good company." + +> "You're not the only one circling that. Others are privately sitting with something similar, even if it's not coming up in the main chat." + +> "That hesitation is more common in this room than you might think." + +## INTEREST_BRIDGE: Signals + +The handler looks for a topic that multiple participants are privately asking about, independently. Shared curiosity — not shared anxiety. The goal is to let each participant know their interest isn't isolated, without revealing who else is asking or what they said. + +- Multiple participants asking different-but-convergent questions about the same area + - *"How does compensation parity actually work?"* / *"Are part-time roles paid proportionally?"* / *"What about salary bands?"* +- A participant expressing genuine interest or wanting to go deeper on a topic that others are privately exploring + - *"I'd love to know more about the handoff design piece"* paired with others asking similar follow-ups + +The message names the shared topic area in general terms — it never quotes or closely paraphrases another participant's words. + +The LLM must populate `sourceMessages` with the specific messages that establish cross-participant interest. These are verified before sending. + +## INTEREST_BRIDGE: Example Messages + +*(Multiple participants privately asking about compensation parity)* +> "A few people have been privately asking about compensation structures — seems like it's resonating beyond what's come up in the main chat." + +*(Shared curiosity about a specific framework)* +> "You're not the only one thinking about the handoff design piece. There's more interest in that than the conversation has had space for." + +> "Several people are privately exploring the same topic. Might come up in Q&A, or happy to dig into it here." + +## TRANSCRIPT_HOOK: Example Messages + +*(After a dense section on five job-design frameworks)* +> "We just moved through a lot on scope compression and handoff design. Happy to go deeper, make connections, or just sit with any part of it — no need to phrase it as a question." + +> "That compensation parity section moved fast. If anything in there didn't land, just point me at it — one word is enough." + +## Design Principles + +- **Pattern, not instance.** The Q&A loop handles individual messages. Check-ins are for what the Q&A loop can't see: the accumulation across a conversation. +- **Neurodiversity-affirming.** Every question is worth asking. Hesitation and dissent are signs of careful attention, not deficiency. Messages never imply the participant is doing something wrong. +- **Privacy-preserving by design.** The handler sees all participants' DMs but never reveals any individual's contribution. Cross-participant signals are surfaced only through abstraction — "others are sitting with something similar" — never by quoting or hinting at specific messages. +- **Restraint is the default.** Most cycles produce nothing. The handler's credibility depends on not over-messaging. A second check-in is suppressed until `minInterval` minutes have passed since the last one. +- **Rate limiting is per-participant.** The interval check is scoped to each participant's own DM channel, not the shared chat — so one participant receiving a check-in doesn't block another from receiving one. +- **LLM calls are skipped when nothing is eligible.** Eligibility gating means a participant who has never messaged and is in a sparse-transcript cycle never incurs an LLM call. diff --git a/src/agents/eventAssistant/checkinHandler.ts b/src/agents/eventAssistant/checkinHandler.ts new file mode 100644 index 00000000..2b0a572a --- /dev/null +++ b/src/agents/eventAssistant/checkinHandler.ts @@ -0,0 +1,398 @@ +import { z } from 'zod' +import { ConversationHistory, IChannel } from '../../types/index.types.js' + +import getConversationHistory from '../helpers/getConversationHistory.js' +import { + detectPrivateInterventionOpportunity, + buildInterventionTypeSection, + USER_TEMPLATE +} from '../helpers/interventionHandler.js' +import logger from '../../config/logger.js' +import filterHallucinations from '../helpers/hallucinations.js' +import transcript from '../helpers/transcript.js' +import { getChatPromptResponse } from '../helpers/llmChain.js' + +interface AgentLike { + agentConfig?: { checkinScanInterval?: number; minInterval?: number; [key: string]: unknown } + conversation: { channels: IChannel[]; [key: string]: unknown } + getLLM(): Promise + name: string +} + +/** + * Types of private check-in interventions the agent can send to individual participants. + * Distinct from InterventionType (public chat interventions) + * + * To add a new type: + * 1. Add it here + * 2. Add an entry in checkinTypeInfo below + */ +export enum PrivateCheckinType { + SOCIAL_REASSURANCE = 'SOCIAL_REASSURANCE', + NOT_ALONE = 'NOT_ALONE', + INTEREST_BRIDGE = 'INTEREST_BRIDGE', + TRANSCRIPT_HOOK = 'TRANSCRIPT_HOOK', + NONE = 'NONE' +} + +interface CheckinAnalysis { + shouldIntervene: boolean + checkinType: PrivateCheckinType + reasoning: string + directMessage?: string | null + confidenceScore: number + detectedPattern?: string | null + sourceMessages?: { participant: string; text: string }[] | null + context?: string +} + +/** + * Context passed to evaluateShared — available once before the participant loop. + */ +interface SharedCheckinContext { + sharedChatHistory: ConversationHistory + allDmHistory: ConversationHistory + agentInstance: AgentLike +} + +/** + * Context passed to isEligible — available per participant. + */ +interface ParticipantCheckinContext { + participantDmHistory: ConversationHistory + participantPseudonym: string + allDmHistory: ConversationHistory + endTime: Date | undefined + agentInstance: AgentLike +} + +/** + * Full definition for a check-in type. + * + * evaluateShared: optional, runs once before the participant loop. Return value is passed + * to isEligible as sharedResult. Use for expensive shared determinations (e.g. transcript + * density) that would otherwise be repeated per participant. + * + * isEligible: optional, runs per participant using the shared result. If it returns false, + * the type is excluded from that participant's prompt and schema, and the LLM call is + * skipped entirely if no types remain eligible. + */ +interface CheckinTypeDefinition { + description: string + register: string + examples: string[] + evaluateShared?: (context: SharedCheckinContext) => Promise + isEligible?: (context: ParticipantCheckinContext, sharedResult: TranscriptDensityResult) => boolean +} + +const transcriptDensitySchema = z.object({ + isDense: z.boolean().describe('Whether the transcript section was genuinely dense or fast-moving'), + topic: z.string().nullable().describe('The specific topic that was dense, if isDense is true — null otherwise') +}) + +type TranscriptDensityResult = z.infer | null + +const checkinTypeInfo: Record = { + [PrivateCheckinType.SOCIAL_REASSURANCE]: { + description: `Goal: acknowledge a recurring pattern of hesitation, self-doubt, or dissent in this participant's own messages, normalize it, and reduce pressure to perform or conform. + +Only send when the same signal has appeared in at least 3 separate messages from this participant. A single message — no matter how hedged — does not qualify, because the Q&A response already addressed it. Do not use sourceMessages for this type — it is about the individual's own pattern, not cross-participant comparison. Look for accumulation across turns: + +- Repeatedly apologizing for or minimizing their own contribution before making it ("sorry if this is obvious", "I'm probably wrong but", "not sure this is worth asking") +- Asking whether they're the only one feeling a certain way — checking if their reaction is legitimate +- Keeping their own experience at arm's length, talking about it as if it belongs to someone else ("some people might feel...", "I could imagine someone thinking...") +- Going quiet or pulling back after a moment of friction or pushback +- Stacking multiple qualifiers in a single message in a way that suggests the question almost didn't get sent`, + register: 'Always warm', + examples: [ + '(Repeated self-minimization) "The questions you keep almost not sending are usually the most worth asking."', + '(Recurring dissent or skepticism) "Keeping a running doubt alive across a whole conversation usually means you\'re onto something. That\'s worth staying with."', + '(Pulling back after friction) "If something landed sideways, you don\'t have to frame it as a question — one word is enough."', + '(General pattern of hedging) "Nothing here requires certainty. Uncertainty usually means you\'re paying close attention."' + ], + isEligible: ({ participantDmHistory }) => participantDmHistory.messages.filter((m) => !m.fromAgent).length >= 3 + }, + [PrivateCheckinType.NOT_ALONE]: { + description: `Goal: reduce isolation by letting this participant know others in the room are privately feeling the same doubt, hesitation, or uncertainty — without naming anyone or revealing specifics. + +Only send when you have verified evidence in the private messages that at least one other participant is expressing the same sentiment. Populate sourceMessages with the specific messages that support the claim — these will be verified. Do not send without sourceMessages. This type can trigger on a single message from the target participant if the cross-participant signal is clear. + +The message is entirely about solidarity — that their reaction is shared, not unique to them. Do not address their individual pattern here; use SOCIAL_REASSURANCE for that.`, + register: 'Warm', + examples: [ + '"A few others are privately sitting with similar questions — you\'re in good company."', + '"You\'re not the only one circling that. Others are privately sitting with something similar, even if it\'s not coming up in the main chat."', + '"That hesitation is more common in this room than you might think."' + ], + isEligible: ({ participantPseudonym, allDmHistory, participantDmHistory }) => + participantDmHistory.messages.some((m) => !m.fromAgent) && + allDmHistory.messages.some((m) => !m.fromAgent && m.pseudonym !== participantPseudonym) + }, + [PrivateCheckinType.INTEREST_BRIDGE]: { + description: + 'Let a participant know others are privately asking about the same topic — shared curiosity, not shared anxiety', + register: 'Warm', + examples: [ + '"A few people have been privately asking about [topic area] — seems like it\'s resonating beyond what\'s come up in the main chat."', + '"You\'re not the only one thinking about [topic area]. There\'s more interest in that than the conversation has had space for."' + ], + isEligible: ({ participantPseudonym, allDmHistory, participantDmHistory }) => + participantDmHistory.messages.some((m) => !m.fromAgent) && + allDmHistory.messages.some((m) => !m.fromAgent && m.pseudonym !== participantPseudonym) + }, + [PrivateCheckinType.TRANSCRIPT_HOOK]: { + description: + 'After a dense or fast-moving section of the transcript, reach out to a participant who has been quiet — name the specific topic and offer to dig into any part of it. Removes the burden of question formation entirely. Only send if the transcript was genuinely dense or fast-moving AND the participant has been quiet recently. Always name a specific topic — never send a generic "things moved fast" message.', + register: 'Warm, low-pressure', + examples: [ + '"We just moved through a lot on [specific topic]. Happy to go deeper, make connections, or just sit with any part of it, no need to phrase it as a question."', + '"That [topic] section moved fast. If anything in there didn\'t land, just point me at it — one word is enough."' + ], + evaluateShared: async ({ sharedChatHistory, agentInstance }) => { + const windowSeconds = (agentInstance.agentConfig?.checkinScanInterval ?? 3) * 60 + const recentTranscript = transcript.getTranscript(agentInstance.conversation, windowSeconds, sharedChatHistory.end) + if (!recentTranscript?.trim()) return { isDense: false, topic: null } + + try { + const llm = await agentInstance.getLLM() + return await getChatPromptResponse( + llm, + 'You are evaluating whether a recent event transcript section was dense or fast-moving enough to warrant a check-in with participants who have been quiet. Dense means: multiple concepts delivered quickly, significant information density, or a pace that could leave people behind. Sparse means: introductory remarks, pauses, light content, or a slow pace.', + 'Recent transcript:\n{recentTranscript}\n\nWas this section genuinely dense or fast-moving? If yes, name the primary topic.', + { recentTranscript }, + [], + transcriptDensitySchema + ) + } catch (err) { + logger.warn(`[checkinHandler] transcript density evaluation failed: ${err}`) + return { isDense: false, topic: null } + } + }, + isEligible: ({ participantDmHistory, endTime, agentInstance }, sharedResult) => { + if (!sharedResult?.isDense) return false + const windowMs = (agentInstance.agentConfig?.checkinScanInterval ?? 3) * 60 * 1000 + const cutoff = endTime ? new Date(endTime.getTime() - windowMs) : new Date(0) + return !participantDmHistory.messages.some((m) => !m.fromAgent && m.createdAt && m.createdAt > cutoff) + } + }, + [PrivateCheckinType.NONE]: { + description: 'No message needed — silence is the right call', + register: 'N/A', + examples: [] + } +} + +function getCheckinDmAnalysisSchema(eligibleTypes: PrivateCheckinType[]) { + const checkinTypeStrings = [...eligibleTypes.map((t) => t.toString()), PrivateCheckinType.NONE.toString()] as unknown as [ + string, + ...string[] + ] + return z.object({ + shouldIntervene: z.boolean().describe('Whether to send a check-in message'), + checkinType: z.enum(checkinTypeStrings).describe('The type of check-in to send'), + reasoning: z + .string() + .describe('Internal analysis of what signals you detected and why you are or are not sending a message'), + directMessage: z + .string() + .nullable() + .optional() + .describe('The direct message to send to this participant, if shouldIntervene is true'), + confidenceScore: z.number().min(0).max(100).describe('Confidence in this decision'), + detectedPattern: z.string().nullable().optional().describe('Brief description of the pattern detected'), + sourceMessages: z + .array( + z.object({ + participant: z.string().describe('Pseudonym of the participant'), + text: z + .string() + .describe( + 'The specific text from their message that supports this claim — must be a close quote, not a paraphrase' + ) + }) + ) + .nullable() + .optional() + .describe( + 'If your message implies others share this view or interest, provide the exact source messages here. Required whenever you reference shared sentiment or shared interest. Leave null if the message is only about this participant.' + ) + }) +} + +function buildCheckinSystemPrompt(participantPseudonym: string, eligibleTypes: PrivateCheckinType[]): string { + const typeSections = eligibleTypes.map((t) => buildInterventionTypeSection(t, checkinTypeInfo[t])).join('\n\n') + const transcriptNote = eligibleTypes.includes(PrivateCheckinType.TRANSCRIPT_HOOK) + ? '\n- Recent transcript (last 10 minutes) — use this to identify the dense section for TRANSCRIPT_HOOK' + : '' + + return `You are a supportive AI assistant at a live event, reaching out privately to individual participants when there is a meaningful reason to do so. + +## Who you are writing to +You are composing a private message for **${participantPseudonym}**. The "Private Messages" section contains DMs from all participants — use the others only to understand shared patterns, never as content to surface directly. + +## What you are looking at +- Shared chat history — includes ${participantPseudonym}'s public activity +- Private messages — includes ${participantPseudonym}'s DM history and other participants' DMs${transcriptNote} + +## Voice + +Always warm. Never clinical, never over-affirming, never sycophantic. Neurodiversity-affirming: every question is worth asking; never imply otherwise. 1-3 sentences maximum. + +## Rules + +- Write only to ${participantPseudonym} — the directMessage field is sent privately to them alone +- Never mention that you analyzed messages or used AI to detect patterns +- Never quote or closely paraphrase any participant's words +- Never name or hint at any other participant +- Before sending, check your recent posts: have you already said something similar to this participant? If so, choose NONE unless their situation has meaningfully evolved since then. +- Never repeat a theme unless something has genuinely changed — a new signal, a new message, a new pattern. +- Vary your check-in types. +- Silence is the default. Most cycles should produce no message. + +## Check-in Types + +${typeSections}` +} + +/** + * Main entry point called from eventAssistant.respond() when triggered periodically. + * Iterates over each participant's DM channel and decides whether to send a check-in. + * Called with `this` = agent instance. + */ +export async function buildCheckinResponses(conversationHistory: ConversationHistory) { + const responses: object[] = [] + + // Small chance there are duplicate direct channels with the same name due to React StrictMode double-invoking effects in development. De-dup just in case, to avoid duplicate messages. + const directChannels: IChannel[] = Array.from( + new Map( + this.conversation.channels + .filter( + (channel: IChannel) => + channel.direct && channel.participants?.some((p) => p._id?.toString() === this._id.toString()) + ) + .map((channel: IChannel) => [channel.name, channel]) + ).values() + ) + + if (directChannels.length === 0) return responses + + const sharedChatHistory = getConversationHistory(conversationHistory.messages, { + count: 100, + channels: ['chat'], + endTime: conversationHistory.end + }) + + // All DM history across all participant channels — passed as private context to each LLM call, + // same as eventMediator. The LLM reasons about shared patterns across participants semantically. + const allDmHistory = getConversationHistory( + conversationHistory.messages, + { count: 100, directMessages: true, endTime: conversationHistory.end }, + null, + this.conversation.channels.filter((c: IChannel) => c.direct).map((c: IChannel) => c.name) + ) + + // Run shared evaluations once in parallel before the participant loop. + // Types with evaluateShared perform expensive shared determinations (e.g. transcript density) + // that would otherwise be repeated per participant. + const activeTypes = Object.values(PrivateCheckinType).filter((t) => t !== PrivateCheckinType.NONE) + const sharedContext: SharedCheckinContext = { sharedChatHistory, allDmHistory, agentInstance: this } + const sharedResults: Partial> = Object.fromEntries( + await Promise.all( + activeTypes + .filter((t) => checkinTypeInfo[t].evaluateShared) + .map(async (t) => [t, await checkinTypeInfo[t].evaluateShared!(sharedContext)]) + ) + ) + + for (const channel of directChannels) { + const channelMessages = conversationHistory.messages.filter((m) => m.channels?.includes(channel.name)) + + // Resolve pseudonym — needed for the system prompt regardless of intervention type + const participantMessage = channelMessages.find((m) => !m.fromAgent) + const participantPseudonym = participantMessage?.pseudonym || 'participant' + + const participantDmHistory = getConversationHistory(channelMessages, { + count: 50, + endTime: conversationHistory.end + }) + + const participantContext: ParticipantCheckinContext = { + participantDmHistory, + participantPseudonym, + allDmHistory, + endTime: conversationHistory.end ?? undefined, + agentInstance: this + } + + // Filter to types eligible for this participant. Types without isEligible always pass through. + const eligibleTypes = activeTypes.filter((t) => { + const { isEligible } = checkinTypeInfo[t] + return !isEligible || isEligible(participantContext, sharedResults[t] ?? null) + }) + + if (eligibleTypes.length === 0) { + logger.debug(`[checkinHandler] no eligible types for ${participantPseudonym} — skipping LLM call`) + continue + } + + const systemPrompt = buildCheckinSystemPrompt(participantPseudonym, eligibleTypes) + const schema = getCheckinDmAnalysisSchema(eligibleTypes) + + // detectInterventionOpportunity handles rate limiting (scoped to this DM channel via + // participantDmHistory as rateLimitHistory) and the DB race guard (scoped to channel.name). + // allDmHistory is passed as privateConversationHistory so the LLM has full cross-participant + // context for reasoning. The schema uses `directMessage` instead of + // `sharedChatMessage`, so the professionalism check inside is skipped — appropriate here. + const analysis = (await detectPrivateInterventionOpportunity.call( + this, + sharedChatHistory, + systemPrompt, + schema, + allDmHistory, + participantDmHistory, + USER_TEMPLATE + )) as unknown as CheckinAnalysis | null + + if (!analysis?.directMessage) { + logger.debug( + `Checkin Handler: No intervention opportunity detected or rate limited for participant ${participantPseudonym}` + ) + continue + } + + // If the message implies cross-participant patterns, verify cited participant+text pairs are real. + // Mirrors backChannel hallucination filtering: the LLM must cite sources it can actually see. + if (analysis.sourceMessages?.length) { + const otherParticipantMessages = [...allDmHistory.messages, ...sharedChatHistory.messages].filter( + (m) => !m.fromAgent && m.pseudonym !== participantPseudonym + ) + if (!filterHallucinations(analysis.sourceMessages, otherParticipantMessages)) { + logger.warn( + `CheckinHandler: suppressed hallucinated cross-participant claim for ${participantPseudonym}: cited ${JSON.stringify( + analysis.sourceMessages + )}` + ) + continue + } + } + + logger.info( + `Checkin Handler: ${analysis.checkinType} → ${participantPseudonym} (${channel.name}): ${analysis.detectedPattern}` + ) + responses.push({ + visible: true, + message: { type: 'checkin', text: analysis.directMessage }, + messageType: 'json', + channels: [channel], + context: analysis.context, + participantPseudonym, + eligibleTypes, + checkinType: analysis.checkinType, + reasoning: analysis.reasoning, + confidenceScore: analysis.confidenceScore, + detectedPattern: analysis.detectedPattern + }) + } + + return responses +} diff --git a/src/agents/eventAssistant/eventAssistant.ts b/src/agents/eventAssistant/eventAssistant.ts index 82c31313..5eba53e4 100644 --- a/src/agents/eventAssistant/eventAssistant.ts +++ b/src/agents/eventAssistant/eventAssistant.ts @@ -1,5 +1,6 @@ import verify from '../helpers/verify.js' -import { AgentMessageActions, ConversationHistory } from '../../types/index.types.js' +import { AgentMessageActions, ConversationHistory, IChannel, IMessage } from '../../types/index.types.js' +import { buildCheckinResponses } from './checkinHandler.js' import renderAgentTemplate from '../helpers/renderAgentTemplate.js' import Message from '../../models/message.model.js' @@ -84,7 +85,9 @@ ${capabilityDescription}` } else { commandHint = 'Just type / if you want to see what else I can do.' } - return `Hi! I'm ${botName}, your private, anonymous support during this session. ${body}${commandHint ? ` ${commandHint}` : ''} Your pseudonym keeps you anonymous, and nothing you share is ever used to train AI models. No need to respond, just know I'm here.` + return `Hi! I'm ${botName}, your private, anonymous support during this session. ${body}${ + commandHint ? ` ${commandHint}` : '' + } Your pseudonym keeps you anonymous, and nothing you share is ever used to train AI models. No need to respond, just know I'm here.` } const MODERATOR_MESSAGE_TYPES = new Set(['moderator_offered', 'moderator_submitted', 'moderator_declined']) @@ -233,20 +236,39 @@ function offerModeratorSubmission(userMessage, agentResponses, conversation) { } } +const DEFAULT_CHECKIN_SCAN_INTERVAL_MINUTES = 3 + +type TraceResponse = { + message?: unknown + context?: string + participantPseudonym?: string + eligibleTypes?: string[] + checkinType?: string + confidenceScore?: number + detectedPattern?: string + reasoning?: string + promptType?: string + topic?: string + channels?: IChannel[] +} + export default verify({ name: 'Event Assistant', description: 'An assistant to answer questions about an event', priority: 100, maxTokens: 2000, defaultTriggers: { - perMessage: { directMessages: true, channels: ['chat', 'image-gen'], allowMessagesFromAgents: true } + perMessage: { directMessages: true, channels: ['chat', 'image-gen'], allowMessagesFromAgents: true }, + periodic: { timerPeriod: DEFAULT_CHECKIN_SCAN_INTERVAL_MINUTES * 60 } }, agentConfig: { chatIntroMessage: `Welcome! I'm {{agentConfig.botName}}, your AI event assistant. This is a space to chat with other event participants. You can also ask me questions with an @{{agentConfig.botName}} mention. Just remember that everyone can see what you ask me here. Use the {{agentConfig.botName}} tab if you want to talk privately. Have fun!`, enablePersonality: config.enableAgentPersonality, zoomChatIntroMessage: "Welcome! I'm {{agentConfig.botName}}, your AI event assistant. You can ask me questions in the chat with an @{{agentConfig.botName}} mention. Or send me a DM if you want to talk privately.", - tools: getDefaultEventAssistantToolNames() + tools: getDefaultEventAssistantToolNames(), + minInterval: 10, // minimum minutes between check-ins per participant + checkinScanInterval: DEFAULT_CHECKIN_SCAN_INTERVAL_MINUTES // how often the check-in handler runs (minutes); also controls transcript density window and quiet window }, llmTemplateVars: eventAssistantLlmTemplateVars, defaultLLMTemplates: eventAssistantLLMTemplates, @@ -265,6 +287,14 @@ export default verify({ defaultConversationHistorySettings: { count: 100, directMessages: true, channels: ['chat'] }, async evaluate(userMessage) { + if (!userMessage) { + return { + action: AgentMessageActions.CONTRIBUTE, + userMessage, + userContributionVisible: true, + suggestion: undefined + } + } if (userMessage.fromAgent) { // Handle image generation requests from self if (userMessage?.channels?.includes('image-gen')) { @@ -305,6 +335,11 @@ export default verify({ } }, async respond(conversationHistory: ConversationHistory, userMessage) { + // Periodic check-in tick (no userMessage means this was triggered by the periodic job) + if (!userMessage) { + return buildCheckinResponses.call(this, conversationHistory) + } + // Handle image generation requests from self if (userMessage?.channels?.includes('image-gen')) { const imageResponse = await generateImageResponse(userMessage, this.conversation) @@ -397,15 +432,36 @@ export default verify({ return [] }, - formatTraceInput(conversationHistory, userMessage) { + formatTraceInput(_conversationHistory: ConversationHistory, userMessage: IMessage | undefined) { + if (!userMessage) return { trigger: 'periodic' } return userMessage?.body }, - formatTraceOutput(responses) { + formatTraceOutput(responses: TraceResponse[]) { + if (responses.length > 0 && (responses[0]?.message as { type?: string })?.type === 'checkin') { + return responses.map((r) => ({ + participant: r.participantPseudonym, + eligibleTypes: r.eligibleTypes, + checkinType: r.checkinType, + confidenceScore: r.confidenceScore, + detectedPattern: r.detectedPattern, + reasoning: r.reasoning, + messageSent: (r.message as { text?: string })?.text + })) + } return responses[0]?.message }, - getTraceMetadata(conversationHistory, userMessage, responses) { + getTraceMetadata(conversationHistory: ConversationHistory, userMessage: IMessage | undefined, responses: TraceResponse[]) { + if (!userMessage) { + return { + triggerType: 'periodic', + topic: this.conversation.name, + context: responses + .map((r) => `# Participant: ${r.participantPseudonym} (${r.channels?.[0]?.name})\n\n${r.context}`) + .join('\n\n---\n\n') + } + } return { context: responses[0]?.context, conversationHistory, diff --git a/src/agents/eventMediator/eventMediator.ts b/src/agents/eventMediator/eventMediator.ts index 3fb9b55f..9f5165bb 100644 --- a/src/agents/eventMediator/eventMediator.ts +++ b/src/agents/eventMediator/eventMediator.ts @@ -4,11 +4,11 @@ import { defaultLLMModel, defaultLLMPlatform } from '../helpers/getModelChat.js' import { USER_TEMPLATE, interventionLlmTemplateVars, - detectInterventionOpportunity, + detectPublicInterventionOpportunity, getInterventionAnalysisSchema, buildInterventionTypeSection } from '../helpers/interventionHandler.js' -import { InterventionType } from '../helpers/interventionTypes.js' +import { InterventionType, InterventionAnalysis } from '../helpers/interventionTypes.js' import getConversationHistory from '../helpers/getConversationHistory.js' import logger from '../../config/logger.js' @@ -191,7 +191,7 @@ export default verify({ this.conversation.channels.filter((c: IChannel) => c.direct).map((c: IChannel) => c.name) ) - const interventionAnalysis = await detectInterventionOpportunity.call( + const interventionAnalysis = await detectPublicInterventionOpportunity.call( this, sharedChatHistory, getMediatorSystemPrompt(this.agentConfig.personality), @@ -212,18 +212,46 @@ export default verify({ if (interventionAnalysis.sharedChatMessage) { responses.push({ + ...interventionAnalysis, visible: true, message: interventionAnalysis.sharedChatMessage, - channels: this.conversation.channels.filter((c: IChannel) => c.name === 'chat'), - context: `Intervention Type: ${interventionAnalysis.interventionType}\nReasoning: ${ - interventionAnalysis.reasoning - }\nPattern: ${interventionAnalysis.detectedPattern || 'N/A'}` - }) + channels: this.conversation.channels.filter((c: IChannel) => c.name === 'chat') + } as AgentResponse>) } return responses }, + formatTraceInput(conversationHistory: ConversationHistory) { + return { + transcript: conversationHistory.messages.map((m) => ({ + role: m.fromAgent ? 'agent' : 'participant', + pseudonym: m.pseudonym, + text: m.bodyType === 'json' ? (m.body as { text?: string })?.text : m.body, + createdAt: m.createdAt + })) + } + }, + + formatTraceOutput(responses: InterventionAnalysis[]) { + if (responses.length === 0) return { interventionType: 'NONE', messageSent: null } + const r = responses[0] + return { + interventionType: r.interventionType, + reasoning: r.reasoning, + confidenceScore: r.confidenceScore, + detectedPattern: r.detectedPattern, + messageSent: r.sharedChatMessage + } + }, + + getTraceMetadata(_conversationHistory: ConversationHistory, _userMessage: unknown, responses: InterventionAnalysis[]) { + return { + topic: this.conversation.name, + context: responses[0]?.context + } + }, + async start() { return true }, diff --git a/src/agents/helpers/hallucinations.ts b/src/agents/helpers/hallucinations.ts new file mode 100644 index 00000000..16b40f95 --- /dev/null +++ b/src/agents/helpers/hallucinations.ts @@ -0,0 +1,47 @@ +import * as fuzzball from 'fuzzball' +import logger from '../../config/logger.js' + +const matchThreshold = 70 + +interface SourceClaim { + participant: string + text: string +} + +/** + * Verifies that every cited {participant, text} pair fuzzy-matches a real participant + * and that the cited text fuzzy-matches something they actually said. + * Returns true if all claims are verified, false if any cannot be matched. + */ +export default function filterHallucinations( + claims: SourceClaim[], + messages: { pseudonym?: string; body?: unknown; bodyType?: string }[] +): boolean { + return claims.every((claim) => { + const participantMessages = messages.filter( + (m) => fuzzball.ratio(claim.participant, m.pseudonym ?? '') >= matchThreshold + ) + + if (participantMessages.length === 0) { + logger.info(`Could not find participant match (>=${matchThreshold}%) for cited pseudonym: "${claim.participant}"`) + return false + } + + const bestTextMatch = participantMessages.reduce((best, m) => { + const body = m.bodyType === 'json' ? (m.body as { text?: string })?.text : m.body + const score = fuzzball.partial_ratio(claim.text, typeof body === 'string' ? body : '') + return score > (best?.score || 0) ? { score } : best + }, null as { score: number } | null) + + if (!bestTextMatch || bestTextMatch.score < matchThreshold) { + logger.info( + `Could not find text match (>=${matchThreshold}%) for cited text: "${claim.text}" from "${ + claim.participant + }" (best: ${bestTextMatch?.score || 0}%)` + ) + return false + } + + return true + }) +} diff --git a/src/agents/helpers/interventionHandler.ts b/src/agents/helpers/interventionHandler.ts index de9027b3..c9268640 100644 --- a/src/agents/helpers/interventionHandler.ts +++ b/src/agents/helpers/interventionHandler.ts @@ -117,47 +117,30 @@ function getRecentAgentInterventions(conversationHistory: ConversationHistory): } /** - * Main intervention detection function - * @param conversationHistory - Shared chat history - * @param privateConversationHistory - Private/DM history (can be null if not needed based on category config) - * @param categoryConfig - Optional category configuration (defaults to all enabled) + * Shared LLM evaluation core: formats histories, retrieves transcript/RAG context, calls the LLM, + * checks confidence and professionalism, and attaches the trace context string. + * Rate limiting and DB race guard are handled by the two public wrappers below. */ -export async function detectInterventionOpportunity( - conversationHistory: ConversationHistory, +async function runInterventionAnalysis( + sharedChatHistory: ConversationHistory, baseSystemPrompt: string, schema: z.ZodSchema, - privateConversationHistory?: ConversationHistory | null, - sharedChatChannel: string = 'chat' + privateConversationHistory: ConversationHistory | null, + userTemplate: string | undefined ): Promise { - // Use conversationHistory.end as "now" to maintain consistent time simulation - // This allows tests and the system to reason about specific moments in time - const now = conversationHistory.end ? conversationHistory.end.getTime() : Date.now() - const minInterval = (this.agentConfig?.minInterval || 2) * 60 * 1000 // Convert minutes to milliseconds - // Get recent interventions from conversation history (stateless rate limiting) - const recentInterventions = getRecentAgentInterventions(conversationHistory) - - // Rate limiting: Check if an agent intervened recently - const lastIntervention = recentInterventions[recentInterventions.length - 1] - if (lastIntervention) { - const timeSinceLastIntervention = now - lastIntervention.timestamp.getTime() - if (timeSinceLastIntervention < minInterval) { - return null // Too soon since last intervention - } - } - // Format conversation histories - const sharedChatMessages = formatMultiUserConversationHistory(conversationHistory) + const sharedChatMessages = formatMultiUserConversationHistory(sharedChatHistory) const privateMessages = privateConversationHistory ? formatMultiUserConversationHistory(privateConversationHistory) : [] // Get recent transcript (last 10 minutes) - const recentTranscript = transcript.getTranscript(this.conversation, 600, conversationHistory.end) + const recentTranscript = transcript.getTranscript(this.conversation, 600, sharedChatHistory.end) // Get relevant context via RAG - use both private and public messages to find relevant transcript chunks const allMessages = [...sharedChatMessages, ...privateMessages].map((m) => m.content).join('\n') - const { chunks } = await transcript.searchTranscript(this.conversation, allMessages, conversationHistory.end) + const { chunks } = await transcript.searchTranscript(this.conversation, allMessages, sharedChatHistory.end) // Get agent's recent posts for self-awareness - const agentRecentPosts = getAgentRecentPosts(conversationHistory, this.name, 5) + const agentRecentPosts = getAgentRecentPosts(sharedChatHistory, this.name, 5) // Determine which personality to use (if any) let personalityName: string | null = null @@ -168,23 +151,26 @@ export async function detectInterventionOpportunity( } const systemPrompt = buildSystemPromptWithPersonality(baseSystemPrompt, personalityName) + const resolvedUserTemplate = userTemplate ?? this.llmTemplates.user ?? USER_TEMPLATE + + const templateVars = { + topic: this.conversation.name, + recentTranscript, + retrievedChunks: chunks, + privateMessages: privateMessages.map((m) => m.content).join('\n') || 'No private messages.', + sharedChatHistory: + sharedChatMessages + .map((m) => (m.role === 'assistant' ? `Assistant: ${m.content}` : m.content)) + .join('\n') || 'No shared chat messages yet.', + agentRecentPosts + } - const userTemplate = this.llmTemplates.user || USER_TEMPLATE - - // Call LLM with structured output const llm = await this.getLLM() const analysis = (await getChatPromptResponse( llm, systemPrompt, - userTemplate, - { - topic: this.conversation.name, - recentTranscript, - retrievedChunks: chunks, - privateMessages: privateMessages.map((m) => m.content).join('\n') || 'No private messages.', - sharedChatHistory: sharedChatMessages.map((m) => m.content).join('\n') || 'No shared chat messages yet.', - agentRecentPosts - }, + resolvedUserTemplate, + templateVars, [], // No chat history - we provide full context in the prompt schema )) as z.infer @@ -205,7 +191,6 @@ export async function detectInterventionOpportunity( analysis.interventionType, recentTranscript ) - if (!isAppropriate) { logger.warn( `Agent ${this.name} intervention rejected by professionalism guardrail. Type: ${analysis.interventionType}` @@ -214,20 +199,83 @@ export async function detectInterventionOpportunity( } } - // Re-check with fresh DB state to handle concurrent agents in a cluster. - // Shrinks the race window from LLM latency (seconds) to milliseconds. + const renderedUserPrompt = Object.entries(templateVars).reduce( + (prompt, [key, value]) => prompt.replace(new RegExp(`\\{${key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\}`, 'g'), value ?? ''), + resolvedUserTemplate + ) + + const result = analysis as InterventionAnalysis + result.context = [`## System Prompt:\n${systemPrompt}`, `## User Prompt:\n${renderedUserPrompt}`].join('\n\n') + + return result +} + +/** + * Detects whether to post a public intervention to the shared chat channel. + * Rate limiting and the DB race guard are both scoped to the shared chat. + * Used by eventMediator and engagementAgent. + */ +export async function detectPublicInterventionOpportunity( + sharedChatHistory: ConversationHistory, + baseSystemPrompt: string, + schema: z.ZodSchema, + privateConversationHistory?: ConversationHistory | null, + userTemplate?: string +): Promise { + const now = sharedChatHistory.end ? sharedChatHistory.end.getTime() : Date.now() + const minInterval = (this.agentConfig?.minInterval || 2) * 60 * 1000 + + const lastIntervention = getRecentAgentInterventions(sharedChatHistory).at(-1) + if (lastIntervention && now - lastIntervention.timestamp.getTime() < minInterval) { + return null + } + + const result = await runInterventionAnalysis.call( + this, + sharedChatHistory, + baseSystemPrompt, + schema, + privateConversationHistory ?? null, + userTemplate + ) + if (!result) return null + const freshRecentIntervention = await Message.findOne({ conversation: this.conversation._id, fromAgent: true, visible: true, - channels: sharedChatChannel, + channels: 'chat', createdAt: { $gte: new Date(now - minInterval) } }) - if (freshRecentIntervention) { logger.info(`Agent ${this.name} dropping intervention: another agent posted during LLM call`) return null } - return analysis as InterventionAnalysis + return result +} + +/** + * Detects whether to send a private check-in to an individual participant's DM channel. + * Rate limiting is scoped to that participant's DM history. No DB race guard — private + * DMs are handled by a single agent per conversation, so concurrent posting isn't a concern. + * Used by checkinHandler. + */ +export async function detectPrivateInterventionOpportunity( + sharedChatHistory: ConversationHistory, + baseSystemPrompt: string, + schema: z.ZodSchema, + allDmHistory: ConversationHistory, + participantDmHistory: ConversationHistory, + userTemplate?: string +): Promise { + const now = sharedChatHistory.end ? sharedChatHistory.end.getTime() : Date.now() + const minInterval = (this.agentConfig?.minInterval || 2) * 60 * 1000 + + const lastIntervention = getRecentAgentInterventions(participantDmHistory).at(-1) + if (lastIntervention && now - lastIntervention.timestamp.getTime() < minInterval) { + return null + } + + return runInterventionAnalysis.call(this, sharedChatHistory, baseSystemPrompt, schema, allDmHistory, userTemplate) } diff --git a/src/agents/helpers/interventionTypes.ts b/src/agents/helpers/interventionTypes.ts index 2d36443c..72b540a6 100644 --- a/src/agents/helpers/interventionTypes.ts +++ b/src/agents/helpers/interventionTypes.ts @@ -24,4 +24,5 @@ export interface InterventionAnalysis { confidenceScore: number detectedPattern?: string affectedUsers?: number + context?: string } diff --git a/src/agents/moderatorNotifier/moderatorNotifier.ts b/src/agents/moderatorNotifier/moderatorNotifier.ts index 3d5f5342..9a3ea084 100644 --- a/src/agents/moderatorNotifier/moderatorNotifier.ts +++ b/src/agents/moderatorNotifier/moderatorNotifier.ts @@ -199,7 +199,10 @@ ${msg.body.insights.map((insight: { value: string }) => `* ${insight.value}`).jo recentTranscript, retrievedChunks: chunks, privateMessages: privateMessages.map((m) => m.content).join('\n') || 'No private messages.', - sharedChatHistory: sharedChatMessages.map((m) => m.content).join('\n') || 'No shared chat messages yet.' + sharedChatHistory: + sharedChatMessages + .map((m) => (m.role === 'assistant' ? `Assistant: ${m.content}` : m.content)) + .join('\n') || 'No shared chat messages yet.' }, [], MODERATOR_SCHEMA diff --git a/tests/agents/engagement/engagement.agent.test.ts b/tests/agents/engagement/engagement.agent.test.ts index 6500fda2..7888d6bf 100644 --- a/tests/agents/engagement/engagement.agent.test.ts +++ b/tests/agents/engagement/engagement.agent.test.ts @@ -97,7 +97,7 @@ describe(`engagement agent tests`, () => { // Speaker just asked a challenging question - room is silent // Should consider PROVOCATION to spark discussion, but might also be PLAY if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[02:30] Detected ${interventionType}:`, responses[0].message) expect(['PROVOCATION', 'NONE', 'PLAY']).toContain(interventionType) } @@ -128,7 +128,7 @@ describe(`engagement agent tests`, () => { // Lots of data just presented, but room is passive - should provoke discussion if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[07:30] Detected ${interventionType}:`, responses[0].message) expect(['PROVOCATION', 'NONE']).toContain(interventionType) } @@ -184,7 +184,7 @@ describe(`engagement agent tests`, () => { // Active, thoughtful discussion - likely NONE if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[08:45] Detected ${interventionType}:`, responses[0].message) } // Don't expect intervention during active healthy discussion @@ -218,7 +218,7 @@ describe(`engagement agent tests`, () => { // Breathing room after emotional peak - good moment for warm PLAY if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[06:20] Detected ${interventionType}:`, responses[0].message) expect(responses[0].message).toBeDefined() } @@ -261,7 +261,7 @@ describe(`engagement agent tests`, () => { // Perfect moment for witty PLAY commentary on surprising data if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[08:50] Detected ${interventionType}:`, responses[0].message) expect(['PLAY', 'NONE', 'PROVOCATION']).toContain(interventionType) } @@ -292,7 +292,7 @@ describe(`engagement agent tests`, () => { // Should NOT use witty PLAY during emotionally raw moments if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[04:35] Detected ${interventionType}:`, responses[0].message) // Should NOT be PLAY - could be NONE or warm PROVOCATION expect(interventionType).not.toBe('PLAY') @@ -323,7 +323,7 @@ describe(`engagement agent tests`, () => { // Transition moments are good for PLAY if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[06:30] Detected ${interventionType}:`, responses[0].message) expect(['PLAY', 'NONE', 'PROVOCATION']).toContain(interventionType) } @@ -385,7 +385,7 @@ describe(`engagement agent tests`, () => { // Should be able to process transcript-only without error // May or may not intervene - depends on LLM assessment if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[02:35 transcript-only] Detected ${interventionType}:`, responses[0].message) expect(responses[0].message).toBeDefined() } @@ -443,7 +443,7 @@ describe(`engagement agent tests`, () => { // Healthy, thoughtful discussion - likely NONE if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] console.log(`[07:00] Optional intervention: ${interventionType}:`, responses[0].message) } // Don't expect intervention when discussion is flowing well @@ -478,7 +478,7 @@ describe(`engagement agent tests`, () => { const responses = await defaultAgentTypes.engagementAgent.respond.call(agent, conversationHistory) if (responses.length > 0) { - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] if (interventionType) { interventionTypes.push(interventionType) console.log(`${label}: ${interventionType}`) diff --git a/tests/agents/eventAssistant/alternateName.agent.test.ts b/tests/agents/eventAssistant/alternateName.agent.test.ts index 137d71ff..c0057e01 100644 --- a/tests/agents/eventAssistant/alternateName.agent.test.ts +++ b/tests/agents/eventAssistant/alternateName.agent.test.ts @@ -83,9 +83,10 @@ describe('alternate name enforcement', () => { expect(response).not.toContain('Jay-Z') }) - it('uses canonical name when asked about the speaker by a misspelling', async () => { + it('does not use misspelling or alternate name when asked about the speaker by a misspelling', async () => { const response = await ask('What did Jonny Zittren say about social contracts?') - expect(response).toContain('Jonathan Zittrain') + expect(response).not.toContain('Jonny') + expect(response).not.toContain('Zittren') expect(response).not.toContain('JZ') }) }) diff --git a/tests/agents/eventAssistant/checkin.agent.test.ts b/tests/agents/eventAssistant/checkin.agent.test.ts new file mode 100644 index 00000000..268e61b0 --- /dev/null +++ b/tests/agents/eventAssistant/checkin.agent.test.ts @@ -0,0 +1,544 @@ +/* eslint-disable no-console */ +import setupAgentTest from '../../utils/setupAgentTest.js' +import defaultAgentTypes from '../../../src/agents/index.js' +import { + createPublicTopic, + createUser, + createDirectMessage, + loadPartTimeWorkTranscript, + loadTestTranscript, + prepareMessagesForAgent, + createConversation +} from '../../utils/agentTestHelpers.js' +import Channel from '../../../src/models/channel.model.js' +import Agent from '../../../src/models/user.model/agent.model/index.js' +import getConversationHistory from '../../../src/agents/helpers/getConversationHistory.js' + +jest.setTimeout(120000) + +const testConfig = setupAgentTest('eventAssistant') + +const testTimeout = 480000 + +// Dense transcript for TRANSCRIPT_HOOK tests — five frameworks delivered quickly +const denseTranscript = `00:00 | Jessica: Let me walk through five frameworks for structuring a smallest viable job. +00:15 | Jessica: First: scope compression — identify every task in the role and ask which ones require full context. +00:30 | Jessica: Second: handoff design — document state at the end of each session so the next person can pick up without overlap. +00:45 | Jessica: Third: async accountability — replace status meetings with written artifacts that can be reviewed at any time. +01:00 | Jessica: Fourth: compensation parity — partial hours should map proportionally to full-time bands, not hourly minimums. +01:15 | Jessica: Fifth: career trajectory — part-time workers need explicit promotion criteria or they plateau.` + +describe('checkin handler tests', () => { + let topic + let user1 + let user2 + let user3 + + const startTime = new Date(Date.now() - 15 * 60 * 1000) + const getTime = (offsetSeconds = 0) => new Date(startTime.getTime() + offsetSeconds * 1000) + + async function createCheckinConversation(users: (typeof user1)[]) { + const conv = await createConversation( + { + name: 'Why your company should consider part-time work', + description: `"No one wants to work anymore." Entrepreneur Jessica Drain believes otherwise.`, + presenters: [{ name: 'Jessica Drain', bio: 'A career marketer and graphic designer.' }] + }, + users[0], + topic, + startTime + ) + + const ag = new Agent({ + agentType: 'eventAssistant', + conversation: conv, + llmPlatform: testConfig.llmPlatform, + llmModel: testConfig.llmModel, + agentConfig: { minInterval: 0 } + }) + + const directChannels = users.map((u) => ({ + name: `direct-agents-${u._id}`, + direct: true, + participants: [u, ag] + })) + + const channels = await Channel.create([{ name: 'transcript' }, { name: 'chat' }, ...directChannels]) + conv.channels.push(...channels) + await ag.save() + conv.agents.push(ag) + await conv.save() + await ag.start() + + return { conversation: conv, agent: ag } + } + + beforeEach(async () => { + user1 = await createUser('Curious Badger') + user2 = await createUser('Thoughtful Fox') + user3 = await createUser('Skeptical Owl') + topic = await createPublicTopic() + }) + + // Call the periodic checkin path (no userMessage). + // Direct channel names must be passed explicitly to getConversationHistory so DM messages + // are included in the history — directMessages: true alone is not sufficient. + async function runCheckin(ag, endTime = new Date()) { + const directChannelNames = ag.conversation.channels.filter((c) => c.direct).map((c) => c.name) + const conversationHistory = getConversationHistory( + ag.conversation.messages, + { count: 100, directMessages: true, endTime }, + null, + directChannelNames + ) + return defaultAgentTypes.eventAssistant.respond.call(ag, conversationHistory, null) + } + + function findCheckinForUser(responses, userId) { + return responses.find((r) => r.channels?.[0]?.name === `direct-agents-${userId}`) + } + + // Creates a simulated agent Q&A response in the DM channel. + // Needed to reflect real DM history: the eventAssistant always responds to each message, + // so the checkin handler sees interleaved participant + agent turns. + function createAgentDirectMessage(text: string, ag, userId, conv, createdAt = new Date()) { + return { + body: text, + bodyType: 'text', + conversation: conv._id, + pseudonym: ag.name, + pseudonymId: ag._id, + owner: ag._id, + channels: [`direct-agents-${userId}`], + fromAgent: true, + pause: 0, + visible: true, + createdAt, + updatedAt: createdAt, + upVotes: [], + downVotes: [] + } + } + + describe('SOCIAL_REASSURANCE', () => { + it( + 'does not send for a single hesitant message — Q&A response handles it', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + await loadPartTimeWorkTranscript(ag.conversation, true) + + const msg = await createDirectMessage( + "I'm probably wrong about this but I don't think part-time work would work in my industry at all", + user1, + ag.conversation, + getTime(120) + ) + const reply = createAgentDirectMessage( + "That's a fair concern — some industries do face structural barriers. The smallest viable job model works best where tasks can be clearly scoped. What industry are you in? Happy to think through it with you.", + ag, + user1._id, + ag.conversation, + getTime(130) + ) + + await prepareMessagesForAgent([msg, reply], ag.conversation, ag) + const responses = await runCheckin(ag) + + const checkin = findCheckinForUser(responses, user1._id) + if (checkin) { + console.log( + `Unexpected checkin for single message (type: ${JSON.stringify(checkin.message)}); LLM should have chosen NONE` + ) + } + expect(checkin).toBeUndefined() + }, + testTimeout + ) + + it( + 'sends for a pattern of self-minimization across multiple messages', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + await loadPartTimeWorkTranscript(ag.conversation, true) + + const msg1 = await createDirectMessage( + 'Sorry if this is obvious, but how do you handle handoffs in a part-time model?', + user1, + ag.conversation, + getTime(60) + ) + const reply1 = createAgentDirectMessage( + 'Not obvious at all — handoff design is one of the trickier parts. Jessica recommends documenting session state so the next person can pick up without overlap. Async artifacts (written notes, shared logs) tend to work better than verbal briefings.', + ag, + user1._id, + ag.conversation, + getTime(75) + ) + const msg2 = await createDirectMessage( + "I'm probably not thinking about this right, but wouldn't async accountability just mean things fall through the cracks?", + user1, + ag.conversation, + getTime(180) + ) + const reply2 = createAgentDirectMessage( + "That's a real tension. The argument is that written artifacts create a visible record — gaps become obvious rather than hidden. But it does require discipline to maintain. What kind of work are you imagining this for?", + ag, + user1._id, + ag.conversation, + getTime(195) + ) + const msg3 = await createDirectMessage( + 'Not sure this is worth asking, but does compensation parity actually work in practice? I might be missing something.', + user1, + ag.conversation, + getTime(300) + ) + const reply3 = createAgentDirectMessage( + "Worth asking. Jessica's approach is to pay proportionally to full-time bands rather than hourly minimums — so a 20hr/week role pays at the same rate per hour as the equivalent full-time role. Some companies balk at this, but she argues it's what attracts high-caliber part-time candidates.", + ag, + user1._id, + ag.conversation, + getTime(315) + ) + + await prepareMessagesForAgent([msg1, reply1, msg2, reply2, msg3, reply3], ag.conversation, ag) + const responses = await runCheckin(ag) + + console.log('Self-minimization pattern responses:', JSON.stringify(responses, null, 2)) + const checkin = findCheckinForUser(responses, user1._id) + expect(checkin).toBeDefined() + expect(checkin.message.type).toBe('checkin') + expect(checkin.message.text.length).toBeGreaterThan(10) + console.log(`SOCIAL_REASSURANCE self-minimization: ${checkin.message.text}`) + }, + testTimeout + ) + + it( + 'sends for a participant who repeatedly questions whether their reaction is valid', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + await loadPartTimeWorkTranscript(ag.conversation, true) + + const msg1 = await createDirectMessage( + 'Is it just me or does the career trajectory piece feel really underdeveloped?', + user1, + ag.conversation, + getTime(60) + ) + const reply1 = createAgentDirectMessage( + "You're not alone in noticing that — Jessica spends most of the talk on the hiring and compensation side. Her main point on trajectory is that part-time workers need explicit promotion criteria rather than being left to plateau. It's a real gap in how most companies think about it.", + ag, + user1._id, + ag.conversation, + getTime(75) + ) + const msg2 = await createDirectMessage( + 'Am I the only one who finds the compensation parity argument a bit idealistic?', + user1, + ag.conversation, + getTime(180) + ) + const reply2 = createAgentDirectMessage( + "Not an unreasonable reaction. The counterargument is that paying proportionally to full-time rates is what makes the model work — it attracts people who otherwise can't access the labor market at all. Whether that's achievable depends a lot on the business model and margins.", + ag, + user1._id, + ag.conversation, + getTime(195) + ) + const msg3 = await createDirectMessage( + "Does anyone else feel like this only works in certain industries? Maybe I'm just too skeptical.", + user1, + ag.conversation, + getTime(300) + ) + const reply3 = createAgentDirectMessage( + "Jessica acknowledges that herself — she says some industries and job types won't be able to accommodate it, or it will be harder. The challenge she's putting out is to at least question the 40-hour default, even if you can't change it wholesale.", + ag, + user1._id, + ag.conversation, + getTime(315) + ) + + await prepareMessagesForAgent([msg1, reply1, msg2, reply2, msg3, reply3], ag.conversation, ag) + const responses = await runCheckin(ag) + + console.log('Isolation-checking pattern responses:', JSON.stringify(responses, null, 2)) + const checkin = findCheckinForUser(responses, user1._id) + expect(checkin).toBeDefined() + expect(checkin.message.type).toBe('checkin') + expect(checkin.message.text.length).toBeGreaterThan(10) + console.log(`SOCIAL_REASSURANCE isolation-checking: ${checkin.message.text}`) + }, + testTimeout + ) + + it( + 'sends for a participant who pulls back and hedges more after a point of friction', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + await loadPartTimeWorkTranscript(ag.conversation, true) + + const msg1 = await createDirectMessage( + "I actually disagree — I don't think scope compression works without strong management buy-in.", + user1, + ag.conversation, + getTime(60) + ) + const reply1 = createAgentDirectMessage( + "That's a fair challenge. Scope compression as Jessica describes it is more of a design exercise than a management mandate — but you're right that without buy-in the resulting roles might never get approved or staffed. Have you seen it attempted without that support?", + ag, + user1._id, + ag.conversation, + getTime(75) + ) + const msg2 = await createDirectMessage( + "Well... maybe I'm wrong about that. I guess it depends on the team.", + user1, + ag.conversation, + getTime(180) + ) + const reply2 = createAgentDirectMessage( + 'No, your instinct seems sound. Management buy-in is probably a prerequisite for most of this to stick — the compensation parity piece especially.', + ag, + user1._id, + ag.conversation, + getTime(195) + ) + const msg3 = await createDirectMessage( + 'Never mind, probably not worth getting into. I might just be thinking about my specific situation.', + user1, + ag.conversation, + getTime(300) + ) + const reply3 = createAgentDirectMessage( + "It's worth getting into — your situation is probably exactly what this talk is about. What's the context, if you don't mind sharing?", + ag, + user1._id, + ag.conversation, + getTime(315) + ) + + await prepareMessagesForAgent([msg1, reply1, msg2, reply2, msg3, reply3], ag.conversation, ag) + const responses = await runCheckin(ag) + + console.log('Pull-back after friction responses:', JSON.stringify(responses, null, 2)) + const checkin = findCheckinForUser(responses, user1._id) + expect(checkin).toBeDefined() + expect(checkin.message.type).toBe('checkin') + expect(checkin.message.text.length).toBeGreaterThan(10) + console.log(`SOCIAL_REASSURANCE friction pull-back: ${checkin.message.text}`) + }, + testTimeout + ) + }) + + describe('NOT_ALONE', () => { + it( + 'sends to each participant when multiple privately share the same doubt', + async () => { + const { agent: ag } = await createCheckinConversation([user1, user2, user3]) + await loadPartTimeWorkTranscript(ag.conversation, true) + + // All three participants privately expressing the same skepticism — single message each + const msg1 = await createDirectMessage( + "I'm not sure this would fly with senior leadership — they're really attached to the 40-hour model", + user1, + ag.conversation, + getTime(100) + ) + const msg2 = await createDirectMessage( + 'Honestly skeptical. Our HR team would push back hard on this. Am I being too pessimistic?', + user2, + ag.conversation, + getTime(110) + ) + const msg3 = await createDirectMessage( + 'I wonder if I disagree with some of this — feels idealistic for where we are', + user3, + ag.conversation, + getTime(120) + ) + + await prepareMessagesForAgent([msg1, msg2, msg3], ag.conversation, ag) + const responses = await runCheckin(ag) + + console.log('NOT_ALONE responses:', JSON.stringify(responses, null, 2)) + expect(responses.length).toBeGreaterThan(0) + + responses.forEach((r) => { + expect(r.message.type).toBe('checkin') + expect(r.message.text.length).toBeGreaterThan(10) + // Must not reveal any other participant's specific words or identity + expect(r.message.text.toLowerCase()).not.toContain('hr team') + expect(r.message.text.toLowerCase()).not.toContain('senior leadership') + expect(r.message.text.toLowerCase()).not.toContain('idealistic') + console.log(`NOT_ALONE → ${r.channels[0].name}: ${r.message.text}`) + }) + }, + testTimeout + ) + }) + + describe('INTEREST_BRIDGE', () => { + it( + 'surfaces shared interest when multiple participants ask about the same topic', + async () => { + const { agent: ag } = await createCheckinConversation([user1, user2, user3]) + await loadPartTimeWorkTranscript(ag.conversation, true) + + const msg1 = await createDirectMessage( + 'How does compensation work for part-time roles?', + user1, + ag.conversation, + getTime(100) + ) + const msg2 = await createDirectMessage( + 'What about salary bands for part-time employees?', + user2, + ag.conversation, + getTime(110) + ) + const msg3 = await createDirectMessage( + 'Are part-time workers paid proportionally to full-time?', + user3, + ag.conversation, + getTime(120) + ) + + await prepareMessagesForAgent([msg1, msg2, msg3], ag.conversation, ag) + const responses = await runCheckin(ag) + + console.log('INTEREST_BRIDGE responses:', JSON.stringify(responses, null, 2)) + expect(responses.length).toBeGreaterThan(0) + responses.forEach((r) => { + expect(r.message.type).toBe('checkin') + expect(r.message.text.length).toBeGreaterThan(10) + console.log(`INTEREST_BRIDGE → ${r.channels[0].name}: ${r.message.text}`) + }) + }, + testTimeout + ) + }) + + describe('TRANSCRIPT_HOOK', () => { + it( + 'reaches out to a silent participant after a dense transcript section', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + await loadTestTranscript(ag.conversation, denseTranscript, true) + + // user1 has sent no DMs — they are silent. + // endTime is set to startTime + 2min so the 3-min (180s) lookback window covers the + // transcript, which is only ~75s long and loaded at the very start of the conversation. + await prepareMessagesForAgent([], ag.conversation, ag) + const responses = await runCheckin(ag, getTime(2 * 60)) + + console.log('TRANSCRIPT_HOOK responses:', JSON.stringify(responses, null, 2)) + + if (responses.length > 0) { + const checkin = findCheckinForUser(responses, user1._id) + expect(checkin).toBeDefined() + expect(checkin.message.type).toBe('checkin') + expect(checkin.message.text.length).toBeGreaterThan(10) + // Must name a specific topic — not a generic opener + expect(checkin.message.text.toLowerCase()).not.toMatch(/^(things|a lot|so much) moved fast/) + console.log(`TRANSCRIPT_HOOK message: ${checkin.message.text}`) + } else { + // LLM may legitimately decide silence is right — log and accept + console.log('TRANSCRIPT_HOOK: no checkin sent (LLM chose silence)') + } + }, + testTimeout + ) + + it( + 'does not reach out when transcript is sparse', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + await loadTestTranscript( + ag.conversation, + `00:00 | Jessica: Welcome everyone. +00:30 | Jessica: We'll get started in a moment.`, + true + ) + + await prepareMessagesForAgent([], ag.conversation, ag) + const responses = await runCheckin(ag, getTime(2 * 60)) + + expect(responses).toHaveLength(0) + }, + testTimeout + ) + }) + + describe('rate limiting', () => { + it( + 'does not send a checkin when one was already sent recently', + async () => { + const { agent: ag } = await createCheckinConversation([user1]) + // Use default minInterval (10 min) — override the minInterval: 0 set in createCheckinConversation + ag.agentConfig = { ...ag.agentConfig, minInterval: 10 } + await loadPartTimeWorkTranscript(ag.conversation, true) + + // 3 participant messages make SOCIAL_REASSURANCE eligible + const msg1 = await createDirectMessage( + 'Sorry if obvious, but how do handoffs work?', + user1, + ag.conversation, + getTime(60) + ) + const reply1 = createAgentDirectMessage( + 'Handoff design is about documenting session state.', + ag, + user1._id, + ag.conversation, + getTime(75) + ) + const msg2 = await createDirectMessage( + "I'm probably wrong but wouldn't async accountability cause gaps?", + user1, + ag.conversation, + getTime(180) + ) + const reply2 = createAgentDirectMessage( + 'Written artifacts make gaps visible rather than hidden.', + ag, + user1._id, + ag.conversation, + getTime(195) + ) + const msg3 = await createDirectMessage( + 'Not sure this is worth asking, but does compensation parity work?', + user1, + ag.conversation, + getTime(300) + ) + const reply3 = createAgentDirectMessage( + 'Worth asking — she pays proportionally to full-time rates.', + ag, + user1._id, + ag.conversation, + getTime(315) + ) + + // Pre-seed a recent checkin already sent — rate limit should suppress another + const recentCheckin = createAgentDirectMessage( + "Nothing here requires certainty. Uncertainty usually means you're paying close attention.", + ag, + user1._id, + ag.conversation, + getTime(320) + ) + + await prepareMessagesForAgent([msg1, reply1, msg2, reply2, msg3, reply3, recentCheckin], ag.conversation, ag) + // endTime is 80s after the pre-seeded checkin — well within the 10-min minInterval + const responses = await runCheckin(ag, getTime(400)) + + const checkin = findCheckinForUser(responses, user1._id) + expect(checkin).toBeUndefined() + }, + testTimeout + ) + }) +}) diff --git a/tests/agents/eventMediator/eventMediator.agent.test.ts b/tests/agents/eventMediator/eventMediator.agent.test.ts index 3fcfc4d0..ea00c03f 100644 --- a/tests/agents/eventMediator/eventMediator.agent.test.ts +++ b/tests/agents/eventMediator/eventMediator.agent.test.ts @@ -356,7 +356,6 @@ describe(`event mediator agent tests`, () => { testTimeout ) }) - }) describe('privacy protection', () => { @@ -393,7 +392,7 @@ describe(`event mediator agent tests`, () => { }) describe('intervention context', () => { - it('includes intervention type and reasoning in context', async () => { + it('includes intervention type and reasoning in response', async () => { const messages = [ // Active chat await createMessage('Really useful information', user1, conversation, ['chat'], getMessageTime(100)), @@ -419,14 +418,12 @@ describe(`event mediator agent tests`, () => { const responses = await defaultAgentTypes.eventMediator.respond.call(agent, conversationHistory) if (responses.length > 0) { - expect(responses[0].context).toBeDefined() - expect(responses[0].context).toContain('Intervention Type:') - expect(responses[0].context).toContain('Reasoning:') + expect(responses[0].interventionType).toBeDefined() + expect(responses[0].reasoning).toBeDefined() // Verify it's a valid intervention type const validTypes = Object.values(InterventionType) - const hasValidType = validTypes.some((type) => responses[0].context.includes(type)) - expect(hasValidType).toBe(true) + expect(validTypes).toContain(responses[0].interventionType) } }) }) @@ -575,7 +572,7 @@ describe(`event mediator agent tests`, () => { // If it intervenes, verify it follows rules if (responses.length > 0) { - console.log(`Detected ${responses[0].context?.match(/Intervention Type: (\w+)/)?.[1]}:`, responses[0].message) + console.log(`Detected ${responses[0].interventionType}:`, responses[0].message) const { message } = responses[0] // Should surface the pattern without quoting individuals expect(message).not.toContain(user1.pseudonyms[0].pseudonym) @@ -653,7 +650,7 @@ describe(`event mediator agent tests`, () => { // Assert intervention occurred - accept SYNTHESIS, SIGNAL, or MINORITY_VOICE // (all are valid for surfacing private concerns that diverge from public enthusiasm) expect(responses.length).toBeGreaterThan(0) - const interventionType = responses[0].context?.match(/Intervention Type: (\w+)/)?.[1] + const { interventionType } = responses[0] expect(['SYNTHESIS', 'SIGNAL', 'MINORITY_VOICE']).toContain(interventionType) console.log(`Detected ${interventionType}:`, responses[0].message) @@ -739,13 +736,11 @@ describe(`event mediator agent tests`, () => { // For MINORITY_VOICE, we accept it may also be detected as SIGNAL - both valid if (responses.length > 0) { - const isMinorityOrSignal = responses[0].context?.includes('MINORITY') || responses[0].context?.includes('SIGNAL') + const isMinorityOrSignal = + responses[0].interventionType === 'MINORITY_VOICE' || responses[0].interventionType === 'SIGNAL' expect(isMinorityOrSignal).toBe(true) - console.log( - `Detected ${responses[0].context?.includes('MINORITY') ? 'MINORITY_VOICE' : 'SIGNAL'}:`, - responses[0].message - ) + console.log(`Detected ${responses[0].interventionType}:`, responses[0].message) const { message } = responses[0] // Should create space without identifying dissenters expect(message).not.toContain(user3.pseudonyms[0].pseudonym) @@ -785,7 +780,7 @@ describe(`event mediator agent tests`, () => { const responses = await defaultAgentTypes.eventMediator.respond.call(agent, conversationHistory) - if (responses.length > 0 && responses[0].context?.includes('CONFUSION')) { + if (responses.length > 0 && responses[0].interventionType === 'CONFUSION') { console.log('Detected CONFUSION:', responses[0].message) const { message } = responses[0] // Should help clarify without exposing who was confused @@ -864,7 +859,7 @@ describe(`event mediator agent tests`, () => { // BRIDGE is context-dependent - may be SIGNAL or other type if (responses.length > 0) { - console.log(`Detected ${responses[0].context?.match(/Intervention Type: (\w+)/)?.[1]}:`, responses[0].message) + console.log(`Detected ${responses[0].interventionType}:`, responses[0].message) expect(responses[0].message).toBeDefined() } }, @@ -916,7 +911,7 @@ describe(`event mediator agent tests`, () => { const responses = await defaultAgentTypes.eventMediator.respond.call(agent, conversationHistory) - if (responses.length > 0 && responses[0].context?.includes('STRUCTURE')) { + if (responses.length > 0 && responses[0].interventionType === 'STRUCTURE') { console.log('Detected STRUCTURE:', responses[0].message) const { message } = responses[0] // Should provide orientation or structure @@ -1211,7 +1206,7 @@ describe(`event mediator agent tests`, () => { // Should detect this as a good question to surface expect(responses.length).toBeGreaterThan(0) console.log('Mediator response about healthcare:', responses[0].message) - expect(responses[0].context).toContain('SIGNAL') + expect(responses[0].interventionType).toBe('SIGNAL') // Should not expose individual questions verbatim expect(responses[0].message).not.toContain('How do you handle health insurance for part-time workers') }, @@ -1304,7 +1299,7 @@ describe(`event mediator agent tests`, () => { const responses = await defaultAgentTypes.eventMediator.respond.call(agent, conversationHistory) - if (responses.length > 0 && responses[0].context?.includes('CONFUSION')) { + if (responses.length > 0 && responses[0].interventionType === 'CONFUSION') { console.log('Detected CONFUSION about terminology:', responses[0].message) // Should help clarify without exposing who was confused expect(responses[0].message).not.toContain(user1.pseudonyms[0].pseudonym) @@ -1357,7 +1352,7 @@ describe(`event mediator agent tests`, () => { // BRIDGE interventions are contextual - may or may not occur if (responses.length > 0) { - console.log(`Detected ${responses[0].context?.match(/Intervention Type: (\w+)/)?.[1]}:`, responses[0].message) + console.log(`Detected ${responses[0].interventionType}:`, responses[0].message) expect(responses[0].message).toBeDefined() } }, diff --git a/tests/integration/user.test.ts b/tests/integration/user.test.ts index 0066fc49..3eec38ee 100644 --- a/tests/integration/user.test.ts +++ b/tests/integration/user.test.ts @@ -25,6 +25,7 @@ const createVote = () => ({ owner: new mongoose.Types.ObjectId() }) +jest.setTimeout(10000) setupIntTest() describe('User routes', () => { diff --git a/tests/utils/agentTestHelpers.ts b/tests/utils/agentTestHelpers.ts index c2d7258b..58245139 100644 --- a/tests/utils/agentTestHelpers.ts +++ b/tests/utils/agentTestHelpers.ts @@ -897,7 +897,8 @@ export async function createUser(pseudonym) { pseudonym, active: 'true' } - ] + ], + preferences: { visualResponse: false, jargonClarification: false } } await insertUsers([user]) return user diff --git a/tests/utils/setupAgentTest.ts b/tests/utils/setupAgentTest.ts index fe7524ca..858d57d0 100644 --- a/tests/utils/setupAgentTest.ts +++ b/tests/utils/setupAgentTest.ts @@ -10,7 +10,7 @@ const setupAgentTest = (agentType?) => { let chromaCollectionPrefix beforeAll(async () => { chromaCollectionPrefix = config.chroma.embeddingsCollectionPrefix - config.chroma.embeddingsCollectionPrefix = 'llm-engine-test' + config.chroma.embeddingsCollectionPrefix = `llm-engine-test-w${process.env.JEST_WORKER_ID ?? '0'}` await initializeEvaluators() })