diff --git a/apps/sim/app/api/knowledge/[id]/documents/route.ts b/apps/sim/app/api/knowledge/[id]/documents/route.ts index d9f5beb7e30..82cd8f6bfa4 100644 --- a/apps/sim/app/api/knowledge/[id]/documents/route.ts +++ b/apps/sim/app/api/knowledge/[id]/documents/route.ts @@ -8,6 +8,7 @@ import { bulkKnowledgeDocumentsContract, createKnowledgeDocumentsContract, listKnowledgeDocumentsQuerySchema, + parseDocumentTagFiltersParam, } from '@/lib/api/contracts/knowledge' import { parseRequest } from '@/lib/api/server' import { getSession } from '@/lib/auth' @@ -67,6 +68,18 @@ export const GET = withRouteHandler( const { enabledFilter, search, limit, offset, sortBy, sortOrder, tagFilters } = queryResult.data + let parsedTagFilters: TagFilterCondition[] | undefined + try { + parsedTagFilters = parseDocumentTagFiltersParam(tagFilters) as + | TagFilterCondition[] + | undefined + } catch { + return NextResponse.json( + { error: 'tagFilters must be a valid JSON array' }, + { status: 400 } + ) + } + const result = await getDocuments( knowledgeBaseId, { @@ -76,7 +89,7 @@ export const GET = withRouteHandler( offset, ...(sortBy && { sortBy }), ...(sortOrder && { sortOrder }), - tagFilters: tagFilters as TagFilterCondition[] | undefined, + tagFilters: parsedTagFilters, }, requestId ) diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx index 2f7bfa3b140..f43dfa29181 100644 --- a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx +++ b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx @@ -264,13 +264,20 @@ export function KnowledgeBase({ const activeTagFilters: DocumentTagFilter[] = useMemo( () => tagFilterEntries - .filter((f) => f.tagSlot && f.value.trim()) + .filter((f) => { + if (!f.tagSlot || !f.value.trim()) return false + // A `between` filter only applies once both bounds are set. Sending it + // with just the lower bound would be rejected at the API boundary and + // break the whole list while the user is still entering the range. + if (f.operator === 'between' && !f.valueTo.trim()) return false + return true + }) .map((f) => ({ tagSlot: f.tagSlot, fieldType: f.fieldType, operator: f.operator, value: f.value, - ...(f.operator === 'between' && f.valueTo ? { valueTo: f.valueTo } : {}), + ...(f.operator === 'between' ? { valueTo: f.valueTo } : {}), })), [tagFilterEntries] ) @@ -1466,11 +1473,25 @@ const createEmptyEntry = (): TagFilterEntry => ({ tagName: '', tagSlot: '', fieldType: 'text', - operator: 'eq', + operator: 'contains', value: '', valueTo: '', }) +/** + * Default operator when a tag is selected. Text filters default to `contains` + * so typing part of a value finds matches (exact `equals` stays one click away + * in the operator dropdown); other field types keep their first, equality + * operator. + */ +function getDefaultOperatorForFieldType( + fieldType: FilterFieldType, + operators: ReturnType +): string { + if (fieldType === 'text') return 'contains' + return operators[0]?.value ?? 'eq' +} + interface TagFilterSectionProps { tagDefinitions: TagDefinition[] entries: TagFilterEntry[] @@ -1601,7 +1622,7 @@ function TagFilterSection({ tagDefinitions, entries, onChange }: TagFilterSectio tagName, tagSlot: def?.tagSlot || '', fieldType, - operator: operators[0]?.value || 'eq', + operator: getDefaultOperatorForFieldType(fieldType, operators), value: '', valueTo: '', }) diff --git a/apps/sim/lib/api/client/request.test.ts b/apps/sim/lib/api/client/request.test.ts new file mode 100644 index 00000000000..e28ed0837b2 --- /dev/null +++ b/apps/sim/lib/api/client/request.test.ts @@ -0,0 +1,71 @@ +/** + * @vitest-environment node + */ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { z } from 'zod' +import { requestJson } from '@/lib/api/client/request' +import { listKnowledgeDocumentsContract } from '@/lib/api/contracts/knowledge' +import { defineRouteContract } from '@/lib/api/contracts/types' + +/** + * Captures the URL of the last fetch call and returns a valid JSON response so + * `requestJson`'s response validation passes. + */ +function mockFetchReturning(body: unknown) { + const fetchMock = vi.fn( + async () => + new Response(JSON.stringify(body), { + status: 200, + headers: { 'content-type': 'application/json' }, + }) + ) + vi.stubGlobal('fetch', fetchMock) + return fetchMock +} + +describe('requestJson query serialization', () => { + afterEach(() => { + vi.unstubAllGlobals() + }) + + it('serializes a JSON-string query param verbatim (regression: tagFilters)', async () => { + const fetchMock = mockFetchReturning({ + success: true, + data: { + documents: [], + pagination: { total: 0, limit: 50, offset: 0, hasMore: false }, + }, + }) + + const tagFilters = JSON.stringify([ + { tagSlot: 'tag1', fieldType: 'text', operator: 'contains', value: 'Ada Lovelace' }, + ]) + + await requestJson(listKnowledgeDocumentsContract, { + params: { id: 'kb-1' }, + query: { tagFilters }, + }) + + const calledUrl = String(fetchMock.mock.calls[0][0]) + const url = new URL(calledUrl, 'https://example.test') + // The param must round-trip as the exact JSON, never "[object Object]". + expect(url.searchParams.get('tagFilters')).toBe(tagFilters) + expect(calledUrl).not.toContain('object+Object') + expect(calledUrl).not.toContain('[object Object]') + }) + + it('throws instead of silently corrupting an array-of-objects query param', async () => { + mockFetchReturning({ ok: true }) + + const badContract = defineRouteContract({ + method: 'GET', + path: '/api/test', + query: z.object({ items: z.array(z.object({ a: z.string() })) }), + response: { mode: 'json', schema: z.object({ ok: z.boolean() }) }, + }) + + await expect(requestJson(badContract, { query: { items: [{ a: 'x' }] } })).rejects.toThrow( + /arrays of objects are not URL-safe/ + ) + }) +}) diff --git a/apps/sim/lib/api/client/request.ts b/apps/sim/lib/api/client/request.ts index d0e8255d2f5..85a7d2ac7fb 100644 --- a/apps/sim/lib/api/client/request.ts +++ b/apps/sim/lib/api/client/request.ts @@ -68,9 +68,19 @@ function appendQuery(path: string, query: unknown): string { if (Array.isArray(value)) { for (const item of value) { - if (item !== undefined && item !== null && item !== '') { - searchParams.append(key, String(item)) + if (item === undefined || item === null || item === '') continue + // A non-scalar in a query array would stringify to "[object Object]" and + // silently corrupt the request. Encode such values as a single JSON + // string param and decode them server-side instead. Failing loudly here + // keeps the boundary honest (this is how the knowledge tagFilters bug + // shipped undetected). + if (typeof item === 'object') { + throw new Error( + `Cannot serialize query param "${key}": arrays of objects are not URL-safe — ` + + 'encode the value as a JSON string param and decode it server-side.' + ) } + searchParams.append(key, String(item)) } continue } diff --git a/apps/sim/lib/api/contracts/knowledge/documents.test.ts b/apps/sim/lib/api/contracts/knowledge/documents.test.ts new file mode 100644 index 00000000000..f39274830ce --- /dev/null +++ b/apps/sim/lib/api/contracts/knowledge/documents.test.ts @@ -0,0 +1,145 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { + listKnowledgeDocumentsQuerySchema, + parseDocumentTagFiltersParam, +} from '@/lib/api/contracts/knowledge/documents' + +describe('listKnowledgeDocumentsQuerySchema.tagFilters', () => { + it('keeps tagFilters a raw string (must NOT transform to an array)', () => { + // A transform-to-array here breaks requestJson outbound serialization + // (the array serializes as "[object Object]"). The wire type must stay a + // string; decoding happens server-side via parseDocumentTagFiltersParam. + const tagFilters = JSON.stringify([ + { tagSlot: 'tag1', fieldType: 'text', operator: 'contains', value: 'x' }, + ]) + const parsed = listKnowledgeDocumentsQuerySchema.parse({ tagFilters }) + expect(parsed.tagFilters).toBe(tagFilters) + expect(typeof parsed.tagFilters).toBe('string') + }) +}) + +describe('parseDocumentTagFiltersParam', () => { + it('returns undefined for an absent param', () => { + expect(parseDocumentTagFiltersParam(undefined)).toBeUndefined() + expect(parseDocumentTagFiltersParam('')).toBeUndefined() + }) + + it('decodes a valid JSON array of filters', () => { + const filters = [ + { tagSlot: 'tag1', fieldType: 'text', operator: 'contains', value: 'x' }, + { tagSlot: 'date1', fieldType: 'date', operator: 'eq', value: '2026-04-21' }, + ] + expect(parseDocumentTagFiltersParam(JSON.stringify(filters))).toEqual(filters) + }) + + it('throws on malformed JSON', () => { + expect(() => parseDocumentTagFiltersParam('[object Object]')).toThrow() + expect(() => parseDocumentTagFiltersParam('{not json')).toThrow() + }) + + it('throws when the shape is wrong', () => { + expect(() => parseDocumentTagFiltersParam(JSON.stringify([{ tagSlot: '' }]))).toThrow() + }) + + it('rejects an operator that is not valid for the field type', () => { + // unknown operator + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([{ tagSlot: 'tag1', fieldType: 'text', operator: 'bogus', value: 'x' }]) + ) + ).toThrow() + // valid operator name, wrong field type (contains is text-only) + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([ + { tagSlot: 'number1', fieldType: 'number', operator: 'contains', value: '1' }, + ]) + ) + ).toThrow() + }) + + it('rejects a fieldType that does not match the tag slot', () => { + // number1 is a numeric column; claiming it is text must fail + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([ + { tagSlot: 'number1', fieldType: 'text', operator: 'contains', value: 'x' }, + ]) + ) + ).toThrow() + }) + + it('rejects an unknown tag slot', () => { + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([{ tagSlot: 'tag99', fieldType: 'text', operator: 'eq', value: 'x' }]) + ) + ).toThrow() + }) + + it('rejects values that are unusable for the field type', () => { + // non-numeric value on a number field + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([{ tagSlot: 'number1', fieldType: 'number', operator: 'eq', value: 'abc' }]) + ) + ).toThrow() + // non-date value on a date field + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([{ tagSlot: 'date1', fieldType: 'date', operator: 'eq', value: 'nope' }]) + ) + ).toThrow() + // well-formed but impossible calendar dates (would 500 on ::date) + for (const value of ['2026-02-30', '2026-99-99', '2026-13-01', '2026-00-10']) { + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([{ tagSlot: 'date1', fieldType: 'date', operator: 'eq', value }]) + ) + ).toThrow() + } + // non-boolean value on a boolean field + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([ + { tagSlot: 'boolean1', fieldType: 'boolean', operator: 'eq', value: 'maybe' }, + ]) + ) + ).toThrow() + }) + + it('rejects a between filter missing a usable upper bound', () => { + expect(() => + parseDocumentTagFiltersParam( + JSON.stringify([ + { + tagSlot: 'number1', + fieldType: 'number', + operator: 'between', + value: '1', + valueTo: 'x', + }, + ]) + ) + ).toThrow() + }) + + it('accepts a valid number, date, boolean, and between filter', () => { + const filters = [ + { tagSlot: 'number1', fieldType: 'number', operator: 'gte', value: '42' }, + { tagSlot: 'date1', fieldType: 'date', operator: 'eq', value: '2026-04-21' }, + { tagSlot: 'boolean1', fieldType: 'boolean', operator: 'eq', value: 'true' }, + { + tagSlot: 'number2', + fieldType: 'number', + operator: 'between', + value: '1', + valueTo: '10', + }, + ] + expect(parseDocumentTagFiltersParam(JSON.stringify(filters))).toEqual(filters) + }) +}) diff --git a/apps/sim/lib/api/contracts/knowledge/documents.ts b/apps/sim/lib/api/contracts/knowledge/documents.ts index 6a85005f700..bca7425f89e 100644 --- a/apps/sim/lib/api/contracts/knowledge/documents.ts +++ b/apps/sim/lib/api/contracts/knowledge/documents.ts @@ -13,14 +13,66 @@ import { wireDateSchema, } from '@/lib/api/contracts/knowledge/shared' import { defineRouteContract } from '@/lib/api/contracts/types' +import { getFieldTypeForSlot } from '@/lib/knowledge/constants' +import { getOperatorsForFieldType, isValidFilterValue } from '@/lib/knowledge/filters/types' -export const documentTagFilterSchema = z.object({ - tagSlot: z.string().min(1), - fieldType: z.enum(['text', 'number', 'date', 'boolean']), - operator: z.string().min(1), - value: z.unknown(), - valueTo: z.unknown().optional(), -}) +export const documentTagFilterSchema = z + .object({ + tagSlot: z.string().min(1), + fieldType: z.enum(['text', 'number', 'date', 'boolean']), + operator: z.string().min(1), + value: z.unknown(), + valueTo: z.unknown().optional(), + }) + .superRefine((filter, ctx) => { + // The tag slot determines the column type, so validate against the slot + // (the source of truth) — not just the client-supplied fieldType. Rejecting + // unknown slots, type mismatches, and bad operators at the boundary returns + // a 400 instead of the query builder silently dropping or mis-handling the + // filter (e.g. a text `contains` aimed at a numeric column). + const slotFieldType = getFieldTypeForSlot(filter.tagSlot) + if (slotFieldType === null) { + ctx.addIssue({ + code: 'custom', + path: ['tagSlot'], + message: `Unknown tag slot "${filter.tagSlot}"`, + }) + return + } + if (slotFieldType !== filter.fieldType) { + ctx.addIssue({ + code: 'custom', + path: ['fieldType'], + message: `fieldType "${filter.fieldType}" does not match tag slot "${filter.tagSlot}" (expected "${slotFieldType}")`, + }) + return + } + const validOperators = getOperatorsForFieldType(filter.fieldType).map((op) => op.value) + if (!validOperators.includes(filter.operator)) { + ctx.addIssue({ + code: 'custom', + path: ['operator'], + message: `Unsupported operator "${filter.operator}" for a ${filter.fieldType} tag filter`, + }) + return + } + if (!isValidFilterValue(filter.fieldType, filter.value)) { + ctx.addIssue({ + code: 'custom', + path: ['value'], + message: `Invalid value for a ${filter.fieldType} tag filter`, + }) + } + // `between` is only valid for number/date (enforced by the operator check + // above), and needs a usable upper bound. + if (filter.operator === 'between' && !isValidFilterValue(filter.fieldType, filter.valueTo)) { + ctx.addIssue({ + code: 'custom', + path: ['valueTo'], + message: `Invalid second value for a ${filter.fieldType} "between" tag filter`, + }) + } + }) export type DocumentTagFilter = z.output export const listKnowledgeDocumentsQuerySchema = z.object({ @@ -40,20 +92,25 @@ export const listKnowledgeDocumentsQuerySchema = z.object({ ]) .optional(), sortOrder: z.enum(['asc', 'desc']).optional(), - tagFilters: z - .string() - .optional() - .transform((value, ctx) => { - if (!value) return undefined - try { - return z.array(documentTagFilterSchema).parse(JSON.parse(value)) - } catch { - ctx.addIssue({ code: 'custom', message: 'tagFilters must be a valid JSON array' }) - return z.NEVER - } - }), + // A query param is a string on the wire, so `tagFilters` is carried as a JSON + // string and decoded by the route via `parseDocumentTagFiltersParam`. It must + // NOT be a `.transform()` to an array here: the client's `requestJson` parses + // the query before serializing it, so a transform would turn the string into + // an array that serializes to `tagFilters=[object Object]` and 400s the route. + tagFilters: z.string().optional(), }) +/** + * Decodes the `tagFilters` query string (a JSON array) into validated filters. + * Throws on malformed JSON or a shape mismatch; callers map that to a 400. + */ +export function parseDocumentTagFiltersParam( + value: string | undefined +): DocumentTagFilter[] | undefined { + if (!value) return undefined + return z.array(documentTagFilterSchema).parse(JSON.parse(value)) +} + export const createDocumentBodySchema = z.object({ filename: z.string().min(1, 'Filename is required'), fileUrl: knowledgeDocumentFileUrlSchema, diff --git a/apps/sim/lib/knowledge/filters/types.ts b/apps/sim/lib/knowledge/filters/types.ts index 3416b6c4f7b..90e5846a149 100644 --- a/apps/sim/lib/knowledge/filters/types.ts +++ b/apps/sim/lib/knowledge/filters/types.ts @@ -189,3 +189,45 @@ export function getOperatorsForFieldType(fieldType: FilterFieldType): OperatorIn return [] } } + +/** Wire format for a date filter value (`YYYY-MM-DD`). */ +const DATE_ONLY_VALUE = /^\d{4}-\d{2}-\d{2}$/ + +/** + * Whether a `YYYY-MM-DD` string is a real calendar date. The format regex alone + * still admits impossible dates (`2026-02-30`, `2026-99-99`) that pass the + * boundary and then make the document query's `::date` cast throw a 500; this + * round-trips the parsed parts to reject them. + */ +function isRealCalendarDate(value: string): boolean { + if (!DATE_ONLY_VALUE.test(value)) return false + const [year, month, day] = value.split('-').map(Number) + const date = new Date(year, month - 1, day) + return date.getFullYear() === year && date.getMonth() === month - 1 && date.getDate() === day +} + +/** + * Whether a raw filter value is usable for the given field type. Shared source + * of truth so the API boundary can reject unusable values (e.g. `"abc"` for a + * number, `"not-a-date"` for a date) instead of letting them be silently + * dropped further down. Values arrive as strings from the filter UI. + */ +export function isValidFilterValue(fieldType: FilterFieldType, value: unknown): boolean { + if (value === undefined || value === null) return false + switch (fieldType) { + case 'text': + return typeof value === 'string' && value.length > 0 + case 'number': + if (typeof value === 'number') return Number.isFinite(value) + return typeof value === 'string' && value.trim() !== '' && Number.isFinite(Number(value)) + case 'date': + return typeof value === 'string' && isRealCalendarDate(value) + case 'boolean': + return ( + typeof value === 'boolean' || + (typeof value === 'string' && ['true', 'false'].includes(value.trim().toLowerCase())) + ) + default: + return false + } +}