From bee1a2109bb2c7277eca3a295946251664be747f Mon Sep 17 00:00:00 2001 From: Ritwij Aryan Parmar Date: Wed, 10 Jun 2026 14:34:40 -0400 Subject: [PATCH] fix: normalize act cache url keys --- packages/core/lib/v3/cache/ActCache.ts | 39 +++++++- packages/core/lib/v3/cache/utils.ts | 17 ++++ packages/core/lib/v3/types/private/cache.ts | 1 + packages/core/tests/cache-variables.test.ts | 98 +++++++++++++++++++ .../unit/cache-url-normalization.test.ts | 27 +++++ 5 files changed, 179 insertions(+), 3 deletions(-) create mode 100644 packages/core/tests/unit/cache-url-normalization.test.ts diff --git a/packages/core/lib/v3/cache/ActCache.ts b/packages/core/lib/v3/cache/ActCache.ts index d11ba9cc04..0507e2cb15 100644 --- a/packages/core/lib/v3/cache/ActCache.ts +++ b/packages/core/lib/v3/cache/ActCache.ts @@ -4,7 +4,11 @@ import type { LLMClient } from "../llm/LLMClient.js"; import type { Action, ActResult, Logger } from "../types/public/index.js"; import type { Page } from "../understudy/page.js"; import { CacheStorage } from "./CacheStorage.js"; -import { safeGetPageUrl, waitForCachedSelector } from "./utils.js"; +import { + normalizeUrlForCacheKey, + safeGetPageUrl, + waitForCachedSelector, +} from "./utils.js"; import { ActCacheContext, ActCacheDeps, @@ -50,14 +54,21 @@ export class ActCache { ? Object.keys(sanitizedVariables).sort() : []; const pageUrl = await safeGetPageUrl(page); + const normalizedPageUrl = normalizeUrlForCacheKey(pageUrl); const cacheKey = this.buildActCacheKey( sanitizedInstruction, - pageUrl, + normalizedPageUrl, variableKeys, ); + const legacyCacheKey = + normalizedPageUrl === pageUrl + ? undefined + : this.buildActCacheKey(sanitizedInstruction, pageUrl, variableKeys); + return { instruction: sanitizedInstruction, cacheKey, + legacyCacheKey, pageUrl, variableKeys, variables: sanitizedVariables, @@ -72,7 +83,7 @@ export class ActCache { ): Promise { if (!this.enabled) return null; - const { + let { value: entry, error, path, @@ -88,6 +99,28 @@ export class ActCache { }); return null; } + + if (!entry && context.legacyCacheKey) { + const legacyResult = await this.storage.readJson( + `${context.legacyCacheKey}.json`, + ); + entry = legacyResult.value; + error = legacyResult.error; + path = legacyResult.path; + + if (error && path) { + this.logger({ + category: "cache", + message: `failed to read legacy act cache entry: ${path}`, + level: 2, + auxiliary: { + error: { value: String(error), type: "string" }, + }, + }); + return null; + } + } + if (!entry) return null; if (entry.version !== 1) return null; if (!Array.isArray(entry.actions) || entry.actions.length === 0) { diff --git a/packages/core/lib/v3/cache/utils.ts b/packages/core/lib/v3/cache/utils.ts index b4490b1717..9e6b2f4c38 100644 --- a/packages/core/lib/v3/cache/utils.ts +++ b/packages/core/lib/v3/cache/utils.ts @@ -15,6 +15,23 @@ export async function safeGetPageUrl(page: Page): Promise { } } +export function normalizeUrlForCacheKey(url: string): string { + if (!url) return url; + + try { + const parsedUrl = new URL(url); + if (!parsedUrl.search) return url; + + const originalSearch = parsedUrl.search; + parsedUrl.searchParams.sort(); + if (parsedUrl.search === originalSearch) return url; + + return parsedUrl.toString(); + } catch { + return url; + } +} + /** * Waits for a cached action's selector to be attached to the DOM before executing. * Logs a warning and proceeds if the wait times out (non-blocking). diff --git a/packages/core/lib/v3/types/private/cache.ts b/packages/core/lib/v3/types/private/cache.ts index 074f4e59b7..6abef8e0e9 100644 --- a/packages/core/lib/v3/types/private/cache.ts +++ b/packages/core/lib/v3/types/private/cache.ts @@ -49,6 +49,7 @@ export type AgentCacheDeps = { export type ActCacheContext = { instruction: string; cacheKey: string; + legacyCacheKey?: string; pageUrl: string; variableKeys: string[]; variables?: Record; diff --git a/packages/core/tests/cache-variables.test.ts b/packages/core/tests/cache-variables.test.ts index fc63abbafb..4945eb8fe1 100644 --- a/packages/core/tests/cache-variables.test.ts +++ b/packages/core/tests/cache-variables.test.ts @@ -74,6 +74,104 @@ describe("ActCache variable handling", () => { expect(context2?.variables).toEqual({ username: "user2@example.com" }); }); + it("normalizes query parameter order for cache keys only", async () => { + const storage = { + enabled: true, + readJson: vi.fn(), + writeJson: vi.fn().mockResolvedValue({}), + directory: "/tmp/cache", + } as unknown as CacheStorage; + + const cache = new ActCache({ + storage, + logger: vi.fn(), + getActHandler: () => null as unknown as ActHandler, + getDefaultLlmClient: () => ({}) as LLMClient, + domSettleTimeoutMs: undefined, + }); + + const firstPage = { + url: vi.fn().mockReturnValue("https://example.com/search?b=2&a=1"), + } as unknown as Page; + const secondPage = { + url: vi.fn().mockReturnValue("https://example.com/search?a=1&b=2"), + } as unknown as Page; + + const context1 = await cache.prepareContext( + "click the first result", + firstPage, + ); + const context2 = await cache.prepareContext( + "click the first result", + secondPage, + ); + + expect(context1?.cacheKey).toBe(context2?.cacheKey); + expect(context1?.legacyCacheKey).toBeDefined(); + expect(context1?.legacyCacheKey).not.toBe(context1?.cacheKey); + expect(context1?.pageUrl).toBe("https://example.com/search?b=2&a=1"); + expect(context2?.pageUrl).toBe("https://example.com/search?a=1&b=2"); + }); + + it("falls back to legacy raw URL cache keys", async () => { + const action: Action = { + selector: "xpath=/html/body/button", + description: "click button", + method: "click", + arguments: [], + }; + const entry: CachedActEntry = { + version: 1, + instruction: "click the first result", + url: "https://example.com/search?b=2&a=1", + variableKeys: [], + actions: [action], + actionDescription: "click button", + message: "done", + }; + const storage = { + enabled: true, + readJson: vi + .fn() + .mockResolvedValueOnce({ value: null }) + .mockResolvedValueOnce({ value: entry }), + writeJson: vi.fn().mockResolvedValue({}), + directory: "/tmp/cache", + } as unknown as CacheStorage; + const handler = { + takeDeterministicAction: vi.fn().mockResolvedValue({ + success: true, + message: "ok", + actionDescription: "click button", + actions: [action], + }), + } as unknown as ActHandler; + const cache = new ActCache({ + storage, + logger: vi.fn(), + getActHandler: () => handler, + getDefaultLlmClient: () => ({}) as LLMClient, + domSettleTimeoutMs: undefined, + }); + const page = { + waitForSelector: vi.fn().mockResolvedValue(undefined), + } as unknown as Page; + const context = { + instruction: "click the first result", + cacheKey: "normalized-key", + legacyCacheKey: "legacy-key", + pageUrl: "https://example.com/search?b=2&a=1", + variableKeys: [], + }; + + const result = await cache.tryReplay(context, page); + + expect(result?.success).toBe(true); + expect(storage.readJson).toHaveBeenCalledWith("normalized-key.json"); + expect(storage.readJson).toHaveBeenCalledWith("legacy-key.json"); + expect(handler.takeDeterministicAction).toHaveBeenCalledTimes(1); + }); + it("replays cached actions with variable substitution", async () => { // Cached action contains variable placeholder %username% const action: Action = { diff --git a/packages/core/tests/unit/cache-url-normalization.test.ts b/packages/core/tests/unit/cache-url-normalization.test.ts new file mode 100644 index 0000000000..67e44a4953 --- /dev/null +++ b/packages/core/tests/unit/cache-url-normalization.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, it } from "vitest"; +import { normalizeUrlForCacheKey } from "../../lib/v3/cache/utils.js"; + +describe("normalizeUrlForCacheKey", () => { + it("sorts query parameters without changing the rest of the URL", () => { + expect( + normalizeUrlForCacheKey("https://example.com/search?b=2&a=1#items"), + ).toBe("https://example.com/search?a=1&b=2#items"); + }); + + it("keeps repeated parameter order stable within the same key", () => { + expect( + normalizeUrlForCacheKey("https://example.com/search?tag=b&a=1&tag=a"), + ).toBe("https://example.com/search?a=1&tag=b&tag=a"); + }); + + it("returns non-URL inputs unchanged", () => { + expect(normalizeUrlForCacheKey("about:blank")).toBe("about:blank"); + expect(normalizeUrlForCacheKey("not a url")).toBe("not a url"); + }); + + it("leaves URLs unchanged when query parameters are already stable", () => { + expect(normalizeUrlForCacheKey("https://example.com/search?a=1&b=2")).toBe( + "https://example.com/search?a=1&b=2", + ); + }); +});