From 21643a4532f7d43ec4e20cb93bac2280826dd603 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:41:33 +0000 Subject: [PATCH 01/11] Bump version to 2.10.11 --- api/dependencies/package-lock.json | 4 ++-- api/dependencies/package.json | 2 +- api/package-lock.json | 4 ++-- api/package.json | 2 +- api/src/package-lock.json | 4 ++-- api/src/package.json | 2 +- av-download/lambdas/package-lock.json | 4 ++-- av-download/lambdas/package.json | 2 +- chat/pyproject.toml | 2 +- chat/uv.lock | 2 +- docs/pyproject.toml | 2 +- docs/uv.lock | 2 +- mcp/apps/mcp/package.json | 2 +- mcp/package-lock.json | 4 ++-- mcp/package.json | 2 +- mcp/server.json | 6 +++--- 16 files changed, 23 insertions(+), 23 deletions(-) diff --git a/api/dependencies/package-lock.json b/api/dependencies/package-lock.json index 3c700f75..13581b22 100644 --- a/api/dependencies/package-lock.json +++ b/api/dependencies/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api-dependencies", - "version": "2.10.10", + "version": "2.10.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api-dependencies", - "version": "2.10.10", + "version": "2.10.11", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "^2.0.1", diff --git a/api/dependencies/package.json b/api/dependencies/package.json index fd0a467d..e2af173d 100644 --- a/api/dependencies/package.json +++ b/api/dependencies/package.json @@ -1,6 +1,6 @@ { "name": "dc-api-dependencies", - "version": "2.10.10", + "version": "2.10.11", "description": "NUL Digital Collections API Dependencies", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/package-lock.json b/api/package-lock.json index 62f9aa2d..c6b8365e 100644 --- a/api/package-lock.json +++ b/api/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api-build", - "version": "2.10.10", + "version": "2.10.11", "lockfileVersion": 3, "requires": true, 
"packages": { "": { "name": "dc-api-build", - "version": "2.10.10", + "version": "2.10.11", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { diff --git a/api/package.json b/api/package.json index 30a9644c..58fcc27e 100644 --- a/api/package.json +++ b/api/package.json @@ -1,6 +1,6 @@ { "name": "dc-api-build", - "version": "2.10.10", + "version": "2.10.11", "description": "NUL Digital Collections API Build Environment", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/src/package-lock.json b/api/src/package-lock.json index d2cd8db6..ba60495e 100644 --- a/api/src/package-lock.json +++ b/api/src/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api", - "version": "2.10.10", + "version": "2.10.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api", - "version": "2.10.10", + "version": "2.10.11", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "^2.0.1", diff --git a/api/src/package.json b/api/src/package.json index 823d5aa8..7286011a 100644 --- a/api/src/package.json +++ b/api/src/package.json @@ -1,6 +1,6 @@ { "name": "dc-api", - "version": "2.10.10", + "version": "2.10.11", "description": "NUL Digital Collections API", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/av-download/lambdas/package-lock.json b/av-download/lambdas/package-lock.json index 955ae10a..66116294 100644 --- a/av-download/lambdas/package-lock.json +++ b/av-download/lambdas/package-lock.json @@ -1,12 +1,12 @@ { "name": "lambdas", - "version": "2.10.10", + "version": "2.10.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lambdas", - "version": "2.10.10", + "version": "2.10.11", "license": "Apache-2.0", "dependencies": { "fluent-ffmpeg": "2.1.3" diff --git a/av-download/lambdas/package.json b/av-download/lambdas/package.json index 70fbc8d1..f3339b77 100644 --- a/av-download/lambdas/package.json +++ b/av-download/lambdas/package.json 
@@ -1,6 +1,6 @@ { "name": "lambdas", - "version": "2.10.10", + "version": "2.10.11", "description": "Non-API handler lambdas", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" diff --git a/chat/pyproject.toml b/chat/pyproject.toml index 8f661926..99cd2aaf 100644 --- a/chat/pyproject.toml +++ b/chat/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dc-api-v2-chat" -version = "2.10.10" +version = "2.10.11" requires-python = ">=3.12" dependencies = [ "boto3~=1.34", diff --git a/chat/uv.lock b/chat/uv.lock index f71e924c..5d95dc07 100644 --- a/chat/uv.lock +++ b/chat/uv.lock @@ -302,7 +302,7 @@ wheels = [ [[package]] name = "dc-api-v2-chat" -version = "2.10.10" +version = "2.10.11" source = { virtual = "." } dependencies = [ { name = "boto3" }, diff --git a/docs/pyproject.toml b/docs/pyproject.toml index 68bff83c..17c4caef 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dc-api-v2-docs" -version = "2.10.10" +version = "2.10.11" requires-python = ">=3.12" dependencies = [ "mkdocs>=1.1.2,<2.0.0", diff --git a/docs/uv.lock b/docs/uv.lock index 246e1200..5d9d770d 100644 --- a/docs/uv.lock +++ b/docs/uv.lock @@ -123,7 +123,7 @@ wheels = [ [[package]] name = "dc-api-v2-docs" -version = "2.10.10" +version = "2.10.11" source = { virtual = "." 
} dependencies = [ { name = "diagrams" }, diff --git a/mcp/apps/mcp/package.json b/mcp/apps/mcp/package.json index 953a6004..c968b9cc 100644 --- a/mcp/apps/mcp/package.json +++ b/mcp/apps/mcp/package.json @@ -1,5 +1,5 @@ { "name": "mcp", - "version": "2.10.10", + "version": "2.10.11", "type": "module" } diff --git a/mcp/package-lock.json b/mcp/package-lock.json index 5cee0150..1498aab3 100644 --- a/mcp/package-lock.json +++ b/mcp/package-lock.json @@ -1,12 +1,12 @@ { "name": "@nulib/dc-api-mcp", - "version": "2.10.10", + "version": "2.10.11", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@nulib/dc-api-mcp", - "version": "2.10.10", + "version": "2.10.11", "license": "MIT", "dependencies": { "@aws-sdk/client-secrets-manager": "^3.563.0", diff --git a/mcp/package.json b/mcp/package.json index f5ebc344..9b82a35d 100644 --- a/mcp/package.json +++ b/mcp/package.json @@ -1,7 +1,7 @@ { "name": "@nulib/dc-api-mcp", "mcpName": "io.github.nulib/dc-api", - "version": "2.10.10", + "version": "2.10.11", "description": "Agent integration with the Northwestern University Libraries Digital Collections API", "repository": { "type": "git", diff --git a/mcp/server.json b/mcp/server.json index 0f83c168..380991d0 100644 --- a/mcp/server.json +++ b/mcp/server.json @@ -1,7 +1,7 @@ { "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", "name": "io.github.nulib/dc-api", - "version": "2.10.10", + "version": "2.10.11", "title": "Northwestern University Libraries Digital Collections API", "description": "Agent integration with the Northwestern University Libraries Digital Collections API", "repository": { @@ -13,14 +13,14 @@ { "registryType": "npm", "identifier": "@nulib/dc-api-mcp", - "version": "2.10.10", + "version": "2.10.11", "transport": { "type": "stdio" } }, { "registryType": "oci", - "identifier": "ghcr.io/nulib/dc-api-mcp:2.10.10", + "identifier": "ghcr.io/nulib/dc-api-mcp:2.10.11", "transport": { "type": "stdio" } From 
142a0deda2b1d90122a3f2cd2406f4fc2f34ba73 Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Fri, 24 Apr 2026 18:58:28 +0000 Subject: [PATCH 02/11] Add IIIF content search route --- api/src/api/opensearch.js | 4 + api/src/api/response/iiif/manifest.js | 6 + .../response/iiif/presentation-api/items.js | 2 + api/src/api/response/iiif/search.js | 103 ++++++++++ api/src/handlers/get-work-search.js | 32 +++ api/template.yaml | 24 +++ api/test/integration/get-work-by-id.test.js | 4 + api/test/integration/get-work-search.test.js | 187 ++++++++++++++++++ 8 files changed, 362 insertions(+) create mode 100644 api/src/api/response/iiif/search.js create mode 100644 api/src/handlers/get-work-search.js create mode 100644 api/test/integration/get-work-search.test.js diff --git a/api/src/api/opensearch.js b/api/src/api/opensearch.js index 9db37173..1fae35f9 100644 --- a/api/src/api/opensearch.js +++ b/api/src/api/opensearch.js @@ -27,6 +27,7 @@ async function getWorkFileSets(workId, opts = {}) { const { allowPrivate = false, allowUnpublished = false, + annotationsQuery = null, role = null, source = null, sortBy = null, @@ -52,6 +53,9 @@ async function getWorkFileSets(workId, opts = {}) { if (role) { mustClauses.push({ term: { role: role } }); } + if (annotationsQuery) { + mustClauses.push({ match: { "annotations.content": annotationsQuery } }); + } const searchBody = { size: 10000, diff --git a/api/src/api/response/iiif/manifest.js b/api/src/api/response/iiif/manifest.js index e3dd4953..688f6060 100644 --- a/api/src/api/response/iiif/manifest.js +++ b/api/src/api/response/iiif/manifest.js @@ -368,6 +368,12 @@ async function transform(response, options = {}) { } } + jsonManifest.service = [ + { + id: `${dcApiEndpoint()}/works/${source.id}/search?as=iiif`, + type: "SearchService2", + }, + ]; jsonManifest.provider = [provider]; jsonManifest.logo = [nulLogo]; const navPlace = buildNavPlace(source); diff --git a/api/src/api/response/iiif/presentation-api/items.js 
b/api/src/api/response/iiif/presentation-api/items.js index d694cb60..6cf0743b 100644 --- a/api/src/api/response/iiif/presentation-api/items.js +++ b/api/src/api/response/iiif/presentation-api/items.js @@ -157,8 +157,10 @@ module.exports = { buildImageService, buildSupplementingAnnotation, buildTranscriptionAnnotation, + getTranscriptionContent, isAltFormat, isAudioVideo, isImage, isPDF, + normalizeLanguages, }; diff --git a/api/src/api/response/iiif/search.js b/api/src/api/response/iiif/search.js new file mode 100644 index 00000000..3fa559da --- /dev/null +++ b/api/src/api/response/iiif/search.js @@ -0,0 +1,103 @@ +const { dcApiEndpoint } = require("../../../environment"); +const { getWorkFileSets } = require("../../opensearch"); +const { + getTranscriptionContent, + normalizeLanguages, +} = require("./presentation-api/items"); + +function extractSnippet(content, q, contextChars = 100) { + const idx = content.toLowerCase().indexOf(q.toLowerCase()); + if (idx === -1) return null; + const start = Math.max(0, idx - contextChars); + const end = Math.min(content.length, idx + q.length + contextChars); + let snippet = content.slice(start, end).trim(); + if (start > 0) snippet = "..." 
+ snippet; + if (end < content.length) snippet = snippet + "..."; + return snippet; +} + +function buildSearchAnnotationBody(annotation, snippet) { + const body = { + type: "TextualBody", + value: snippet, + format: "text/plain", + }; + const languages = normalizeLanguages(annotation.language); + if (languages.length === 1) { + body.language = languages[0]; + } else if (languages.length > 1) { + body.language = languages; + } + return body; +} + +async function transform(workId, q, opts = {}) { + const { allowPrivate = false, allowUnpublished = false } = opts; + + const manifestId = `${dcApiEndpoint()}/works/${workId}?as=iiif`; + const searchId = `${dcApiEndpoint()}/works/${workId}/search?as=iiif&q=${encodeURIComponent( + q + )}`; + + const response = await getWorkFileSets(workId, { + allowPrivate, + allowUnpublished, + annotationsQuery: q, + role: "Access", + source: ["id", "annotations", "group_with"], + sortBy: "rank", + }); + + const fileSets = + response.statusCode === 200 + ? JSON.parse(response.body).hits.hits.map((h) => h._source) + : []; + + // Replicate manifest.js grouping: ungrouped file sets use their own id as key + const fileSetGroups = {}; + fileSets.forEach((fs) => { + const key = fs.group_with || fs.id; + if (!fileSetGroups[key]) fileSetGroups[key] = []; + fileSetGroups[key].push(fs); + }); + + const items = []; + + Object.entries(fileSetGroups).forEach(([groupKey, groupFileSets], index) => { + const canvasId = `${manifestId}/canvas/${index}`; + + // Primary file set is the one whose id matches the group key (same as manifest.js) + const primary = + groupFileSets.find((fs) => fs.id === groupKey) || groupFileSets[0]; + if (!primary?.annotations) return; + + primary.annotations + .filter((ann) => ann.type === "transcription") + .forEach((ann) => { + const content = getTranscriptionContent(ann); + const snippet = extractSnippet(content, q); + if (!snippet) return; + + items.push({ + id: `${canvasId}/annotation/${ann.id}`, + type: "Annotation", + 
motivation: "supplementing", + body: buildSearchAnnotationBody(ann, snippet), + target: canvasId, + }); + }); + }); + + return { + statusCode: 200, + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + "@context": "http://iiif.io/api/search/2/context.json", + id: searchId, + type: "AnnotationPage", + items, + }), + }; +} + +module.exports = { transform }; diff --git a/api/src/handlers/get-work-search.js b/api/src/handlers/get-work-search.js new file mode 100644 index 00000000..eef833eb --- /dev/null +++ b/api/src/handlers/get-work-search.js @@ -0,0 +1,32 @@ +const { getWork } = require("../api/opensearch"); +const iiifSearchResponse = require("../api/response/iiif/search"); +const { wrap } = require("./middleware"); + +exports.handler = wrap(async (event) => { + const id = event.pathParameters.id; + const { as, q } = event.queryStringParameters; + + const allowPrivate = + event.userToken.isSuperUser() || + event.userToken.isReadingRoom() || + event.userToken.hasEntitlement(id); + const allowUnpublished = + event.userToken.isSuperUser() || event.userToken.hasEntitlement(id); + + if (as !== "iiif" || !q?.trim()) { + return { + statusCode: 400, + body: JSON.stringify({ + message: "Request must include ?as=iiif&q={query}", + }), + }; + } + + const workResponse = await getWork(id, { allowPrivate, allowUnpublished }); + if (workResponse.statusCode !== 200) return workResponse; + + return iiifSearchResponse.transform(id, q, { + allowPrivate, + allowUnpublished, + }); +}); diff --git a/api/template.yaml b/api/template.yaml index 6fc14ede..c5203d35 100644 --- a/api/template.yaml +++ b/api/template.yaml @@ -584,6 +584,30 @@ Resources: ApiId: !Ref dcApi Path: /works/{id}/thumbnail Method: HEAD + getWorkSearchFunction: + Type: AWS::Serverless::Function + Condition: DeployAPI + Properties: + Handler: handlers/get-work-search.handler + Description: IIIF Search 2.0 for a Work's transcription annotations. 
+ #* Layers: + #* - !Ref apiDependencies + Policies: + - !Ref SecretsPolicy + - !Ref readIndexPolicy + Events: + WorkApiGet: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /works/{id}/search + Method: GET + WorkApiHead: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /works/{id}/search + Method: HEAD getSimilarFunction: Type: AWS::Serverless::Function Condition: DeployAPI diff --git a/api/test/integration/get-work-by-id.test.js b/api/test/integration/get-work-by-id.test.js index 534c5d03..77aba58f 100644 --- a/api/test/integration/get-work-by-id.test.js +++ b/api/test/integration/get-work-by-id.test.js @@ -91,6 +91,10 @@ describe("Retrieve work by id", () => { "http://iiif.io/api/presentation/3/context.json" ); expect(resultBody.label.none[0]).to.eq("Canary Record TEST 1"); + expect(resultBody.service).to.deep.include({ + id: `${process.env.DC_API_ENDPOINT}/works/1234/search?as=iiif`, + type: "SearchService2", + }); }); it("will retrieve a private, unpublished work document with an entitlement", async () => { diff --git a/api/test/integration/get-work-search.test.js b/api/test/integration/get-work-search.test.js new file mode 100644 index 00000000..d01433fd --- /dev/null +++ b/api/test/integration/get-work-search.test.js @@ -0,0 +1,187 @@ +"use strict"; + +const chai = require("chai"); +const expect = chai.expect; +chai.use(require("chai-http")); + +const ApiToken = requireSource("api/api-token"); + +const annotatedFileSetsResponse = { + hits: { + total: { value: 1 }, + hits: [ + { + _source: { + id: "36a47020-5410-4dda-a7ca-967fe3885bcd", + group_with: null, + annotations: [ + { + id: "anno-uuid-1", + type: "transcription", + language: ["en"], + content: + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Integer vitae nisl a leo faucibus consectetur.", + model: "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + }, + ], + }, + }, + ], + }, +}; + +const emptyFileSetsResponse = { + hits: { + total: { value: 1 }, + hits: [ + { + _source: { + id: "36a47020-5410-4dda-a7ca-967fe3885bcd", + group_with: null, + annotations: [], + }, + }, + ], + }, +}; + +describe("IIIF Search 2.0 for a work", () => { + helpers.saveEnvironment(); + const mock = helpers.mockIndex(); + + describe("GET /works/{id}/search", () => { + const { handler } = requireSource("handlers/get-work-search"); + + it("returns a IIIF Search 2.0 AnnotationPage with matching items", async () => { + mock + .get("/dc-v2-work/_doc/1234") + .reply(200, helpers.testFixture("mocks/work-1234.json")); + mock + .post("/dc-v2-file-set/_search", () => true) + .reply(200, annotatedFileSetsResponse); + + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body["@context"]).to.eq( + "http://iiif.io/api/search/2/context.json" + ); + expect(body.type).to.eq("AnnotationPage"); + expect(body.id).to.include("?as=iiif&q=Lorem"); + expect(body.items).to.have.lengthOf(1); + + const item = body.items[0]; + expect(item.type).to.eq("Annotation"); + expect(item.motivation).to.eq("supplementing"); + expect(item.body.type).to.eq("TextualBody"); + expect(item.body.value).to.include("Lorem"); + expect(item.body.format).to.eq("text/plain"); + expect(item.body.language).to.eq("en"); + expect(item.target).to.include("/canvas/0"); + }); + + it("returns an empty items array when no annotations match", async () => { + mock + .get("/dc-v2-work/_doc/1234") + .reply(200, helpers.testFixture("mocks/work-1234.json")); + mock + .post("/dc-v2-file-set/_search", () => true) + .reply(200, annotatedFileSetsResponse); + + const event 
= helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "zzznomatch" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body.type).to.eq("AnnotationPage"); + expect(body.items).to.deep.eq([]); + }); + + it("returns 400 when q parameter is missing", async () => { + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(400); + }); + + it("returns 400 when as parameter is not iiif", async () => { + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(400); + }); + + it("returns 404 when the work does not exist", async () => { + mock + .get("/dc-v2-work/_doc/1234") + .reply(200, helpers.testFixture("mocks/missing-work-1234.json")); + + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(404); + }); + + it("returns 403 when the work is private and no token is provided", async () => { + mock + .get("/dc-v2-work/_doc/1234") + .reply(200, helpers.testFixture("mocks/private-work-1234.json")); + + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(403); + }); + + it("returns results for a private work with a valid entitlement token", async () => { + mock + .get("/dc-v2-work/_doc/1234") + .reply(200, helpers.testFixture("mocks/private-work-1234.json")); + mock + 
.post("/dc-v2-file-set/_search", () => true) + .reply(200, emptyFileSetsResponse); + + const token = new ApiToken().addEntitlement("1234").sign(); + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .headers({ Cookie: `${process.env.API_TOKEN_NAME}=${token};` }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body.type).to.eq("AnnotationPage"); + }); + }); +}); From fa3a580ccce364a921e8900b7efc61bf876269f7 Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Mon, 27 Apr 2026 16:24:59 +0000 Subject: [PATCH 03/11] Fix target canvas id in content search results --- api/src/api/response/iiif/search.js | 23 +++++++-- api/src/handlers/get-work-search.js | 3 +- api/test/integration/get-work-search.test.js | 49 ++++++++++++++++++-- 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/api/src/api/response/iiif/search.js b/api/src/api/response/iiif/search.js index 3fa559da..a79d7191 100644 --- a/api/src/api/response/iiif/search.js +++ b/api/src/api/response/iiif/search.js @@ -31,21 +31,33 @@ function buildSearchAnnotationBody(annotation, snippet) { return body; } -async function transform(workId, q, opts = {}) { +async function transform(workSource, q, opts = {}) { const { allowPrivate = false, allowUnpublished = false } = opts; + const workId = workSource.id; const manifestId = `${dcApiEndpoint()}/works/${workId}?as=iiif`; const searchId = `${dcApiEndpoint()}/works/${workId}/search?as=iiif&q=${encodeURIComponent( q )}`; + // Build canvas index map from the work's file_sets array — same ordering as manifest.js + const groupIndexMap = {}; + let groupIndex = 0; + (workSource.file_sets || []) + .filter((fs) => fs.role === "Access") + .forEach((fs) => { + const key = fs.group_with || fs.id; + if (!(key in groupIndexMap)) { + groupIndexMap[key] = groupIndex++; + } + }); + const response = 
await getWorkFileSets(workId, { allowPrivate, allowUnpublished, annotationsQuery: q, role: "Access", source: ["id", "annotations", "group_with"], - sortBy: "rank", }); const fileSets = @@ -53,7 +65,6 @@ async function transform(workId, q, opts = {}) { ? JSON.parse(response.body).hits.hits.map((h) => h._source) : []; - // Replicate manifest.js grouping: ungrouped file sets use their own id as key const fileSetGroups = {}; fileSets.forEach((fs) => { const key = fs.group_with || fs.id; @@ -63,8 +74,10 @@ async function transform(workId, q, opts = {}) { const items = []; - Object.entries(fileSetGroups).forEach(([groupKey, groupFileSets], index) => { - const canvasId = `${manifestId}/canvas/${index}`; + Object.entries(fileSetGroups).forEach(([groupKey, groupFileSets]) => { + const canvasIndex = groupIndexMap[groupKey]; + if (canvasIndex === undefined) return; + const canvasId = `${manifestId}/canvas/${canvasIndex}`; // Primary file set is the one whose id matches the group key (same as manifest.js) const primary = diff --git a/api/src/handlers/get-work-search.js b/api/src/handlers/get-work-search.js index eef833eb..fb26f1e0 100644 --- a/api/src/handlers/get-work-search.js +++ b/api/src/handlers/get-work-search.js @@ -25,7 +25,8 @@ exports.handler = wrap(async (event) => { const workResponse = await getWork(id, { allowPrivate, allowUnpublished }); if (workResponse.statusCode !== 200) return workResponse; - return iiifSearchResponse.transform(id, q, { + const workSource = JSON.parse(workResponse.body)._source; + return iiifSearchResponse.transform(workSource, q, { allowPrivate, allowUnpublished, }); diff --git a/api/test/integration/get-work-search.test.js b/api/test/integration/get-work-search.test.js index d01433fd..6090413a 100644 --- a/api/test/integration/get-work-search.test.js +++ b/api/test/integration/get-work-search.test.js @@ -6,13 +6,14 @@ chai.use(require("chai-http")); const ApiToken = requireSource("api/api-token"); +// Matches the first Access file set 
(canvas/0) in mocks/work-1234.json const annotatedFileSetsResponse = { hits: { total: { value: 1 }, hits: [ { _source: { - id: "36a47020-5410-4dda-a7ca-967fe3885bcd", + id: "076dcbd8-8c57-40e8-bdf7-dc9153c87a36", group_with: null, annotations: [ { @@ -30,21 +31,38 @@ const annotatedFileSetsResponse = { }, }; -const emptyFileSetsResponse = { +// Matches the second Access file set (canvas/1) in mocks/work-1234.json +const annotatedSecondFileSetsResponse = { hits: { total: { value: 1 }, hits: [ { _source: { - id: "36a47020-5410-4dda-a7ca-967fe3885bcd", + id: "51862c1c-c024-45dc-ab26-694bd8ebc16c", group_with: null, - annotations: [], + annotations: [ + { + id: "anno-uuid-2", + type: "transcription", + language: ["en"], + content: + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer vitae nisl a leo faucibus consectetur.", + model: "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + }, + ], }, }, ], }, }; +const emptyFileSetsResponse = { + hits: { + total: { value: 0 }, + hits: [], + }, +}; + describe("IIIF Search 2.0 for a work", () => { helpers.saveEnvironment(); const mock = helpers.mockIndex(); @@ -87,6 +105,29 @@ describe("IIIF Search 2.0 for a work", () => { expect(item.target).to.include("/canvas/0"); }); + it("uses the correct canvas index from the manifest ordering, not sequential search result order", async () => { + mock + .get("/dc-v2-work/_doc/1234") + .reply(200, helpers.testFixture("mocks/work-1234.json")); + mock + .post("/dc-v2-file-set/_search", () => true) + .reply(200, annotatedSecondFileSetsResponse); + + const event = helpers + .mockEvent("GET", "/works/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body.items).to.have.lengthOf(1); + // Second Access file set in work-1234.json must map to canvas/1, not canvas/0 + 
expect(body.items[0].target).to.include("/canvas/1"); + }); + it("returns an empty items array when no annotations match", async () => { mock .get("/dc-v2-work/_doc/1234") From e533ff5ca37ad446e2f7669fe8ca554f6848fba4 Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Thu, 30 Apr 2026 19:16:05 +0000 Subject: [PATCH 04/11] Return full annotation rather than snippet for content search --- api/src/api/opensearch.js | 4 +++- api/src/api/response/iiif/search.js | 20 ++++---------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/api/src/api/opensearch.js b/api/src/api/opensearch.js index 1fae35f9..ce8070aa 100644 --- a/api/src/api/opensearch.js +++ b/api/src/api/opensearch.js @@ -54,7 +54,9 @@ async function getWorkFileSets(workId, opts = {}) { mustClauses.push({ term: { role: role } }); } if (annotationsQuery) { - mustClauses.push({ match: { "annotations.content": annotationsQuery } }); + mustClauses.push({ + match_phrase: { "annotations.content": annotationsQuery }, + }); } const searchBody = { diff --git a/api/src/api/response/iiif/search.js b/api/src/api/response/iiif/search.js index a79d7191..66f0908a 100644 --- a/api/src/api/response/iiif/search.js +++ b/api/src/api/response/iiif/search.js @@ -5,21 +5,10 @@ const { normalizeLanguages, } = require("./presentation-api/items"); -function extractSnippet(content, q, contextChars = 100) { - const idx = content.toLowerCase().indexOf(q.toLowerCase()); - if (idx === -1) return null; - const start = Math.max(0, idx - contextChars); - const end = Math.min(content.length, idx + q.length + contextChars); - let snippet = content.slice(start, end).trim(); - if (start > 0) snippet = "..." 
+ snippet; - if (end < content.length) snippet = snippet + "..."; - return snippet; -} - -function buildSearchAnnotationBody(annotation, snippet) { +function buildSearchAnnotationBody(annotation, content) { const body = { type: "TextualBody", - value: snippet, + value: content, format: "text/plain", }; const languages = normalizeLanguages(annotation.language); @@ -88,14 +77,13 @@ async function transform(workSource, q, opts = {}) { .filter((ann) => ann.type === "transcription") .forEach((ann) => { const content = getTranscriptionContent(ann); - const snippet = extractSnippet(content, q); - if (!snippet) return; + if (!content.toLowerCase().includes(q.toLowerCase())) return; items.push({ id: `${canvasId}/annotation/${ann.id}`, type: "Annotation", motivation: "supplementing", - body: buildSearchAnnotationBody(ann, snippet), + body: buildSearchAnnotationBody(ann, content), target: canvasId, }); }); From e24267d173a97ce66c3964dfa5587410cbd70375 Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Tue, 5 May 2026 16:57:48 +0000 Subject: [PATCH 05/11] Create file set thumbnail route --- api/src/handlers/get-thumbnail.js | 28 ++- api/template.yaml | 12 ++ .../mocks/fileset-image-access-1234.json | 16 ++ .../mocks/fileset-image-auxiliary-1234.json | 16 ++ .../mocks/fileset-institution-image-1234.json | 16 ++ .../mocks/fileset-private-image-1234.json | 16 ++ .../mocks/fileset-unpublished-image-1234.json | 16 ++ api/test/integration/get-thumbnail.test.js | 177 ++++++++++++++++++ 8 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 api/test/fixtures/mocks/fileset-image-access-1234.json create mode 100644 api/test/fixtures/mocks/fileset-image-auxiliary-1234.json create mode 100644 api/test/fixtures/mocks/fileset-institution-image-1234.json create mode 100644 api/test/fixtures/mocks/fileset-private-image-1234.json create mode 100644 api/test/fixtures/mocks/fileset-unpublished-image-1234.json diff --git a/api/src/handlers/get-thumbnail.js 
b/api/src/handlers/get-thumbnail.js index 11efb52a..ddb26b88 100644 --- a/api/src/handlers/get-thumbnail.js +++ b/api/src/handlers/get-thumbnail.js @@ -3,7 +3,7 @@ const axios = require("axios").default; const cookie = require("cookie"); const opensearchResponse = require("../api/response/opensearch"); const { apiTokenName } = require("../environment"); -const { getCollection, getWork } = require("../api/opensearch"); +const { getCollection, getWork, getFileSet } = require("../api/opensearch"); const { wrap } = require("./middleware"); function getAxiosResponse(url, config) { @@ -30,6 +30,16 @@ function validateRequest(event) { return { id, aspect, size }; } +function isImageFileSet(doc) { + const source = doc._source; + return ( + doc.found && + source.mime_type != null && + source.mime_type.split("/")[0] === "image" && + ["Access", "Auxiliary"].includes(source.role) + ); +} + const getThumbnail = async (id, aspect, size, event) => { const allowUnpublished = event.userToken.isSuperUser() || event.userToken.hasEntitlement(id); @@ -47,6 +57,22 @@ const getThumbnail = async (id, aspect, size, event) => { return { error: await opensearchResponse.transform(esResponse) }; body = JSON.parse(esResponse.body); iiif_base = body?._source?.representative_image?.url; + } else if (event.rawPath.match(/\/file-sets\//)) { + esResponse = await getFileSet(id, { + allowPrivate, + allowUnpublished, + }); + if (esResponse.statusCode != 200) + return { error: await opensearchResponse.transform(esResponse) }; + body = JSON.parse(esResponse.body); + if (!isImageFileSet(body)) { + return { + statusCode: 404, + headers: { "content-type": "text/plain" }, + body: "Not Found", + }; + } + iiif_base = body?._source?.representative_image_url; } else { esResponse = await getWork(id, { allowPrivate, diff --git a/api/template.yaml b/api/template.yaml index c5203d35..e12a9ce9 100644 --- a/api/template.yaml +++ b/api/template.yaml @@ -584,6 +584,18 @@ Resources: ApiId: !Ref dcApi Path: 
/works/{id}/thumbnail Method: HEAD + FileSetApiGet: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /file-sets/{id}/thumbnail + Method: GET + FileSetApiHead: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /file-sets/{id}/thumbnail + Method: HEAD getWorkSearchFunction: Type: AWS::Serverless::Function Condition: DeployAPI diff --git a/api/test/fixtures/mocks/fileset-image-access-1234.json b/api/test/fixtures/mocks/fileset-image-access-1234.json new file mode 100644 index 00000000..2f776130 --- /dev/null +++ b/api/test/fixtures/mocks/fileset-image-access-1234.json @@ -0,0 +1,16 @@ +{ + "_index": "dev-dc-v2-file-set", + "_type": "_doc", + "_id": "1234", + "_version": 1, + "found": true, + "_source": { + "id": "1234", + "api_model": "FileSet", + "visibility": "Public", + "published": true, + "role": "Access", + "mime_type": "image/tiff", + "representative_image_url": "https://index.test.library.northwestern.edu/iiif/2/mbk-dev/1234" + } +} diff --git a/api/test/fixtures/mocks/fileset-image-auxiliary-1234.json b/api/test/fixtures/mocks/fileset-image-auxiliary-1234.json new file mode 100644 index 00000000..974591b1 --- /dev/null +++ b/api/test/fixtures/mocks/fileset-image-auxiliary-1234.json @@ -0,0 +1,16 @@ +{ + "_index": "dev-dc-v2-file-set", + "_type": "_doc", + "_id": "1234", + "_version": 1, + "found": true, + "_source": { + "id": "1234", + "api_model": "FileSet", + "visibility": "Public", + "published": true, + "role": "Auxiliary", + "mime_type": "image/jpeg", + "representative_image_url": "https://index.test.library.northwestern.edu/iiif/2/mbk-dev/1234" + } +} diff --git a/api/test/fixtures/mocks/fileset-institution-image-1234.json b/api/test/fixtures/mocks/fileset-institution-image-1234.json new file mode 100644 index 00000000..65d840f4 --- /dev/null +++ b/api/test/fixtures/mocks/fileset-institution-image-1234.json @@ -0,0 +1,16 @@ +{ + "_index": "dev-dc-v2-file-set", + "_type": "_doc", + "_id": "1234", + "_version": 1, + "found": true, + 
"_source": { + "id": "1234", + "api_model": "FileSet", + "visibility": "Institution", + "published": true, + "role": "Access", + "mime_type": "image/tiff", + "representative_image_url": "https://index.test.library.northwestern.edu/iiif/2/mbk-dev/1234" + } +} diff --git a/api/test/fixtures/mocks/fileset-private-image-1234.json b/api/test/fixtures/mocks/fileset-private-image-1234.json new file mode 100644 index 00000000..0ca57220 --- /dev/null +++ b/api/test/fixtures/mocks/fileset-private-image-1234.json @@ -0,0 +1,16 @@ +{ + "_index": "dev-dc-v2-file-set", + "_type": "_doc", + "_id": "1234", + "_version": 1, + "found": true, + "_source": { + "id": "1234", + "api_model": "FileSet", + "visibility": "Private", + "published": true, + "role": "Access", + "mime_type": "image/tiff", + "representative_image_url": "https://index.test.library.northwestern.edu/iiif/2/mbk-dev/1234" + } +} diff --git a/api/test/fixtures/mocks/fileset-unpublished-image-1234.json b/api/test/fixtures/mocks/fileset-unpublished-image-1234.json new file mode 100644 index 00000000..74d97cf4 --- /dev/null +++ b/api/test/fixtures/mocks/fileset-unpublished-image-1234.json @@ -0,0 +1,16 @@ +{ + "_index": "dev-dc-v2-file-set", + "_type": "_doc", + "_id": "1234", + "_version": 1, + "found": true, + "_source": { + "id": "1234", + "api_model": "FileSet", + "visibility": "Public", + "published": false, + "role": "Access", + "mime_type": "image/tiff", + "representative_image_url": "https://index.test.library.northwestern.edu/iiif/2/mbk-dev/1234" + } +} diff --git a/api/test/integration/get-thumbnail.test.js b/api/test/integration/get-thumbnail.test.js index 92d3cfd8..2c3ee039 100644 --- a/api/test/integration/get-thumbnail.test.js +++ b/api/test/integration/get-thumbnail.test.js @@ -198,6 +198,183 @@ describe("Thumbnail routes", () => { }); }); + describe("FileSet", () => { + const event = helpers + .mockEvent("GET", "/file-sets/{id}/thumbnail") + .headers({ origin: "https://test.example.edu/" }) + .pathParams({ 
id: 1234 }); + + it("retrieves a thumbnail for an Access image file set", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-image-access-1234.json") + ); + mock + .get("/iiif/2/mbk-dev/1234/full/!300,300/0/default.jpg") + .reply(200, helpers.testFixture("mocks/thumbnail_full.jpg"), { + "Content-Type": "image/jpeg", + }); + + const result = await handler(event.render()); + expect(result.statusCode).to.eq(200); + expect(result.headers["content-type"]).to.eq("image/jpeg"); + expectCorsHeaders(result); + }); + + it("retrieves a thumbnail for an Auxiliary image file set", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-image-auxiliary-1234.json") + ); + mock + .get("/iiif/2/mbk-dev/1234/full/!300,300/0/default.jpg") + .reply(200, helpers.testFixture("mocks/thumbnail_full.jpg"), { + "Content-Type": "image/jpeg", + }); + + const result = await handler(event.render()); + expect(result.statusCode).to.eq(200); + expectCorsHeaders(result); + }); + + it("returns 404 for a non-image (audio) file set", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/fileset-audio-1234.json")); + + const result = await handler(event.render()); + expect(result.statusCode).to.eq(404); + expectCorsHeaders(result); + }); + + it("returns 404 for a non-image (video) file set", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/fileset-video-1234.json")); + + const result = await handler(event.render()); + expect(result.statusCode).to.eq(404); + expectCorsHeaders(result); + }); + + it("returns 404 if the file set doc can't be found", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/missing-fileset-1234.json")); + + const result = await handler(event.render()); + expect(result.error.statusCode).to.eq(404); + expectCorsHeaders(result); + 
}); + + it("returns 403 if the file set is private", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-private-image-1234.json") + ); + + const result = await handler(event.render()); + expect(result.error.statusCode).to.eq(403); + expectCorsHeaders(result); + }); + + it("returns 200 if the file set is private and the user is in the reading room", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-private-image-1234.json") + ); + mock + .get("/iiif/2/mbk-dev/1234/full/!300,300/0/default.jpg") + .reply(200, helpers.testFixture("mocks/thumbnail_full.jpg"), { + "Content-Type": "image/jpeg", + }); + + const renderedEvent = event.render(); + process.env.READING_ROOM_IPS = renderedEvent.requestContext.http.sourceIp; + const result = await handler(renderedEvent); + expect(result.statusCode).to.eq(200); + }); + + it("returns 200 for an institution file set", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-institution-image-1234.json") + ); + mock + .get("/iiif/2/mbk-dev/1234/full/!300,300/0/default.jpg") + .reply(200, helpers.testFixture("mocks/thumbnail_full.jpg"), { + "Content-Type": "image/jpeg", + }); + + const result = await handler(event.render()); + expect(result.statusCode).to.eq(200); + expectCorsHeaders(result); + }); + + it("returns 404 if the file set is unpublished", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-unpublished-image-1234.json") + ); + + const result = await handler(event.render()); + expect(result.error.statusCode).to.eq(404); + expectCorsHeaders(result); + }); + + it("returns 200 for an unpublished file set if the user is a superuser", async () => { + const token = new ApiToken().superUser().sign(); + const superEvent = helpers + .mockEvent("GET", "/file-sets/{id}/thumbnail") + .headers({ 
authorization: `Bearer ${token}` }) + .pathParams({ id: 1234 }); + + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-unpublished-image-1234.json") + ); + mock + .get("/iiif/2/mbk-dev/1234/full/!300,300/0/default.jpg") + .reply(200, helpers.testFixture("mocks/thumbnail_full.jpg"), { + "Content-Type": "image/jpeg", + }); + + const result = await handler(superEvent.render()); + expect(result.statusCode).to.eq(200); + }); + + it("returns an error from the IIIF server", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-image-access-1234.json") + ); + mock + .get("/iiif/2/mbk-dev/1234/full/!300,300/0/default.jpg") + .reply(403, "Forbidden", { "Content-Type": "text/plain" }); + + const result = await handler(event.render()); + expect(result.statusCode).to.eq(403); + expect(result.body).to.eq("Forbidden"); + expectCorsHeaders(result); + }); + }); + describe("Superuser", () => { let event; From c74ee66581036caf947ed9498eee6ed15dc982c7 Mon Sep 17 00:00:00 2001 From: "Michael B. 
Klein" Date: Wed, 6 May 2026 19:33:02 +0000 Subject: [PATCH 06/11] Collapse requests: Hoist inner_hits to top, use additional agg to calculate pages Include aggs and collapse in searchToken --- api/src/api/pagination.js | 32 +- api/src/api/request/pipeline.js | 14 +- api/src/api/response/iiif/collection.js | 2 +- api/src/api/response/opensearch/index.js | 2 +- api/src/api/response/transformer.js | 30 +- api/test/fixtures/mocks/collapse-search.json | 376 ++++++++++++++++++ api/test/unit/api/pagination.test.js | 15 +- .../unit/api/response/transformer.test.js | 105 +++++ docs/docs/spec/data-types.yaml | 66 ++- 9 files changed, 612 insertions(+), 30 deletions(-) create mode 100644 api/test/fixtures/mocks/collapse-search.json create mode 100644 api/test/unit/api/response/transformer.test.js diff --git a/api/src/api/pagination.js b/api/src/api/pagination.js index 9388e241..694bbb86 100644 --- a/api/src/api/pagination.js +++ b/api/src/api/pagination.js @@ -4,7 +4,15 @@ const { } = require("lz-string"); const { defaultSearchSize } = require("../environment"); -const encodeFields = ["query", "size", "sort", "fields", "_source"]; +const encodeFields = [ + "query", + "size", + "sort", + "fields", + "collapse", + "aggs", + "_source", +]; async function decodeSearchToken(token) { return JSON.parse(await decompress(token)); @@ -17,6 +25,7 @@ async function encodeSearchToken(models, body, format, options) { token.body[field] = body[field]; } } + if (token.body.aggs?.__pagination) delete token.body.aggs.__pagination; return await compress(JSON.stringify(token)); } @@ -55,6 +64,13 @@ class Paginator { this.options = options; } + async pageResponseInfo(responseBody, opts = {}) { + return this.pageInfo(responseBody.hits.total.value, { + aggregatedCount: responseBody.hits.collapsed?.value, + ...opts, + }); + } + async pageInfo(count, opts = {}) { let url = new URL(this.route, this.baseUrl); let searchToken; @@ -74,15 +90,18 @@ class Paginator { } const queryStringParameters = -
this.options?.parameterOverrides || this.options?.queryStringParameters; + this.options?.parameterOverrides || + this.options?.queryStringParameters || + {}; if (typeof queryStringParameters === "object") { for (const param in queryStringParameters) { url.searchParams.set(param, queryStringParameters[param]); } } - const prev = prevPage(this.body, count); - const next = nextPage(this.body, count); + const aggregatedCount = opts?.aggregatedCount || count; + const prev = prevPage(this.body, aggregatedCount); + const next = nextPage(this.body, aggregatedCount); url.searchParams.delete("from"); let result = { @@ -91,9 +110,12 @@ class Paginator { limit: size(this.body), offset: from(this.body), total_hits: count, - total_pages: maxPage(this.body, count), + total_pages: maxPage(this.body, aggregatedCount), format: this.format, }; + if (this.body.collapse) { + result.collapsed_by = this.body.collapse.field; + } if (opts.includeOptions) { result.options = this.options; } diff --git a/api/src/api/request/pipeline.js b/api/src/api/request/pipeline.js index 2c0d85e6..111fe47f 100644 --- a/api/src/api/request/pipeline.js +++ b/api/src/api/request/pipeline.js @@ -104,8 +104,20 @@ module.exports = class RequestPipeline { return this; } + addCardinality() { + if (this.searchContext.collapse) { + this.searchContext.aggs ||= {}; + this.searchContext.aggs.__pagination = { + cardinality: { + field: this.searchContext.collapse.field, + }, + }; + } + return this; + } + toJson() { - this.addNeuralModelId(); + this.addNeuralModelId().addCardinality(); return JSON.stringify(sortJson(this.searchContext)); } }; diff --git a/api/src/api/response/iiif/collection.js b/api/src/api/response/iiif/collection.js index d5501707..ea2fcfc7 100644 --- a/api/src/api/response/iiif/collection.js +++ b/api/src/api/response/iiif/collection.js @@ -5,7 +5,7 @@ const { provider, nulLogo } = require("./presentation-api/provider"); async function transform(response, pager) { if (response.statusCode === 200) { 
const responseBody = JSON.parse(response.body); - const pageInfo = await pager.pageInfo(responseBody.hits.total.value, { + const pageInfo = await pager.pageResponseInfo(responseBody, { includeOptions: true, }); diff --git a/api/src/api/response/opensearch/index.js b/api/src/api/response/opensearch/index.js index 917d3397..2a1a917b 100644 --- a/api/src/api/response/opensearch/index.js +++ b/api/src/api/response/opensearch/index.js @@ -40,7 +40,7 @@ async function transformMany(responseBody, options) { } async function paginationInfo(responseBody, pager) { - let { ...pageInfo } = await pager.pageInfo(responseBody.hits.total.value); + let { ...pageInfo } = await pager.pageResponseInfo(responseBody); return pageInfo; } diff --git a/api/src/api/response/transformer.js b/api/src/api/response/transformer.js index 0a4b1a3f..2316f21f 100644 --- a/api/src/api/response/transformer.js +++ b/api/src/api/response/transformer.js @@ -2,10 +2,38 @@ const { transformError } = require("./error.js"); const iiifCollectionResponse = require("./iiif/collection.js"); const opensearchResponse = require("./opensearch"); +// Hoist all inner_hits to the top, and if __pagination aggregation +// is present, use that for total hits instead of the hits.total.value +function applyInnerHits(response) { + const responseBody = JSON.parse(response.body); + if (responseBody.hits?.hits) { + responseBody.hits.hits = responseBody.hits.hits + .map((hit) => { + if (hit.inner_hits) { + return hit.inner_hits[Object.keys(hit.inner_hits)[0]].hits.hits; + } + return hit; + }) + .flat(); + } + if (responseBody?.aggregations?.__pagination) { + responseBody.hits.collapsed = { + value: responseBody.aggregations.__pagination.value, + }; + delete responseBody.aggregations.__pagination; + if (Object.keys(responseBody.aggregations).length === 0) { + delete responseBody.aggregations; + } + } + response.body = JSON.stringify(responseBody); + return response; +} + async function transformSearchResult(response, pager) { if 
(response.statusCode === 200) { + response = applyInnerHits(response); const responseBody = JSON.parse(response.body); - const pageInfo = await pager.pageInfo(responseBody.hits.total.value); + const pageInfo = await pager.pageResponseInfo(responseBody); if (pageInfo.format === "iiif") { return await iiifCollectionResponse.transform(response, pager); diff --git a/api/test/fixtures/mocks/collapse-search.json b/api/test/fixtures/mocks/collapse-search.json new file mode 100644 index 00000000..008a1389 --- /dev/null +++ b/api/test/fixtures/mocks/collapse-search.json @@ -0,0 +1,376 @@ +{ + "took": 7, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 7, + "relation": "eq" + }, + "max_score": 4.4889727, + "hits": [ + { + "_index": "mbk-dev-dc-v2-file-set-1778017290401", + "_id": "5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "_score": 4.4889727, + "_source": { + "id": "5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "label": "BFMF_B51_F04_007_p001.tif", + "description": null, + "streaming_url": null, + "mime_type": "image/tiff", + "accession_number": "BFMF_B51_F04_007_donut_01", + "role": "Access", + "rank": 0, + "published": true, + "visibility": "Public", + "work_id": "8373e219-0afc-4507-80e3-ae557c29e5a5", + "extracted_metadata": { + "exif": { + "tool": "exifr", + "tool_version": "6.1.1", + "value": { + "bitsPerSample": "8, 8, 8", + "compression": "Uncompressed", + "imageHeight": 970, + "imageWidth": 1540, + "make": "Phase One", + "model": "IQ180", + "orientation": "Horizontal (normal)", + "photometricInterpretation": "RGB", + "planarConfiguration": "Chunky", + "resolutionUnit": "inches", + "samplesPerPixel": 3, + "software": "Capture One 11 Macintosh", + "xResolution": 400, + "yResolution": 400 + } + } + }, + "group_with": null, + "poster_offset": null, + "annotations": [ + { + "id": "9a0a0056-e859-4960-8028-0fefccc00562", + "type": "transcription", + "model": 
"us.anthropic.claude-sonnet-4-6", + "language": ["en"], + "content": "Farmer Brothers\nCOFFEE\n\nFARMER BROS. CO.\nRESTAURANT SERVICE\n\nBOB DOUGHERTY 2142 JERROLD\nBRANCH SUPERVISOR SAN FRANCISCO, CALIF.\n PHONE 282-2100" + } + ], + "indexed_at": "2026-05-01T18:25:58.002003", + "alt_text": null, + "digests": { + "sha1": "5c624f9fb195ec0c684f2d5c9dc16ae715f36247", + "sha256": "70cbbcde0d28f9de8b3102bb3b231f5a6c48424b12572069268b7fb77087166f" + }, + "image_caption": null, + "api_link": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "api_model": "FileSet", + "create_date": "2021-03-16T05:14:00.547263Z", + "modified_date": "2021-11-18T19:16:53.872768Z", + "download_url": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/5de1d0a1-adb8-4412-90a4-eb48f82383c7/download", + "representative_image_url": "https://iiif.stack.rdc-staging.library.northwestern.edu/iiif/3/5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "work_title": "Farmer Brothers Coffee" + }, + "fields": { + "work_id": ["8373e219-0afc-4507-80e3-ae557c29e5a5"] + }, + "inner_hits": { + "matching_filesets": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 4.4889727, + "hits": [ + { + "_index": "mbk-dev-dc-v2-file-set-1778017290401", + "_id": "5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "_score": 4.4889727, + "_source": { + "id": "5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "label": "BFMF_B51_F04_007_p001.tif", + "description": null, + "streaming_url": null, + "mime_type": "image/tiff", + "accession_number": "BFMF_B51_F04_007_donut_01", + "role": "Access", + "rank": 0, + "published": true, + "visibility": "Public", + "work_id": "8373e219-0afc-4507-80e3-ae557c29e5a5", + "extracted_metadata": { + "exif": { + "tool": "exifr", + "tool_version": "6.1.1", + "value": { + "bitsPerSample": "8, 8, 8", + "compression": "Uncompressed", + "imageHeight": 970, + "imageWidth": 1540, + "make": "Phase One", + "model": "IQ180", + 
"orientation": "Horizontal (normal)", + "photometricInterpretation": "RGB", + "planarConfiguration": "Chunky", + "resolutionUnit": "inches", + "samplesPerPixel": 3, + "software": "Capture One 11 Macintosh", + "xResolution": 400, + "yResolution": 400 + } + } + }, + "group_with": null, + "poster_offset": null, + "annotations": [ + { + "id": "9a0a0056-e859-4960-8028-0fefccc00562", + "type": "transcription", + "model": "us.anthropic.claude-sonnet-4-6", + "language": ["en"], + "content": "Farmer Brothers\nCOFFEE\n\nFARMER BROS. CO.\nRESTAURANT SERVICE\n\nBOB DOUGHERTY 2142 JERROLD\nBRANCH SUPERVISOR SAN FRANCISCO, CALIF.\n PHONE 282-2100" + } + ], + "indexed_at": "2026-05-01T18:25:58.002003", + "alt_text": null, + "digests": { + "sha1": "5c624f9fb195ec0c684f2d5c9dc16ae715f36247", + "sha256": "70cbbcde0d28f9de8b3102bb3b231f5a6c48424b12572069268b7fb77087166f" + }, + "image_caption": null, + "api_link": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "api_model": "FileSet", + "create_date": "2021-03-16T05:14:00.547263Z", + "modified_date": "2021-11-18T19:16:53.872768Z", + "download_url": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/5de1d0a1-adb8-4412-90a4-eb48f82383c7/download", + "representative_image_url": "https://iiif.stack.rdc-staging.library.northwestern.edu/iiif/3/5de1d0a1-adb8-4412-90a4-eb48f82383c7", + "work_title": "Farmer Brothers Coffee" + } + }, + { + "_index": "mbk-dev-dc-v2-file-set-1778017290401", + "_id": "3fdcb82b-dc4d-492d-9159-b2a45a3aac6c", + "_score": 4.2681394, + "_source": { + "id": "3fdcb82b-dc4d-492d-9159-b2a45a3aac6c", + "label": "BFMF_B51_F04_007_p008.tif", + "description": null, + "streaming_url": null, + "mime_type": "image/tiff", + "accession_number": "BFMF_B51_F04_007_donut_08", + "role": "Access", + "rank": 2130706432, + "published": true, + "visibility": "Public", + "work_id": "8373e219-0afc-4507-80e3-ae557c29e5a5", + "extracted_metadata": { + "exif": { + 
"tool": "exifr", + "tool_version": "6.1.1", + "value": { + "bitsPerSample": "8, 8, 8", + "compression": "Uncompressed", + "imageHeight": 4311, + "imageWidth": 3427, + "make": "Phase One", + "model": "IQ180", + "orientation": "Horizontal (normal)", + "photometricInterpretation": "RGB", + "planarConfiguration": "Chunky", + "resolutionUnit": "inches", + "samplesPerPixel": 3, + "software": "Capture One 11 Macintosh", + "xResolution": 400, + "yResolution": 400 + } + } + }, + "group_with": null, + "poster_offset": null, + "annotations": [ + { + "id": "ac5fd9e7-af4f-4a4e-9de0-7744d11307d6", + "type": "transcription", + "model": "us.anthropic.claude-sonnet-4-6", + "language": ["en"], + "content": "WASHINGTON\nSeattle\nTacoma Spokane\nYakima\nPortland\nEugene\nOREGON\nMedford\nKlamath Falls\n\nEureka\n\nChico\nCALIFORNIA\nSanta Rosa Sacramento\nSan Francisco Oakland\nSan Jose Stockton\nWatsonville Bishop\nFresno\nSan Luis Obispo\nBakersfield\nLancaster\nVentura Riverside\nPalm Springs\nSan Fernando Valley\nHollywood\nSanta Monica\nLos Angeles San\nLong Beach Diego\nSan Gabriel\nTorrance\n\nGreat Falls\nMissoula MONTANA\nButte\nBillings\n\nIDAHO\nBoise\n\nElko\n\nNEVADA\nReno\n\nSalt Lake City\nUTAH\n\nLas Vegas\n\nFlagstaff\nARIZONA\nPhoenix\n\nTucson\n\nWYOMING\nCasper\n\nCOLORADO\nDenver\n\nAlbuquerque\nNEW MEXICO\n\nRoswell\n\nEl Paso\n\nEl Centro Santa Ana\n\nNORTH DAKOTA\nBismarck\n\nSOUTH DAKOTA\n\nNEBRASKA\n\nTulsa\nOklahoma City\nOKLAHOMA\nAmarillo\nWichita Falls\nLubbock Fort Worth Dallas\nTEXAS\nOdessa\n\nAustin\nHouston\nSan Antonio\nCorpus\nChristi\n\nFarmer Brothers\nCOFFEE\n\nFarmer Bros. Co. was founded during the year 1912 and from its early inception has grown and expanded with an ever increasing demand for fine quality coffee, allied products and service. From strategically established branches the Company sells in the eighteen western states. 
An enthusiastic sales force is eager to extend good service, and provide customers with coffee \"Consistently Good.\"\n\nour pledge\n\nFarmer Brothers\nCOFFEE\n\nIn appreciation to the thousands of loyal customers that we have been privileged to serve, \"we pledge the brand name of Farmer Bros. Co. will always be synonymous with quality and our reputation for good business ethics shall always prevail.\"\n\nBFME_BSL_F04_007" + } + ], + "indexed_at": "2026-05-01T18:19:21.422546", + "alt_text": null, + "digests": { + "sha1": "6330add117040ec51aa87c86d4021f4ad4928509", + "sha256": "4e266fac3db3451692ff0aab92b85c1e2c2368dc0bc4ebbbd92a26f325f7dc36" + }, + "image_caption": null, + "api_link": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/3fdcb82b-dc4d-492d-9159-b2a45a3aac6c", + "api_model": "FileSet", + "create_date": "2021-03-16T05:14:00.887189Z", + "modified_date": "2021-11-18T19:16:05.520021Z", + "download_url": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/3fdcb82b-dc4d-492d-9159-b2a45a3aac6c/download", + "representative_image_url": "https://iiif.stack.rdc-staging.library.northwestern.edu/iiif/3/3fdcb82b-dc4d-492d-9159-b2a45a3aac6c", + "work_title": "Farmer Brothers Coffee" + } + }, + { + "_index": "mbk-dev-dc-v2-file-set-1778017290401", + "_id": "529f1276-d4a0-4540-846e-05635d0c4443", + "_score": 4.0512323, + "_source": { + "id": "529f1276-d4a0-4540-846e-05635d0c4443", + "label": "BFMF_B51_F04_007_p003.tif", + "description": null, + "streaming_url": null, + "mime_type": "image/tiff", + "accession_number": "BFMF_B51_F04_007_donut_03", + "role": "Access", + "rank": 1610612736, + "published": true, + "visibility": "Public", + "work_id": "8373e219-0afc-4507-80e3-ae557c29e5a5", + "extracted_metadata": { + "exif": { + "tool": "exifr", + "tool_version": "6.1.1", + "value": { + "bitsPerSample": "8, 8, 8", + "compression": "Uncompressed", + "imageHeight": 2628, + "imageWidth": 1846, + "make": "Phase One", + "model": "IQ180", + 
"orientation": "Horizontal (normal)", + "photometricInterpretation": "RGB", + "planarConfiguration": "Chunky", + "resolutionUnit": "inches", + "samplesPerPixel": 3, + "software": "Capture One 11 Macintosh", + "xResolution": 400, + "yResolution": 400 + } + } + }, + "group_with": null, + "poster_offset": null, + "annotations": [ + { + "id": "e7a7ff82-c41c-4e15-970b-0bf1fd4ab71e", + "type": "transcription", + "model": "us.anthropic.claude-sonnet-4-6", + "language": ["en"], + "content": "7/6\n\nFarmers Bros.\n\n10 LBS — $8.80\n1000 Filters - 3.40\n\nCoffee is Farmers" + } + ], + "indexed_at": "2026-05-01T18:07:04.846275", + "alt_text": null, + "digests": { + "sha1": "0d9095c24ba269dfced07f4229d8285505e16c96", + "sha256": "39c1b34dfc5a53be237a02136fd62412955607f65d8f65159588f41a7dcd646b" + }, + "image_caption": null, + "api_link": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/529f1276-d4a0-4540-846e-05635d0c4443", + "api_model": "FileSet", + "create_date": "2021-03-16T05:14:00.650676Z", + "modified_date": "2021-11-18T19:16:18.952281Z", + "download_url": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/529f1276-d4a0-4540-846e-05635d0c4443/download", + "representative_image_url": "https://iiif.stack.rdc-staging.library.northwestern.edu/iiif/3/529f1276-d4a0-4540-846e-05635d0c4443", + "work_title": "Farmer Brothers Coffee" + } + }, + { + "_index": "mbk-dev-dc-v2-file-set-1778017290401", + "_id": "81a1ee14-0f93-4885-aa12-571d674801cd", + "_score": 2.3322124, + "_source": { + "id": "81a1ee14-0f93-4885-aa12-571d674801cd", + "label": "BFMF_B51_F04_007_p007.tif", + "description": null, + "streaming_url": null, + "mime_type": "image/tiff", + "accession_number": "BFMF_B51_F04_007_donut_07", + "role": "Access", + "rank": 2113929216, + "published": true, + "visibility": "Public", + "work_id": "8373e219-0afc-4507-80e3-ae557c29e5a5", + "extracted_metadata": { + "exif": { + "tool": "exifr", + "tool_version": "6.1.1", + "value": { + 
"bitsPerSample": "8, 8, 8", + "compression": "Uncompressed", + "imageHeight": 4328, + "imageWidth": 3378, + "make": "Phase One", + "model": "IQ180", + "orientation": "Horizontal (normal)", + "photometricInterpretation": "RGB", + "planarConfiguration": "Chunky", + "resolutionUnit": "inches", + "samplesPerPixel": 3, + "software": "Adobe Photoshop CS6 (Windows)", + "subfileType": "Full-resolution image", + "xResolution": 400, + "yResolution": 400 + } + } + }, + "group_with": null, + "poster_offset": null, + "annotations": [ + { + "id": "2f77d5e3-4ad0-4d0b-8947-51086fcb2961", + "type": "transcription", + "model": "us.anthropic.claude-sonnet-4-6", + "language": ["en"], + "content": "SPICE\n\n No.1 No.5 25 Lb. 100 Lb. Misc.\nAllspice, Ground X X\nAllspice, Whole X X\nBar B. Q. Spice, Plain X X\nBasil, Sweet, Whole X X\nBay Leaves, Whole X X\nCaraway Seed, Whole X X\nCelery Salt X X X\nCelery Seed, Whole X X\nChili, Jap, Crushed X X\nChili, Jap, Whole X X\nChili Pequines, Whole X X\nChili Pepper, Albuquerque X X X\nChili Pepper X X\nChili Pods, Calif., Whole X\nChili Powder, Gateway X X X\nChili Powder, Regular X X X\nCinnamon, Ground X X X\nCinnamon, Whole X X\nClove, Ground X X\nClove, Whole X X\nCream of Tartar X X\nCuminos Seed, Ground X X X\nCuminos Seed, Whole X X\nCurry Powder X X\nFennel Seed, Whole X X\nGarlic, Granulated X X 30 Lb.\nGarlic, Pure X X 30 Lb.\nGarlic Salt X X X\nGinger, Ground X X\nItalian Seasoning, Whole X X\nM.S.G. 
X X X X\nMace, Ground X X\nMeringue Powder X X X\nMustard, Regular X X X\nMustard, Hot X X X\nNutmeg, Ground X X\nOnion, Chopped 2½ & 15 Lbs.\nOnion, Granulated X X\nOnion, Pure X X\nOnion Salt X X X\nOregano, Ground X X\nOregano, Whole, Greek X X\nOregano, Whole, Mexican X X\nPaprika X X X\nPaprika, Hungarian X X X\nPaprika, Spanish X X X\nParsley Flakes X 15 Lb.\nPepper, Black, Coarse X X X X\nPepper, Black, Cracked X X X\nPepper, Black, Ground X X X X\nPepper, Steel Cut X X X\nPepper, Black, Whole X X X\nPepper, Cayenne X X\nPepper, White, Ground X X X\nPickling Spice, Whole X X X\nPoppy Seed, Whole X X\nPoultry Seasoning X X\nPumpkin Pie Spice X X\nRosemary Leaves X X\nSage, Dalmation, Rubbed X X\nSage, Dalmation, Ground X X\nSeasonall X X X\nSeasoning Salt X X X\n\n\nSPICE, Continued\n\n No.1 No.5 25 Lb. 100 Lb Misc.\nSesame Seed, Whole X X\nStaWhite X X X\nTarragon, Whole X X\nThyme, Ground X X\nThyme, Whole X X\nTaco Mixes X X\n\n\nPAPER PRODUCTS\n\n Qty. Size\nButterchips,\nSquare Per M\nStyro Cups 25 6 Oz. 8 Oz.\nStyro Lids 100 6 Oz. 8 Oz.\nNapkins 500\nSandwich Tissue Per M 12 x 12\nSouffle Cup 250 2½ Oz. 1 Oz. ¾ Oz. ½ Oz.\nWaiter Checks Ea. 65\nWaiter Checks Ea. 66\nWaiter Checks Ea. 
67\nCoffee Coasters 2 M\nWax Paper\nDividers, Dry 1 M 6 x 6\nDividers, Wet 1 M 6 x 6\nWraps, Dry 1 M 9x12\nWraps, Wet 1 M 12x12\nSandwich Bags 500's\n\n\nGLASSWARE\n\n Size\nLIBBY\nJuice 5 Oz.\nMilk 8 Oz.\nWater 9½ Oz.\nCollins 10 Oz.\nIce Tea 12 Oz.\n\n\nCANDLES\n\n Pink Red\nDecorlites\nTear Drops Amber Pink Red\nSparkle-Lites Amber Blue Red\nPink Lady Candles\nWarmer Candles\nWarmer Glasses\nLighting Tapers\n\n\nMISCELLANEOUS CAFE SUPPLIES\n\nHottles\nHottle Caps, Pap.\nGrill Bricks\nGrill Masters\nGrill Screens\nSponges, S/Metal\n\n\nSILVERWARE\n\n Parade Plain\nIce Tea Spoons X X\nTeaspoons X X\nDessert Spoons X X\nBouillon Spoons X\nDessert Forks X X\nSalad Forks X\nOyster Forks X\nDinner Knives X\nSerrated Knives X X" + } + ], + "indexed_at": "2026-05-01T18:40:39.008014", + "alt_text": null, + "digests": { + "sha1": "f9a88664ebf474ce640faf27936e5f9a56ff05fc", + "sha256": "ac044fa38d7c4d794a96106a98adfc077bcae0987df32c101b2ebafbd14f25dd" + }, + "image_caption": null, + "api_link": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/81a1ee14-0f93-4885-aa12-571d674801cd", + "api_model": "FileSet", + "create_date": "2021-03-16T05:14:00.841729Z", + "modified_date": "2021-11-18T19:18:31.126118Z", + "download_url": "https://dcapi.rdc-staging.library.northwestern.edu/api/v2/file-sets/81a1ee14-0f93-4885-aa12-571d674801cd/download", + "representative_image_url": "https://iiif.stack.rdc-staging.library.northwestern.edu/iiif/3/81a1ee14-0f93-4885-aa12-571d674801cd", + "work_title": "Farmer Brothers Coffee" + } + } + ] + } + } + } + } + ] + }, + "aggregations": { + "visibility": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Public", + "doc_count": 7 + } + ] + }, + "__pagination": { + "value": 4 + } + } +} diff --git a/api/test/unit/api/pagination.test.js b/api/test/unit/api/pagination.test.js index 4d51d6cc..aebbeeb3 100644 --- a/api/test/unit/api/pagination.test.js +++ 
b/api/test/unit/api/pagination.test.js @@ -14,6 +14,10 @@ describe("Paginator", function () { sort: [{ create_date: "asc" }], _source: ["id", "title", "collection"], aggs: { collection: { terms: { field: "contributor.label", size: 10 } } }, + collapse: { + field: "collection.id", + inner_hits: { name: "top_collection_hits", size: 1 }, + }, }; let pager; @@ -64,10 +68,17 @@ describe("Paginator", function () { const rehydrated = await decodeSearchToken(token); expect(rehydrated.models).to.include.members(["works"]); - for (const field of ["query", "size", "sort", "_source"]) { + for (const field of [ + "aggs", + "collapse", + "query", + "size", + "sort", + "_source", + ]) { expect(rehydrated.body[field]).to.deep.equal(requestBody[field]); } - expect(rehydrated.body).not.to.include.keys(["aggs", "from"]); + expect(rehydrated.body).not.to.include.any.keys("from"); }); it("correctly sets the default size", async () => { diff --git a/api/test/unit/api/response/transformer.test.js b/api/test/unit/api/response/transformer.test.js new file mode 100644 index 00000000..cf400e5d --- /dev/null +++ b/api/test/unit/api/response/transformer.test.js @@ -0,0 +1,105 @@ +"use strict"; + +const chai = require("chai"); +const expect = chai.expect; + +const transformer = requireSource("api/response/transformer"); +const { Paginator } = requireSource("api/pagination"); + +describe("Response transformer", () => { + helpers.saveEnvironment(); + + describe("collapse responses", () => { + let requestBody; + let response; + + beforeEach(() => { + response = { + statusCode: 200, + body: helpers.testFixture("mocks/collapse-search.json"), + }; + + requestBody = { + query: { + bool: { + must: [ + { term: { "annotations.type": "transcription" } }, + { match_phrase: { "annotations.content": "coffee" } }, + ], + }, + }, + size: 1, + collapse: { + field: "work_id", + inner_hits: { + name: "matching_filesets", + size: 50, + sort: [{ _score: "desc" }], + }, + }, + }; + }); + + it("transforms a 
`collapse` response to opensearch format", async () => { + const pager = new Paginator( + "http://dcapi.library.northwestern.edu/v2/", + "search", + ["file-sets"], + requestBody + ); + + const result = await transformer.transformSearchResult(response, pager); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body.data).to.be.an("array"); + expect(body.data.length).to.eq(4); + expect(body.info).to.include.key("version"); + expect(body).to.include.key("pagination"); + expect(body.pagination.collapsed_by).to.eq("work_id"); + expect(body.pagination.total_hits).to.eq(7); + expect(body.pagination.total_pages).to.eq(4); + }); + + it("transforms a `collapse` response to iiif format", async () => { + const pager = new Paginator( + "http://dcapi.library.northwestern.edu/v2/", + "search", + ["file-sets"], + requestBody, + "iiif", + { + queryStringParameters: { + collectionLabel: "Test Collection", + collectionSummary: "Test Summary", + }, + } + ); + + const result = await transformer.transformSearchResult(response, pager); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + + expect(body.items).to.be.an("array"); + expect(body.items.length).to.eq(5); + for (var i = 0; i <= 3; i++) { + const item = body.items[i]; + expect(item).to.include.keys( + "homepage", + "label", + "summary", + "thumbnail", + "type" + ); + expect(item.type).to.eq("Manifest"); + } + + const item = body.items[4]; + expect(item).to.include.key("id"); + expect(item).to.include.key("type"); + expect(item.type).to.eq("Collection"); + expect(item.label?.none?.[0]).to.eq("Next page"); + }); + }); +}); diff --git a/docs/docs/spec/data-types.yaml b/docs/docs/spec/data-types.yaml index 58cedcb9..32ef5a80 100644 --- a/docs/docs/spec/data-types.yaml +++ b/docs/docs/spec/data-types.yaml @@ -30,7 +30,7 @@ components: type: string format: uri nullable: true - desciption: IIIF collection url + description: IIIF collection url indexed_at: type: string
format: date-time @@ -338,7 +338,14 @@ components: - Venue/Event Date PaginationInfo: type: object - description: Pagination info for the current response. + description: | + Pagination info for the current response. NOTE: If results are + collapsed, the length of the `data` array may be longer than the + requested `limit`. The `total_hits` value represents the number of + collapsed groups, not the total number of hits across all groups. + Collapsing will also have an effect on the `offset` value, which + represents the starting index of the first collapsed group on the + current page. properties: next_url: type: string @@ -351,8 +358,10 @@ components: description: Base URL to repeat this query for a given page search_token: type: string - required: false description: Tokenized query to use in subsequent GET requests + collapsed_by: + type: string + description: The field by which results are collapsed, if applicable. current_page: type: integer description: Index of current page of results @@ -368,6 +377,12 @@ components: total_pages: type: integer description: Total number of result pages + required: + - current_page + - limit + - offset + - total_hits + - total_pages PreservationLevel: nullable: true description: The preservation workflow applied to the resource @@ -467,7 +482,6 @@ components: type: string accession_number: type: string - required: true description: Accession number for the work. Serves as basis for the filename. alternate_title: type: array @@ -495,12 +509,14 @@ components: nullable: true box_name: type: array - description: Physical box name. Sometimes used with Distinctive Collections materials. + description: Physical box name. Sometimes used with Distinctive Collections + materials. items: type: string box_number: type: array - description: Physical box number. Sometimes used with Distinctive Collections materials. + description: Physical box number. Sometimes used with Distinctive Collections + materials. 
items: type: string caption: @@ -557,7 +573,8 @@ components: type: string date_created: type: array - description: A point or a period of time associatied with an event in the lifecycle of the resource. + description: A point or a period of time associatied with an event in the + lifecycle of the resource. items: type: string description: @@ -567,12 +584,14 @@ components: type: string embedding: type: array - description: Vector representation of the resource's location in the repository's semantic space. + description: Vector representation of the resource's location in the + repository's semantic space. items: type: number embedding_model: type: string - description: The name of the inference model used to generate the `embedding` from the resource's content. + description: The name of the inference model used to generate the `embedding` + from the resource's content. embedding_text_length: type: string description: The length of the embedding text in bytes. @@ -599,7 +618,6 @@ components: id: type: string format: uuid - required: true description: UUID for the work record in the repository. identifier: type: array @@ -610,12 +628,12 @@ components: type: string format: uri nullable: true - desciption: IIIF url manifest for the work + description: IIIF url manifest for the work indexed_at: type: string format: date-time nullable: true - desciption: Date/time of last index + description: Date/time of last index ingest_project: type: object nullable: true @@ -729,7 +747,9 @@ components: - task_number provenance: type: array - description: Location of Physical Object // will also include messy dates. Information about the provenance, such as origin, ownership and custodial history (chain of custody), of a resource. + description: Location of Physical Object // will also include messy dates. + Information about the provenance, such as origin, ownership and + custodial history (chain of custody), of a resource. 
items: type: string published: @@ -771,34 +791,42 @@ components: type: string series: type: array - description: Sometimes used with Distincitive Collections materials. Used for archival series and subseries information. + description: Sometimes used with Distincitive Collections materials. Used for + archival series and subseries information. items: type: string source: type: array - description: A related resource from which the described resource is derived. Source of digital object - book, journal, etc. Follow Chicago Manual of Style for citation. + description: A related resource from which the described resource is derived. + Source of digital object - book, journal, etc. Follow Chicago Manual + of Style for citation. items: type: string status: $ref: "#/components/schemas/Status" style_period: type: array - description: A defined style, historical period, group, school, dynasty, movement, etc. whose characteristics are represented in the work. + description: A defined style, historical period, group, school, dynasty, + movement, etc. whose characteristics are represented in the work. items: $ref: "#/components/schemas/ControlledTerm" subject: type: array - description: A defined style, historical period, group, school, dynasty, movement, etc. whose characteristics are represented in the work. + description: A defined style, historical period, group, school, dynasty, + movement, etc. whose characteristics are represented in the work. items: $ref: "#/components/schemas/ControlledTermWithRole" table_of_contents: type: array - description: Used to provide the titles of separate works or parts of a resource. Information provided may also contain statements of responsibility or other sequential designations. + description: Used to provide the titles of separate works or parts of a + resource. Information provided may also contain statements of + responsibility or other sequential designations. 
items: type: string technique: type: array - description: A defined style, historical period, group, school, dynasty, movement, etc. whose characteristics are represented in the work. + description: A defined style, historical period, group, school, dynasty, + movement, etc. whose characteristics are represented in the work. items: $ref: "#/components/schemas/ControlledTerm" terms_of_use: From 14ab761cf5993660ae26935722572a36e29e3e41 Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Mon, 11 May 2026 17:10:46 +0000 Subject: [PATCH 07/11] Include total unique collapsed field values in collapse response --- api/src/api/pagination.js | 5 +++- docs/docs/spec/data-types.yaml | 44 ++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/api/src/api/pagination.js b/api/src/api/pagination.js index 694bbb86..7e8338f4 100644 --- a/api/src/api/pagination.js +++ b/api/src/api/pagination.js @@ -114,7 +114,10 @@ class Paginator { format: this.format, }; if (this.body.collapse) { - result.collapsed_by = this.body.collapse.field; + result.collapsed_by = { + field: this.body.collapse.field, + total_hits: aggregatedCount, + }; } if (opts.includeOptions) { result.options = this.options; diff --git a/docs/docs/spec/data-types.yaml b/docs/docs/spec/data-types.yaml index 32ef5a80..df4afddc 100644 --- a/docs/docs/spec/data-types.yaml +++ b/docs/docs/spec/data-types.yaml @@ -360,8 +360,15 @@ components: type: string description: Tokenized query to use in subsequent GET requests collapsed_by: - type: string - description: The field by which results are collapsed, if applicable. + type: object + description: Result grouping info, if applicable. + properties: + field: + type: string + description: The name of the field by which results are grouped. + total_hits: + type: integer + description: The total number of unique collapsed field groups. 
current_page: type: integer description: Index of current page of results @@ -509,13 +516,15 @@ components: nullable: true box_name: type: array - description: Physical box name. Sometimes used with Distinctive Collections + description: + Physical box name. Sometimes used with Distinctive Collections materials. items: type: string box_number: type: array - description: Physical box number. Sometimes used with Distinctive Collections + description: + Physical box number. Sometimes used with Distinctive Collections materials. items: type: string @@ -573,7 +582,8 @@ components: type: string date_created: type: array - description: A point or a period of time associatied with an event in the + description: + A point or a period of time associatied with an event in the lifecycle of the resource. items: type: string @@ -590,7 +600,8 @@ components: type: number embedding_model: type: string - description: The name of the inference model used to generate the `embedding` + description: + The name of the inference model used to generate the `embedding` from the resource's content. embedding_text_length: type: string @@ -747,7 +758,8 @@ components: - task_number provenance: type: array - description: Location of Physical Object // will also include messy dates. + description: + Location of Physical Object // will also include messy dates. Information about the provenance, such as origin, ownership and custodial history (chain of custody), of a resource. items: @@ -791,13 +803,15 @@ components: type: string series: type: array - description: Sometimes used with Distincitive Collections materials. Used for + description: + Sometimes used with Distincitive Collections materials. Used for archival series and subseries information. items: type: string source: type: array - description: A related resource from which the described resource is derived. + description: + A related resource from which the described resource is derived. Source of digital object - book, journal, etc. 
Follow Chicago Manual of Style for citation. items: @@ -806,26 +820,30 @@ components: $ref: "#/components/schemas/Status" style_period: type: array - description: A defined style, historical period, group, school, dynasty, + description: + A defined style, historical period, group, school, dynasty, movement, etc. whose characteristics are represented in the work. items: $ref: "#/components/schemas/ControlledTerm" subject: type: array - description: A defined style, historical period, group, school, dynasty, + description: + A defined style, historical period, group, school, dynasty, movement, etc. whose characteristics are represented in the work. items: $ref: "#/components/schemas/ControlledTermWithRole" table_of_contents: type: array - description: Used to provide the titles of separate works or parts of a + description: + Used to provide the titles of separate works or parts of a resource. Information provided may also contain statements of responsibility or other sequential designations. items: type: string technique: type: array - description: A defined style, historical period, group, school, dynasty, + description: + A defined style, historical period, group, school, dynasty, movement, etc. whose characteristics are represented in the work. 
items: $ref: "#/components/schemas/ControlledTerm" From 241befaeb99364473efaf7e44516cf96c93d554f Mon Sep 17 00:00:00 2001 From: Brendan Quinn Date: Wed, 13 May 2026 14:20:05 -0500 Subject: [PATCH 08/11] Add ?as=iiif parameter to file set routes --- api/src/api/response/iiif/canvas.js | 190 +++++++++++++++ api/src/handlers/get-file-set-by-id.js | 7 + .../mocks/fileset-image-canvas-1234.json | 24 ++ api/test/integration/get-doc.test.js | 35 +++ api/test/test-helpers/index.js | 2 + .../unit/api/response/iiif/canvas.test.js | 216 ++++++++++++++++++ .../unit/api/response/transformer.test.js | 5 +- docs/docs/spec/openapi.yaml | 1 + docs/docs/spec/types.yaml | 4 + 9 files changed, 483 insertions(+), 1 deletion(-) create mode 100644 api/src/api/response/iiif/canvas.js create mode 100644 api/test/fixtures/mocks/fileset-image-canvas-1234.json create mode 100644 api/test/unit/api/response/iiif/canvas.test.js diff --git a/api/src/api/response/iiif/canvas.js b/api/src/api/response/iiif/canvas.js new file mode 100644 index 00000000..7efe3be4 --- /dev/null +++ b/api/src/api/response/iiif/canvas.js @@ -0,0 +1,190 @@ +const { dcApiEndpoint } = require("../../../environment"); +const { getWork } = require("../../opensearch"); +const { transformError } = require("../error"); +const { + addThumbnailToCanvas, + buildImageResourceId, + buildImageService, +} = require("./presentation-api/items"); +const { + buildPlaceholderCanvas, +} = require("./presentation-api/placeholder-canvas"); + +async function transform(response, options = {}) { + if (response.statusCode !== 200) return transformError(response); + + const openSearchResponse = JSON.parse(response.body); + const fileSet = openSearchResponse._source; + const canvasId = `${dcApiEndpoint()}/file-sets/${fileSet.id}?as=iiif`; + const { width, height } = dimensions(fileSet); + + const canvas = { + "@context": "http://iiif.io/api/presentation/3/context.json", + id: canvasId, + type: "Canvas", + width, + height, + label: { none: 
[label(fileSet)] }, + items: [annotationPage(canvasId, fileSet, { width, height })], + }; + + if (fileSet.description) { + canvas.summary = { none: [fileSet.description] }; + } + + if (fileSet.representative_image_url) { + const thumbnailBuilder = { + thumbnails: [], + addThumbnail(thumbnail) { + this.thumbnails.push(thumbnail); + }, + }; + addThumbnailToCanvas(thumbnailBuilder, fileSet); + canvas.thumbnail = thumbnailBuilder.thumbnails; + } + + if ( + isImage(fileSet) && + fileSet.representative_image_url && + fileSetWidth(fileSet) && + fileSetHeight(fileSet) + ) { + canvas.placeholderCanvas = buildPlaceholderCanvas(canvasId, { + ...fileSet, + width, + height, + }); + } + + const partOf = await parentManifest(fileSet, options); + if (partOf) { + canvas.partOf = [partOf]; + } + + return { + statusCode: 200, + headers: { + "content-type": "application/json", + }, + body: JSON.stringify(canvas), + }; +} + +function annotationPage(canvasId, fileSet, dimensions) { + const pageId = `${canvasId}/annotation-page`; + return { + id: pageId, + type: "AnnotationPage", + items: [ + { + id: `${canvasId}/annotation/0`, + type: "Annotation", + motivation: "painting", + target: canvasId, + body: annotationBody(fileSet, dimensions), + }, + ], + }; +} + +function annotationBody(fileSet, { width, height }) { + const body = { + id: bodyId(fileSet), + type: bodyType(fileSet), + format: fileSet.mime_type, + label: { en: [label(fileSet)] }, + }; + + if (["Image", "Video"].includes(body.type)) { + body.width = width; + body.height = height; + } + + if (body.type === "Image" && fileSet.representative_image_url) { + body.service = buildImageService(fileSet.representative_image_url); + } + + if (["Sound", "Video"].includes(body.type) && fileSet.duration) { + body.duration = fileSet.duration; + } + + return body; +} + +function bodyId(fileSet) { + if (isImage(fileSet) && fileSet.representative_image_url) { + return buildImageResourceId(fileSet.representative_image_url, "600,"); + } + return ( 
+ fileSet.streaming_url || + fileSet.download_url || + fileSet.api_link || + `${dcApiEndpoint()}/file-sets/${fileSet.id}` + ); +} + +function bodyType(fileSet) { + const mimeType = fileSet.mime_type || ""; + if (mimeType.startsWith("image/")) return "Image"; + if (mimeType.startsWith("audio/")) return "Sound"; + if (mimeType.startsWith("video/")) return "Video"; + if (mimeType === "application/pdf") return "Text"; + return "Dataset"; +} + +function dimensions(fileSet) { + return { + width: fileSetWidth(fileSet) || 100, + height: fileSetHeight(fileSet) || 100, + }; +} + +function fileSetWidth(fileSet) { + return fileSet.width || fileSet.extracted_metadata?.exif?.value?.imageWidth; +} + +function fileSetHeight(fileSet) { + return fileSet.height || fileSet.extracted_metadata?.exif?.value?.imageHeight; +} + +function isImage(fileSet) { + return fileSet.mime_type?.startsWith("image/"); +} + +function label(fileSet) { + return fileSet.label || fileSet.original_filename || fileSet.id; +} + +async function parentManifest(fileSet, options) { + if (!fileSet.work_id) return null; + + return { + id: `${dcApiEndpoint()}/works/${fileSet.work_id}?as=iiif`, + type: "Manifest", + label: { en: [await workTitle(fileSet, options)] }, + }; +} + +async function workTitle(fileSet, options) { + if (fileSet.work_title) return fileSet.work_title; + + let response; + try { + response = await getWork(fileSet.work_id, { + allowPrivate: options.allowPrivate, + allowUnpublished: options.allowUnpublished, + }); + } catch (_error) { + return fileSet.work_id; + } + if (response.statusCode !== 200) return fileSet.work_id; + + return JSON.parse(response.body)?._source?.title || fileSet.work_id; +} + +module.exports = { + annotationBody, + bodyType, + dimensions, + transform, +}; diff --git a/api/src/handlers/get-file-set-by-id.js b/api/src/handlers/get-file-set-by-id.js index 48d6e3e5..5809fb71 100644 --- a/api/src/handlers/get-file-set-by-id.js +++ b/api/src/handlers/get-file-set-by-id.js @@ -1,5 
+1,6 @@ const { wrap } = require("./middleware"); const { getFileSet } = require("../api/opensearch"); +const canvasResponse = require("../api/response/iiif/canvas"); const opensearchResponse = require("../api/response/opensearch"); /** @@ -11,5 +12,11 @@ exports.handler = wrap(async (event) => { event.userToken.isSuperUser() || event.userToken.isReadingRoom(); const allowUnpublished = event.userToken.isSuperUser(); const esResponse = await getFileSet(id, { allowPrivate, allowUnpublished }); + if (event.queryStringParameters?.as === "iiif") { + return await canvasResponse.transform(esResponse, { + allowPrivate, + allowUnpublished, + }); + } return await opensearchResponse.transform(esResponse); }); diff --git a/api/test/fixtures/mocks/fileset-image-canvas-1234.json b/api/test/fixtures/mocks/fileset-image-canvas-1234.json new file mode 100644 index 00000000..6988f4cf --- /dev/null +++ b/api/test/fixtures/mocks/fileset-image-canvas-1234.json @@ -0,0 +1,24 @@ +{ + "_index": "dev-dc-v2-file-set", + "_type": "_doc", + "_id": "1234", + "_version": 1, + "found": true, + "_source": { + "id": "1234", + "api_model": "FileSet", + "api_link": "https://thisisafakeapiurl/file-sets/1234", + "visibility": "Public", + "published": true, + "role": "Access", + "label": "recto", + "description": "Map of British Isles showing political boundaries.", + "mime_type": "image/tiff", + "height": 8582, + "width": 6262, + "representative_image_url": "https://index.test.library.northwestern.edu/iiif/3/1234", + "download_url": "https://thisisafakeapiurl/file-sets/1234/download", + "work_id": "20f1cd93-7851-4646-af07-0b544661569f", + "work_title": "L'Isole Britanniche (1811)" + } +} diff --git a/api/test/integration/get-doc.test.js b/api/test/integration/get-doc.test.js index 8086da7d..76c390e8 100644 --- a/api/test/integration/get-doc.test.js +++ b/api/test/integration/get-doc.test.js @@ -154,6 +154,41 @@ describe("Doc retrieval routes", () => { expect(resultBody.data.id).to.eq("1234"); }); + 
it("returns a single file-set as a IIIF Canvas", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply( + 200, + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + + const event = helpers + .mockEvent("GET", "/file-sets/{id}") + .pathParams({ id: 1234 }) + .queryParams({ as: "iiif" }) + .render(); + const result = await handler(event); + expect(result.statusCode).to.eq(200); + expect(result).to.have.header( + "content-type", + /application\/json;.*charset=UTF-8/ + ); + + const resultBody = JSON.parse(result.body); + expect(resultBody.type).to.eq("Canvas"); + expect(resultBody["@context"]).to.eq( + "http://iiif.io/api/presentation/3/context.json" + ); + expect(resultBody.id).to.eq( + `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif` + ); + expect(resultBody.partOf[0]).to.deep.eq({ + id: `${process.env.DC_API_ENDPOINT}/works/20f1cd93-7851-4646-af07-0b544661569f?as=iiif`, + type: "Manifest", + label: { en: ["L'Isole Britanniche (1811)"] }, + }); + }); + it("403s a private file-set", async () => { const event = helpers .mockEvent("GET", "/file-sets/{id}") diff --git a/api/test/test-helpers/index.js b/api/test/test-helpers/index.js index b2599810..307a4294 100644 --- a/api/test/test-helpers/index.js +++ b/api/test/test-helpers/index.js @@ -10,6 +10,8 @@ process.env.__SKIP_SECRETS__ = "true"; const TestEnvironment = { API_TOKEN_SECRET: "abc123", API_TOKEN_NAME: "dcapiTEST", + AWS_ACCESS_KEY_ID: "test", + AWS_SECRET_ACCESS_KEY: "test", DC_URL: "https://thisisafakedcurl", DC_API_ENDPOINT: "https://thisisafakeapiurl", DEV_TEAM_NET_IDS: "abc123,def456", diff --git a/api/test/unit/api/response/iiif/canvas.test.js b/api/test/unit/api/response/iiif/canvas.test.js new file mode 100644 index 00000000..1d45d388 --- /dev/null +++ b/api/test/unit/api/response/iiif/canvas.test.js @@ -0,0 +1,216 @@ +"use strict"; + +const chai = require("chai"); +const expect = chai.expect; + +const { dcApiEndpoint } = requireSource("environment"); +const transformer 
= requireSource("api/response/iiif/canvas"); + +async function setup(fixture = "mocks/fileset-image-canvas-1234.json") { + const response = { + statusCode: 200, + body: helpers.testFixture(fixture), + }; + const source = JSON.parse(response.body)._source; + + const result = await transformer.transform(response); + expect(result.statusCode).to.eq(200); + + return { source, canvas: JSON.parse(result.body) }; +} + +describe("FileSet as IIIF Canvas response transformer", () => { + helpers.saveEnvironment(); + const mock = helpers.mockIndex(); + + it("transforms an image file set response to a Canvas", async () => { + const { source, canvas } = await setup(); + + expect(canvas["@context"]).to.eq( + "http://iiif.io/api/presentation/3/context.json" + ); + expect(canvas.id).to.eq( + `${dcApiEndpoint()}/file-sets/${source.id}?as=iiif` + ); + expect(canvas.type).to.eq("Canvas"); + expect(canvas.label.none[0]).to.eq(source.label); + expect(canvas.summary.none[0]).to.eq(source.description); + expect(canvas.width).to.eq(source.width); + expect(canvas.height).to.eq(source.height); + expect(canvas.thumbnail[0].id).to.eq( + `${source.representative_image_url}/full/!300,300/0/default.jpg` + ); + }); + + it("builds a painting annotation for image file sets", async () => { + const { source, canvas } = await setup(); + const annotationPage = canvas.items[0]; + const annotation = annotationPage.items[0]; + + expect(annotationPage.id).to.eq(`${canvas.id}/annotation-page`); + expect(annotationPage.type).to.eq("AnnotationPage"); + expect(annotation.id).to.eq(`${canvas.id}/annotation/0`); + expect(annotation.type).to.eq("Annotation"); + expect(annotation.motivation).to.eq("painting"); + expect(annotation.target).to.eq(canvas.id); + expect(annotation.body).to.deep.include({ + id: `${source.representative_image_url}/full/600,/0/default.jpg`, + type: "Image", + format: source.mime_type, + width: source.width, + height: source.height, + }); + expect(annotation.body.service[0]).to.deep.eq({ + 
id: source.representative_image_url, + type: "ImageService2", + profile: "http://iiif.io/api/image/2/level2.json", + }); + }); + + it("adds placeholderCanvas for image file sets with dimensions", async () => { + const { source, canvas } = await setup(); + + expect(canvas.placeholderCanvas.id).to.eq(`${canvas.id}/placeholder`); + expect(canvas.placeholderCanvas.type).to.eq("Canvas"); + expect(canvas.placeholderCanvas.width).to.eq(640); + expect(canvas.placeholderCanvas.height).to.eq(877); + expect(canvas.placeholderCanvas.items[0].items[0].body.id).to.eq( + `${source.representative_image_url}/full/!640,877/0/default.jpg` + ); + }); + + it("adds partOf from indexed work id and work title", async () => { + const { source, canvas } = await setup(); + + expect(canvas.partOf).to.deep.eq([ + { + id: `${dcApiEndpoint()}/works/${source.work_id}?as=iiif`, + type: "Manifest", + label: { en: [source.work_title] }, + }, + ]); + }); + + it("fetches parent work title when work title is not indexed", async () => { + const responseBody = JSON.parse( + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + delete responseBody._source.work_title; + + mock + .get(`/dc-v2-work/_doc/${responseBody._source.work_id}`) + .reply(200, helpers.testFixture("mocks/work-1234.json")); + + const result = await transformer.transform({ + statusCode: 200, + body: JSON.stringify(responseBody), + }); + const canvas = JSON.parse(result.body); + + expect(canvas.partOf[0].label.en[0]).to.eq("Canary Record TEST 1"); + }); + + it("falls back to work id when parent work title cannot be fetched", async () => { + const responseBody = JSON.parse( + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + delete responseBody._source.work_title; + + mock + .get(`/dc-v2-work/_doc/${responseBody._source.work_id}`) + .reply(404, helpers.testFixture("mocks/missing-work-1234.json")); + + const result = await transformer.transform({ + statusCode: 200, + body: JSON.stringify(responseBody), + }); + 
const canvas = JSON.parse(result.body); + + expect(canvas.partOf[0].label.en[0]).to.eq(responseBody._source.work_id); + }); + + it("maps non-image mime types to IIIF body types", () => { + expect(transformer.bodyType({ mime_type: "audio/mp3" })).to.eq("Sound"); + expect(transformer.bodyType({ mime_type: "video/mp4" })).to.eq("Video"); + expect(transformer.bodyType({ mime_type: "application/pdf" })).to.eq( + "Text" + ); + expect(transformer.bodyType({ mime_type: "application/zip" })).to.eq( + "Dataset" + ); + }); + + it("builds annotation bodies for non-image file sets", () => { + const audio = transformer.annotationBody( + { + id: "audio-123", + label: "Audio", + mime_type: "audio/mp3", + streaming_url: "https://example.com/audio.m3u8", + duration: 12.5, + }, + { width: 100, height: 100 } + ); + const video = transformer.annotationBody( + { + id: "video-123", + label: "Video", + mime_type: "video/mp4", + streaming_url: "https://example.com/video.m3u8", + duration: 25, + }, + { width: 640, height: 480 } + ); + const pdf = transformer.annotationBody( + { + id: "pdf-123", + label: "PDF", + mime_type: "application/pdf", + download_url: "https://example.com/file.pdf", + }, + { width: 100, height: 100 } + ); + const zip = transformer.annotationBody( + { + id: "zip-123", + label: "ZIP", + mime_type: "application/zip", + download_url: "https://example.com/file.zip", + }, + { width: 100, height: 100 } + ); + + expect(audio).to.include({ + id: "https://example.com/audio.m3u8", + type: "Sound", + format: "audio/mp3", + duration: 12.5, + }); + expect(video).to.include({ + id: "https://example.com/video.m3u8", + type: "Video", + format: "video/mp4", + width: 640, + height: 480, + duration: 25, + }); + expect(pdf).to.include({ + id: "https://example.com/file.pdf", + type: "Text", + format: "application/pdf", + }); + expect(zip).to.include({ + id: "https://example.com/file.zip", + type: "Dataset", + format: "application/zip", + }); + }); + + it("passes non-200 responses through 
error transformation", async () => { + const result = await transformer.transform({ statusCode: 404 }); + const body = JSON.parse(result.body); + + expect(result.statusCode).to.eq(404); + expect(body.error).to.eq("Not Found"); + }); +}); diff --git a/api/test/unit/api/response/transformer.test.js b/api/test/unit/api/response/transformer.test.js index cf400e5d..f0acfddb 100644 --- a/api/test/unit/api/response/transformer.test.js +++ b/api/test/unit/api/response/transformer.test.js @@ -56,7 +56,10 @@ describe("Response transformer", () => { expect(body.data.length).to.eq(4); expect(body.info).to.include.key("version"); expect(body).to.include.key("pagination"); - expect(body.pagination.collapsed_by).to.eq("work_id"); + expect(body.pagination.collapsed_by).to.deep.eq({ + field: "work_id", + total_hits: 4, + }); expect(body.pagination.total_hits).to.eq(7); expect(body.pagination.total_pages).to.eq(4); }); diff --git a/docs/docs/spec/openapi.yaml b/docs/docs/spec/openapi.yaml index 8d47ca22..053417c7 100644 --- a/docs/docs/spec/openapi.yaml +++ b/docs/docs/spec/openapi.yaml @@ -165,6 +165,7 @@ paths: - FileSet parameters: - $ref: "./types.yaml#/components/parameters/id" + - $ref: "./types.yaml#/components/parameters/as" responses: 200: $ref: "./types.yaml#/components/responses/DocumentResponse" diff --git a/docs/docs/spec/types.yaml b/docs/docs/spec/types.yaml index 95edac92..11d6cb1f 100644 --- a/docs/docs/spec/types.yaml +++ b/docs/docs/spec/types.yaml @@ -129,6 +129,7 @@ components: oneOf: - $ref: "#/components/schemas/IndexDocument" - $ref: "#/components/schemas/IiifPresentationManifest" + - $ref: "#/components/schemas/IiifPresentationCanvas" info: type: object AnnotationsResponse: @@ -168,6 +169,9 @@ components: IiifPresentationManifest: type: object description: A [IIIF Presentation v3.x](https://iiif.io/api/presentation/3.0/) Manifest + IiifPresentationCanvas: + type: object + description: A [IIIF Presentation v3.x](https://iiif.io/api/presentation/3.0/) Canvas 
IndexDocument: description: A single index document oneOf: From 3310416154665460d46dddcdad4b929fa0c545fe Mon Sep 17 00:00:00 2001 From: Brendan Quinn Date: Thu, 14 May 2026 19:25:33 +0000 Subject: [PATCH 09/11] Add IIIF content search for file sets --- README.md | 16 ++ api/src/api/response/iiif/annotations.js | 24 +-- api/src/api/response/iiif/canvas.js | 6 + api/src/api/response/iiif/file-set-search.js | 35 +++++ api/src/api/response/iiif/manifest.js | 145 +++++++++--------- api/src/api/response/iiif/search-helpers.js | 36 +++++ api/src/api/response/iiif/search.js | 66 ++++---- api/src/handlers/get-file-set-search.js | 30 ++++ api/template.yaml | 24 +++ api/test/integration/get-annotations.test.js | 3 + .../integration/get-file-set-search.test.js | 119 ++++++++++++++ api/test/integration/get-work-search.test.js | 12 +- .../unit/api/response/iiif/canvas.test.js | 4 + .../unit/api/response/iiif/manifest.test.js | 6 + 14 files changed, 387 insertions(+), 139 deletions(-) create mode 100644 api/src/api/response/iiif/file-set-search.js create mode 100644 api/src/api/response/iiif/search-helpers.js create mode 100644 api/src/handlers/get-file-set-search.js create mode 100644 api/test/integration/get-file-set-search.test.js diff --git a/README.md b/README.md index ad8c15eb..e03ed6be 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,22 @@ View and edit information about a specific Work in the Index. 3. View JSON response at `https://USER_PREFIX.dev.rdc.library.northwestern.edu:3002/works/[WORK_ID]` 4. 
View IIIF Manifest JSON response at `https://USER_PREFIX.dev.rdc.library.northwestern.edu:3002/works/[WORK_ID]?as=iiif` +### IIIF content search + +IIIF Presentation responses expose [IIIF Content Search 2.0](https://iiif.io/api/search/2.0/) services for transcription annotations: + +- Work manifests include a `SearchService2` entry for `https://USER_PREFIX.dev.rdc.library.northwestern.edu:3002/works/[WORK_ID]/search?as=iiif` +- File set canvases include a `SearchService2` entry for `https://USER_PREFIX.dev.rdc.library.northwestern.edu:3002/file-sets/[FILE_SET_ID]/search?as=iiif` + +To search transcription text, include a non-empty `q` parameter: + +```shell +curl "https://USER_PREFIX.dev.rdc.library.northwestern.edu:3002/works/[WORK_ID]/search?as=iiif&q=[QUERY]" +curl "https://USER_PREFIX.dev.rdc.library.northwestern.edu:3002/file-sets/[FILE_SET_ID]/search?as=iiif&q=[QUERY]" +``` + +Both endpoints return a IIIF `AnnotationPage` whose `items` target the matching work canvas or file set canvas. Requests without `as=iiif` or a non-empty `q` return `400`. + For help debugging/inspecting, JavaScript `console` messages are written to: `dc-api-v2/dc-api.log` ### DC diff --git a/api/src/api/response/iiif/annotations.js b/api/src/api/response/iiif/annotations.js index d32a4f5a..41837aca 100644 --- a/api/src/api/response/iiif/annotations.js +++ b/api/src/api/response/iiif/annotations.js @@ -1,16 +1,11 @@ const { dcApiEndpoint } = require("../../../environment"); -const { getWorkFileSets } = require("../../opensearch"); -async function transform(response, options = {}) { +async function transform(response) { const body = JSON.parse(response.body); const fileSet = body._source; const annotations = fileSet?.annotations ?? 
[]; - const workId = fileSet.work_id; - const fileSetId = body._id; - const fileSetIndex = await getFileSetIndex(workId, fileSetId, options); - - const canvasId = `${dcApiEndpoint()}/works/${workId}?as=iiif/canvas/${fileSetIndex}`; + const canvasId = `${dcApiEndpoint()}/file-sets/${fileSet.id}?as=iiif`; const annotationPageId = `${dcApiEndpoint()}/file-sets/${ fileSet.id }/annotations?as=iiif`; @@ -51,19 +46,4 @@ async function transform(response, options = {}) { }; } -async function getFileSetIndex(workId, fileSetId, options) { - const fileSetsResponse = await getWorkFileSets(workId, { - allowPrivate: options.allowPrivate, - allowUnpublished: options.allowUnpublished, - role: "Access", - sortBy: "rank", - }); - - const fileSetBody = JSON.parse(fileSetsResponse.body); - const hits = fileSetBody?.hits?.hits || []; - - const index = hits.findIndex((hit) => hit._source.id === fileSetId); - - return index; -} module.exports = { transform }; diff --git a/api/src/api/response/iiif/canvas.js b/api/src/api/response/iiif/canvas.js index 7efe3be4..87771c01 100644 --- a/api/src/api/response/iiif/canvas.js +++ b/api/src/api/response/iiif/canvas.js @@ -26,6 +26,12 @@ async function transform(response, options = {}) { height, label: { none: [label(fileSet)] }, items: [annotationPage(canvasId, fileSet, { width, height })], + service: [ + { + id: `${dcApiEndpoint()}/file-sets/${fileSet.id}/search?as=iiif`, + type: "SearchService2", + }, + ], }; if (fileSet.description) { diff --git a/api/src/api/response/iiif/file-set-search.js b/api/src/api/response/iiif/file-set-search.js new file mode 100644 index 00000000..d15cd007 --- /dev/null +++ b/api/src/api/response/iiif/file-set-search.js @@ -0,0 +1,35 @@ +const { dcApiEndpoint } = require("../../../environment"); +const { + buildSearchAnnotationBody, + transcriptionAnnotationsMatching, +} = require("./search-helpers"); + +async function transform(fileSet, q) { + const canvasId = `${dcApiEndpoint()}/file-sets/${fileSet.id}?as=iiif`; + 
const searchId = `${dcApiEndpoint()}/file-sets/${ + fileSet.id + }/search?as=iiif&q=${encodeURIComponent(q)}`; + + const items = transcriptionAnnotationsMatching(fileSet.annotations, q).map( + (ann) => ({ + id: `${canvasId}/annotation/${ann.id}`, + type: "Annotation", + motivation: "supplementing", + body: buildSearchAnnotationBody(ann), + target: canvasId, + }) + ); + + return { + statusCode: 200, + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + "@context": "http://iiif.io/api/search/2/context.json", + id: searchId, + type: "AnnotationPage", + items, + }), + }; +} + +module.exports = { transform }; diff --git a/api/src/api/response/iiif/manifest.js b/api/src/api/response/iiif/manifest.js index 688f6060..09c82469 100644 --- a/api/src/api/response/iiif/manifest.js +++ b/api/src/api/response/iiif/manifest.js @@ -45,7 +45,7 @@ async function transform(response, options = {}) { * @param {boolean} isAuxiliary */ function buildCanvasFromFileSet(fileSet, index, isAuxiliary) { - const canvasId = `${manifestId}/canvas/${fileSet.role.toLowerCase()}/${index}`; + const canvasId = fileSetCanvasId(fileSet); manifest.createCanvas(canvasId, (canvas) => { if (isAudioVideo(source.work_type)) canvas.duration = fileSet.duration || 1; @@ -241,81 +241,76 @@ async function transform(response, options = {}) { }); /** Process grouped file sets */ - Object.entries(fileSetGroups).forEach( - ([currentGroupKey, fileSets], index) => { - const canvasId = `${manifestId}/canvas/${index}`; - manifest.createCanvas(canvasId, (canvas) => { - // Find the file set with ID matching the currentGroupKey and make it primary - let matchingFileSetIndex = -1; - for (let i = 0; i < fileSets.length; i++) { - if (fileSets[i].id === currentGroupKey) { - matchingFileSetIndex = i; - break; - } - } - if (matchingFileSetIndex > -1) { - // Remove the matching fileSet and place it at the beginning - const matchingFileSet = fileSets.splice( - matchingFileSetIndex, - 1 - )[0]; - 
fileSets.unshift(matchingFileSet); - } - const primaryFileSet = fileSets[0]; + Object.entries(fileSetGroups).forEach(([currentGroupKey, fileSets]) => { + // Find the file set with ID matching the currentGroupKey and make it primary + let matchingFileSetIndex = -1; + for (let i = 0; i < fileSets.length; i++) { + if (fileSets[i].id === currentGroupKey) { + matchingFileSetIndex = i; + break; + } + } + if (matchingFileSetIndex > -1) { + // Remove the matching fileSet and place it at the beginning + const matchingFileSet = fileSets.splice(matchingFileSetIndex, 1)[0]; + fileSets.unshift(matchingFileSet); + } + const primaryFileSet = fileSets[0]; + const canvasId = fileSetCanvasId(primaryFileSet); - if (isAudioVideo(source.work_type)) { - canvas.duration = primaryFileSet.duration || 1; - } - canvas.height = primaryFileSet.height || 100; - canvas.width = primaryFileSet.width || 100; - canvas.addLabel(primaryFileSet.label, "none"); - addThumbnailToCanvas(canvas, primaryFileSet); - - /** Build "Choice" annotation if there are alternates */ - const annotationId = `${canvasId}/annotation/0`; - const choiceBody = - fileSets.length > 1 - ? 
{ - type: "Choice", - items: fileSets.map((fileSet) => - buildAnnotationBody(fileSet, source.work_type) - ), - } - : buildAnnotationBody(primaryFileSet, source.work_type); - - canvas.createAnnotation(annotationId, { - id: annotationId, - type: "Annotation", - motivation: "painting", - body: choiceBody, - }); - - /** Add "supplementing" annotation */ - if (primaryFileSet.webvtt) { - addSupplementingAnnotationToCanvas( - canvas, - canvasId, - primaryFileSet - ); - } + manifest.createCanvas(canvasId, (canvas) => { + if (isAudioVideo(source.work_type)) { + canvas.duration = primaryFileSet.duration || 1; + } + canvas.height = primaryFileSet.height || 100; + canvas.width = primaryFileSet.width || 100; + canvas.addLabel(primaryFileSet.label, "none"); + addThumbnailToCanvas(canvas, primaryFileSet); - /** Add transcription annotations */ - const transcriptions = transcriptionMap[primaryFileSet.id]; - if ( - source.work_type === "Image" && - primaryFileSet.role === "Access" && - transcriptions?.length - ) { - canvasAnnotations[canvasId] = { - id: `${dcApiEndpoint()}/file-sets/${ - primaryFileSet.id - }/annotations?as=iiif`, - type: "AnnotationPage", - }; - } + /** Build "Choice" annotation if there are alternates */ + const annotationId = `${canvasId}/annotation/0`; + const choiceBody = + fileSets.length > 1 + ? 
{ + type: "Choice", + items: fileSets.map((fileSet) => + buildAnnotationBody(fileSet, source.work_type) + ), + } + : buildAnnotationBody(primaryFileSet, source.work_type); + + canvas.createAnnotation(annotationId, { + id: annotationId, + type: "Annotation", + motivation: "painting", + body: choiceBody, }); - } - ); + + /** Add "supplementing" annotation */ + if (primaryFileSet.webvtt) { + addSupplementingAnnotationToCanvas( + canvas, + canvasId, + primaryFileSet + ); + } + + /** Add transcription annotations */ + const transcriptions = transcriptionMap[primaryFileSet.id]; + if ( + source.work_type === "Image" && + primaryFileSet.role === "Access" && + transcriptions?.length + ) { + canvasAnnotations[canvasId] = { + id: `${dcApiEndpoint()}/file-sets/${ + primaryFileSet.id + }/annotations?as=iiif`, + type: "AnnotationPage", + }; + } + }); + }); source.file_sets .filter((fileSet) => fileSet.role === "Auxiliary") @@ -394,6 +389,10 @@ async function transform(response, options = {}) { return transformError(response); } +function fileSetCanvasId(fileSet) { + return `${dcApiEndpoint()}/file-sets/${fileSet.id}?as=iiif`; +} + async function fetchFileSetTranscriptions(source, options) { if (source.work_type !== "Image") return {}; if (!openSearchEndpoint()) return {}; diff --git a/api/src/api/response/iiif/search-helpers.js b/api/src/api/response/iiif/search-helpers.js new file mode 100644 index 00000000..dd3badb8 --- /dev/null +++ b/api/src/api/response/iiif/search-helpers.js @@ -0,0 +1,36 @@ +const { + getTranscriptionContent, + normalizeLanguages, +} = require("./presentation-api/items"); + +function annotationMatches(annotation, q) { + return getTranscriptionContent(annotation) + .toLowerCase() + .includes(q.toLowerCase()); +} + +function buildSearchAnnotationBody(annotation) { + const body = { + type: "TextualBody", + value: getTranscriptionContent(annotation), + format: "text/plain", + }; + const languages = normalizeLanguages(annotation.language); + if 
(languages.length === 1) { + body.language = languages[0]; + } else if (languages.length > 1) { + body.language = languages; + } + return body; +} + +function transcriptionAnnotationsMatching(annotations = [], q) { + return annotations + .filter((annotation) => annotation.type === "transcription") + .filter((annotation) => annotationMatches(annotation, q)); +} + +module.exports = { + buildSearchAnnotationBody, + transcriptionAnnotationsMatching, +}; diff --git a/api/src/api/response/iiif/search.js b/api/src/api/response/iiif/search.js index 66f0908a..68102c55 100644 --- a/api/src/api/response/iiif/search.js +++ b/api/src/api/response/iiif/search.js @@ -1,45 +1,37 @@ const { dcApiEndpoint } = require("../../../environment"); const { getWorkFileSets } = require("../../opensearch"); const { - getTranscriptionContent, - normalizeLanguages, -} = require("./presentation-api/items"); - -function buildSearchAnnotationBody(annotation, content) { - const body = { - type: "TextualBody", - value: content, - format: "text/plain", - }; - const languages = normalizeLanguages(annotation.language); - if (languages.length === 1) { - body.language = languages[0]; - } else if (languages.length > 1) { - body.language = languages; - } - return body; -} + buildSearchAnnotationBody, + transcriptionAnnotationsMatching, +} = require("./search-helpers"); async function transform(workSource, q, opts = {}) { const { allowPrivate = false, allowUnpublished = false } = opts; const workId = workSource.id; - const manifestId = `${dcApiEndpoint()}/works/${workId}?as=iiif`; const searchId = `${dcApiEndpoint()}/works/${workId}/search?as=iiif&q=${encodeURIComponent( q )}`; - // Build canvas index map from the work's file_sets array — same ordering as manifest.js - const groupIndexMap = {}; - let groupIndex = 0; + // Build canvas ID map from the work's file_sets array using the same grouping + // and primary-file-set selection as manifest.js. 
+ const groupFileSetMap = {}; (workSource.file_sets || []) .filter((fs) => fs.role === "Access") .forEach((fs) => { const key = fs.group_with || fs.id; - if (!(key in groupIndexMap)) { - groupIndexMap[key] = groupIndex++; + if (!groupFileSetMap[key]) { + groupFileSetMap[key] = []; } + groupFileSetMap[key].push(fs); }); + const groupCanvasIdMap = Object.fromEntries( + Object.entries(groupFileSetMap).map(([key, groupFileSets]) => { + const primary = + groupFileSets.find((fs) => fs.id === key) || groupFileSets[0]; + return [key, `${dcApiEndpoint()}/file-sets/${primary.id}?as=iiif`]; + }) + ); const response = await getWorkFileSets(workId, { allowPrivate, @@ -64,29 +56,23 @@ async function transform(workSource, q, opts = {}) { const items = []; Object.entries(fileSetGroups).forEach(([groupKey, groupFileSets]) => { - const canvasIndex = groupIndexMap[groupKey]; - if (canvasIndex === undefined) return; - const canvasId = `${manifestId}/canvas/${canvasIndex}`; + const canvasId = groupCanvasIdMap[groupKey]; + if (canvasId === undefined) return; // Primary file set is the one whose id matches the group key (same as manifest.js) const primary = groupFileSets.find((fs) => fs.id === groupKey) || groupFileSets[0]; if (!primary?.annotations) return; - primary.annotations - .filter((ann) => ann.type === "transcription") - .forEach((ann) => { - const content = getTranscriptionContent(ann); - if (!content.toLowerCase().includes(q.toLowerCase())) return; - - items.push({ - id: `${canvasId}/annotation/${ann.id}`, - type: "Annotation", - motivation: "supplementing", - body: buildSearchAnnotationBody(ann, content), - target: canvasId, - }); + transcriptionAnnotationsMatching(primary.annotations, q).forEach((ann) => { + items.push({ + id: `${canvasId}/annotation/${ann.id}`, + type: "Annotation", + motivation: "supplementing", + body: buildSearchAnnotationBody(ann), + target: canvasId, }); + }); }); return { diff --git a/api/src/handlers/get-file-set-search.js 
b/api/src/handlers/get-file-set-search.js new file mode 100644 index 00000000..69a84cc5 --- /dev/null +++ b/api/src/handlers/get-file-set-search.js @@ -0,0 +1,30 @@ +const { getFileSet } = require("../api/opensearch"); +const iiifSearchResponse = require("../api/response/iiif/file-set-search"); +const { wrap } = require("./middleware"); + +exports.handler = wrap(async (event) => { + const id = event.pathParameters.id; + const { as, q } = event.queryStringParameters ?? {}; + + const allowPrivate = + event.userToken.isSuperUser() || event.userToken.isReadingRoom(); + const allowUnpublished = event.userToken.isSuperUser(); + + if (as !== "iiif" || !q?.trim()) { + return { + statusCode: 400, + body: JSON.stringify({ + message: "Request must include ?as=iiif&q={query}", + }), + }; + } + + const fileSetResponse = await getFileSet(id, { + allowPrivate, + allowUnpublished, + }); + if (fileSetResponse.statusCode !== 200) return fileSetResponse; + + const fileSetSource = JSON.parse(fileSetResponse.body)._source; + return iiifSearchResponse.transform(fileSetSource, q); +}); diff --git a/api/template.yaml b/api/template.yaml index e12a9ce9..da873fb6 100644 --- a/api/template.yaml +++ b/api/template.yaml @@ -359,6 +359,30 @@ Resources: ApiId: !Ref dcApi Path: /file-sets/{id}/annotations Method: HEAD + getFileSetSearchFunction: + Type: AWS::Serverless::Function + Condition: DeployAPI + Properties: + Handler: handlers/get-file-set-search.handler + Description: IIIF Search 2.0 for a FileSet's transcription annotations. 
+ #* Layers: + #* - !Ref apiDependencies + Policies: + - !Ref SecretsPolicy + - !Ref readIndexPolicy + Events: + ApiGet: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /file-sets/{id}/search + Method: GET + ApiHead: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /file-sets/{id}/search + Method: HEAD getAnnotationByIdFunction: Type: AWS::Serverless::Function Condition: DeployAPI diff --git a/api/test/integration/get-annotations.test.js b/api/test/integration/get-annotations.test.js index 01be08fc..9aa836f0 100644 --- a/api/test/integration/get-annotations.test.js +++ b/api/test/integration/get-annotations.test.js @@ -51,6 +51,9 @@ describe("Annotation routes", () => { expect(body.items[0].type).to.eq("Annotation"); expect(body.items[0].motivation).to.eq("commenting"); expect(body.items[0].body.value).to.exist; + expect(body.items[0].target).to.eq( + `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif` + ); }); it("returns IIIF annotation page with empty items when no annotations", async () => { diff --git a/api/test/integration/get-file-set-search.test.js b/api/test/integration/get-file-set-search.test.js new file mode 100644 index 00000000..9a459bab --- /dev/null +++ b/api/test/integration/get-file-set-search.test.js @@ -0,0 +1,119 @@ +"use strict"; + +const chai = require("chai"); +const expect = chai.expect; +chai.use(require("chai-http")); + +describe("IIIF Search 2.0 for a file set", () => { + helpers.saveEnvironment(); + const mock = helpers.mockIndex(); + + describe("GET /file-sets/{id}/search", () => { + const { handler } = requireSource("handlers/get-file-set-search"); + + it("returns a IIIF Search 2.0 AnnotationPage with matching items", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/fileset-annotated-1234.json")); + + const event = helpers + .mockEvent("GET", "/file-sets/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const 
result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body["@context"]).to.eq( + "http://iiif.io/api/search/2/context.json" + ); + expect(body.type).to.eq("AnnotationPage"); + expect(body.id).to.include("/file-sets/1234/search?as=iiif&q=Lorem"); + expect(body.items).to.have.lengthOf(1); + + const item = body.items[0]; + expect(item.type).to.eq("Annotation"); + expect(item.motivation).to.eq("supplementing"); + expect(item.body.type).to.eq("TextualBody"); + expect(item.body.value).to.include("Lorem"); + expect(item.body.format).to.eq("text/plain"); + expect(item.body.language).to.deep.eq(["lg", "en"]); + expect(item.target).to.eq( + `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif` + ); + }); + + it("returns an empty items array when no annotations match", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/fileset-annotated-1234.json")); + + const event = helpers + .mockEvent("GET", "/file-sets/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "zzznomatch" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body.type).to.eq("AnnotationPage"); + expect(body.items).to.deep.eq([]); + }); + + it("returns 400 when q parameter is missing", async () => { + const event = helpers + .mockEvent("GET", "/file-sets/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(400); + }); + + it("returns 400 when as parameter is not iiif", async () => { + const event = helpers + .mockEvent("GET", "/file-sets/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(400); + }); + + it("returns 404 when the file set does not exist", async () 
=> { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/missing-fileset-1234.json")); + + const event = helpers + .mockEvent("GET", "/file-sets/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(404); + }); + + it("returns 403 when the file set is private and no token is provided", async () => { + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/fileset-restricted-1234.json")); + + const event = helpers + .mockEvent("GET", "/file-sets/{id}/search") + .pathParams({ id: "1234" }) + .queryParams({ as: "iiif", q: "Lorem" }) + .render(); + + const result = await handler(event); + expect(result.statusCode).to.eq(403); + }); + }); +}); diff --git a/api/test/integration/get-work-search.test.js b/api/test/integration/get-work-search.test.js index 6090413a..c1e94f2d 100644 --- a/api/test/integration/get-work-search.test.js +++ b/api/test/integration/get-work-search.test.js @@ -102,10 +102,12 @@ describe("IIIF Search 2.0 for a work", () => { expect(item.body.value).to.include("Lorem"); expect(item.body.format).to.eq("text/plain"); expect(item.body.language).to.eq("en"); - expect(item.target).to.include("/canvas/0"); + expect(item.target).to.eq( + `${process.env.DC_API_ENDPOINT}/file-sets/076dcbd8-8c57-40e8-bdf7-dc9153c87a36?as=iiif` + ); }); - it("uses the correct canvas index from the manifest ordering, not sequential search result order", async () => { + it("targets the correct file-set canvas from the manifest ordering, not sequential search result order", async () => { mock .get("/dc-v2-work/_doc/1234") .reply(200, helpers.testFixture("mocks/work-1234.json")); @@ -124,8 +126,10 @@ describe("IIIF Search 2.0 for a work", () => { const body = JSON.parse(result.body); expect(body.items).to.have.lengthOf(1); - // Second Access file set in work-1234.json must map to canvas/1, not canvas/0 - 
expect(body.items[0].target).to.include("/canvas/1"); + // Second Access file set in work-1234.json must map to its standalone Canvas URI + expect(body.items[0].target).to.eq( + `${process.env.DC_API_ENDPOINT}/file-sets/51862c1c-c024-45dc-ab26-694bd8ebc16c?as=iiif` + ); }); it("returns an empty items array when no annotations match", async () => { diff --git a/api/test/unit/api/response/iiif/canvas.test.js b/api/test/unit/api/response/iiif/canvas.test.js index 1d45d388..cd95c389 100644 --- a/api/test/unit/api/response/iiif/canvas.test.js +++ b/api/test/unit/api/response/iiif/canvas.test.js @@ -40,6 +40,10 @@ describe("FileSet as IIIF Canvas response transformer", () => { expect(canvas.thumbnail[0].id).to.eq( `${source.representative_image_url}/full/!300,300/0/default.jpg` ); + expect(canvas.service).to.deep.include({ + id: `${dcApiEndpoint()}/file-sets/${source.id}/search?as=iiif`, + type: "SearchService2", + }); }); it("builds a painting annotation for image file sets", async () => { diff --git a/api/test/unit/api/response/iiif/manifest.test.js b/api/test/unit/api/response/iiif/manifest.test.js index e3cb2a18..3814639f 100644 --- a/api/test/unit/api/response/iiif/manifest.test.js +++ b/api/test/unit/api/response/iiif/manifest.test.js @@ -171,6 +171,12 @@ describe("Image Work as IIIF Manifest response transformer", () => { manifest.items.forEach((canvas) => { expect(canvas.type).to.eq("Canvas"); }); + expect(manifest.items[0].id).to.eq( + `${dcApiEndpoint()}/file-sets/${source.file_sets[0].id}?as=iiif` + ); + expect(manifest.items[0].items[0].id).to.eq( + `${manifest.items[0].id}/annotation-page` + ); expect(manifest.items[0].width).to.eq(source.file_sets[0].width); expect(manifest.items[0].height).to.eq(source.file_sets[0].height); expect(manifest.items[0].label.none[0]).to.eq(source.file_sets[0].label); From b88f32d6ec168ad58ce369bde81edee14a137d28 Mon Sep 17 00:00:00 2001 From: Mat Jordan Date: Mon, 18 May 2026 19:22:09 +0000 Subject: [PATCH 10/11] Refine canvas 
and annotation IIIF responses. Co-authored-by: Karen Shaw --- api/src/api/response/iiif/annotations.js | 57 +++++--------- api/src/api/response/iiif/canvas.js | 16 ++++ .../api/response/iiif/file-set-annotations.js | 55 ++++++++++++++ api/src/api/response/iiif/file-set-search.js | 7 +- api/src/api/response/iiif/search-helpers.js | 15 ++++ api/src/api/response/iiif/search.js | 7 +- api/src/handlers/get-annotation-by-id.js | 15 +++- api/src/handlers/get-file-set-annotations.js | 2 +- .../mocks/fileset-annotated-1234.json | 1 + api/test/integration/get-annotations.test.js | 66 +++++++++++++++- .../integration/get-file-set-search.test.js | 21 +++++- api/test/integration/get-work-search.test.js | 34 +++++++-- .../unit/api/response/iiif/canvas.test.js | 75 +++++++++++++++++++ 13 files changed, 310 insertions(+), 61 deletions(-) create mode 100644 api/src/api/response/iiif/file-set-annotations.js diff --git a/api/src/api/response/iiif/annotations.js b/api/src/api/response/iiif/annotations.js index 41837aca..4d68fb8b 100644 --- a/api/src/api/response/iiif/annotations.js +++ b/api/src/api/response/iiif/annotations.js @@ -1,48 +1,27 @@ const { dcApiEndpoint } = require("../../../environment"); +const { + buildAnnotationTarget, + buildSearchAnnotationBody, +} = require("./search-helpers"); -async function transform(response) { - const body = JSON.parse(response.body); - const fileSet = body._source; - const annotations = fileSet?.annotations ?? 
[]; - +function transform(annotation, fileSet) { const canvasId = `${dcApiEndpoint()}/file-sets/${fileSet.id}?as=iiif`; - const annotationPageId = `${dcApiEndpoint()}/file-sets/${ - fileSet.id - }/annotations?as=iiif`; - - // Build annotation items - filter for transcriptions only - // We currently will only have one annotation and it's a transcription - const items = annotations - .filter((annotation) => annotation.type === "transcription") - .map((annotation, idx) => { - const annotationId = `${annotationPageId}/a${idx}`; - return { - id: annotationId, - type: "Annotation", - motivation: "commenting", - body: { - type: "TextualBody", - value: annotation.content, - format: "text/plain", - language: annotation.language || "en", - }, - target: canvasId, - }; - }); - - const annotationPage = { - "@context": "http://iiif.io/api/presentation/3/context.json", - id: annotationPageId, - type: "AnnotationPage", - items: items, - }; + const annotationId = `${dcApiEndpoint()}/annotations/${ + annotation.id + }?as=iiif`; return { statusCode: 200, - headers: { - "content-type": "application/json", - }, - body: JSON.stringify(annotationPage), + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + "@context": "http://iiif.io/api/presentation/3/context.json", + id: annotationId, + type: "Annotation", + // We have hardcoded motivations here, but in the future we may want to make this more dynamic based on the annotation type + motivation: ["contentState", "commenting"], + body: buildSearchAnnotationBody(annotation), + target: buildAnnotationTarget(canvasId, fileSet.work_id), + }), }; } diff --git a/api/src/api/response/iiif/canvas.js b/api/src/api/response/iiif/canvas.js index 87771c01..279469c2 100644 --- a/api/src/api/response/iiif/canvas.js +++ b/api/src/api/response/iiif/canvas.js @@ -67,6 +67,22 @@ async function transform(response, options = {}) { canvas.partOf = [partOf]; } + const transcriptions = (fileSet.annotations || []).filter( + (a) => a.type 
=== "transcription" + ); + if ( + /^image\//i.test(fileSet.mime_type) && + fileSet.role === "Access" && + transcriptions.length + ) { + canvas.annotations = [ + { + id: `${dcApiEndpoint()}/file-sets/${fileSet.id}/annotations?as=iiif`, + type: "AnnotationPage", + }, + ]; + } + return { statusCode: 200, headers: { diff --git a/api/src/api/response/iiif/file-set-annotations.js b/api/src/api/response/iiif/file-set-annotations.js new file mode 100644 index 00000000..06aaa7ee --- /dev/null +++ b/api/src/api/response/iiif/file-set-annotations.js @@ -0,0 +1,55 @@ +const { dcApiEndpoint } = require("../../../environment"); +const { + buildAnnotationTarget, + buildSearchAnnotationBody, +} = require("./search-helpers"); + +async function transform(response) { + const body = JSON.parse(response.body); + const fileSet = body._source; + const annotations = fileSet?.annotations ?? []; + + const canvasId = `${dcApiEndpoint()}/file-sets/${fileSet.id}?as=iiif`; + const annotationPageId = `${dcApiEndpoint()}/file-sets/${ + fileSet.id + }/annotations?as=iiif`; + + // Build annotation items - filter for transcriptions only + // We currently will only have one annotation and it's a transcription + const items = annotations + .filter((annotation) => annotation.type === "transcription") + .map((annotation) => { + const annotationId = `${dcApiEndpoint()}/annotations/${ + annotation.id + }?as=iiif`; + return { + id: annotationId, + type: "Annotation", + motivation: "commenting", + body: { + type: "TextualBody", + value: annotation.content, + format: "text/plain", + language: annotation.language || "en", + }, + target: buildAnnotationTarget(canvasId, fileSet.work_id), + }; + }); + + const annotationPage = { + "@context": "http://iiif.io/api/presentation/3/context.json", + id: annotationPageId, + type: "AnnotationPage", + items: items, + }; + + return { + statusCode: 200, + headers: { + "content-type": "application/json", + }, + body: JSON.stringify(annotationPage), + }; +} + +module.exports 
= { transform }; diff --git a/api/src/api/response/iiif/file-set-search.js b/api/src/api/response/iiif/file-set-search.js index d15cd007..a929dd17 100644 --- a/api/src/api/response/iiif/file-set-search.js +++ b/api/src/api/response/iiif/file-set-search.js @@ -1,5 +1,6 @@ const { dcApiEndpoint } = require("../../../environment"); const { + buildAnnotationTarget, buildSearchAnnotationBody, transcriptionAnnotationsMatching, } = require("./search-helpers"); @@ -12,11 +13,11 @@ async function transform(fileSet, q) { const items = transcriptionAnnotationsMatching(fileSet.annotations, q).map( (ann) => ({ - id: `${canvasId}/annotation/${ann.id}`, + id: `${dcApiEndpoint()}/annotations/${ann.id}?as=iiif`, type: "Annotation", - motivation: "supplementing", + motivation: "commenting", body: buildSearchAnnotationBody(ann), - target: canvasId, + target: buildAnnotationTarget(canvasId, fileSet.work_id), }) ); diff --git a/api/src/api/response/iiif/search-helpers.js b/api/src/api/response/iiif/search-helpers.js index dd3badb8..f4599c31 100644 --- a/api/src/api/response/iiif/search-helpers.js +++ b/api/src/api/response/iiif/search-helpers.js @@ -1,3 +1,4 @@ +const { dcApiEndpoint } = require("../../../environment"); const { getTranscriptionContent, normalizeLanguages, @@ -30,7 +31,21 @@ function transcriptionAnnotationsMatching(annotations = [], q) { .filter((annotation) => annotationMatches(annotation, q)); } +function buildAnnotationTarget(canvasId, workId) { + const source = { id: canvasId, type: "Canvas" }; + if (workId) { + source.partOf = [ + { + id: `${dcApiEndpoint()}/works/${workId}?as=iiif`, + type: "Manifest", + }, + ]; + } + return { type: "SpecificResource", source }; +} + module.exports = { + buildAnnotationTarget, buildSearchAnnotationBody, transcriptionAnnotationsMatching, }; diff --git a/api/src/api/response/iiif/search.js b/api/src/api/response/iiif/search.js index 68102c55..4be8a10d 100644 --- a/api/src/api/response/iiif/search.js +++ 
b/api/src/api/response/iiif/search.js @@ -1,6 +1,7 @@ const { dcApiEndpoint } = require("../../../environment"); const { getWorkFileSets } = require("../../opensearch"); const { + buildAnnotationTarget, buildSearchAnnotationBody, transcriptionAnnotationsMatching, } = require("./search-helpers"); @@ -66,11 +67,11 @@ async function transform(workSource, q, opts = {}) { transcriptionAnnotationsMatching(primary.annotations, q).forEach((ann) => { items.push({ - id: `${canvasId}/annotation/${ann.id}`, + id: `${dcApiEndpoint()}/annotations/${ann.id}?as=iiif`, type: "Annotation", - motivation: "supplementing", + motivation: "commenting", body: buildSearchAnnotationBody(ann), - target: canvasId, + target: buildAnnotationTarget(canvasId, workId), }); }); }); diff --git a/api/src/handlers/get-annotation-by-id.js b/api/src/handlers/get-annotation-by-id.js index 692ffa0c..d613c93e 100644 --- a/api/src/handlers/get-annotation-by-id.js +++ b/api/src/handlers/get-annotation-by-id.js @@ -2,6 +2,7 @@ const { wrap } = require("./middleware"); const { search, getFileSet } = require("../api/opensearch"); const { prefix, appInfo } = require("../environment"); const { transformError } = require("../api/response/error"); +const iiifAnnotationsResponse = require("../api/response/iiif/annotations"); /** * Retrieves a single annotation by id @@ -58,13 +59,25 @@ exports.handler = wrap(async (event) => { if (!annotation) return transformError({ statusCode: 404 }); + const as = event.queryStringParameters?.as; + if (as === "iiif") { + return iiifAnnotationsResponse.transform( + annotation, + fileSetPayload._source + ); + } + return { statusCode: 200, headers: { "content-type": "application/json", }, body: JSON.stringify({ - data: annotation, + data: { + ...annotation, + file_set_id: fileSetPayload._source.id, + work_id: fileSetPayload._source.work_id, + }, info: appInfo(), }), }; diff --git a/api/src/handlers/get-file-set-annotations.js b/api/src/handlers/get-file-set-annotations.js index 
126aecac..6f9fe241 100644 --- a/api/src/handlers/get-file-set-annotations.js +++ b/api/src/handlers/get-file-set-annotations.js @@ -2,7 +2,7 @@ const { wrap } = require("./middleware"); const { getFileSet } = require("../api/opensearch"); const { appInfo } = require("../environment"); const opensearchResponse = require("../api/response/opensearch"); -const annotationsResponse = require("../api/response/iiif/annotations.js"); +const annotationsResponse = require("../api/response/iiif/file-set-annotations.js"); /** * Returns annotations for a FileSet diff --git a/api/test/fixtures/mocks/fileset-annotated-1234.json b/api/test/fixtures/mocks/fileset-annotated-1234.json index cb965cf3..7c173ad8 100644 --- a/api/test/fixtures/mocks/fileset-annotated-1234.json +++ b/api/test/fixtures/mocks/fileset-annotated-1234.json @@ -7,6 +7,7 @@ "_source": { "id": "1234", "api_model": "FileSet", + "work_id": "work-1234", "visibility": "Public", "published": true, "mime_type": "image/tiff", diff --git a/api/test/integration/get-annotations.test.js b/api/test/integration/get-annotations.test.js index 9aa836f0..eb6676e3 100644 --- a/api/test/integration/get-annotations.test.js +++ b/api/test/integration/get-annotations.test.js @@ -48,12 +48,25 @@ describe("Annotation routes", () => { const body = JSON.parse(result.body); expect(body.type).to.eq("AnnotationPage"); expect(body.items).to.be.an("array").with.lengthOf(1); + expect(body.items[0].id).to.eq( + `${process.env.DC_API_ENDPOINT}/annotations/36a47020-5410-4dda-a7ca-967fe3885bcd?as=iiif` + ); expect(body.items[0].type).to.eq("Annotation"); expect(body.items[0].motivation).to.eq("commenting"); expect(body.items[0].body.value).to.exist; - expect(body.items[0].target).to.eq( - `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif` - ); + expect(body.items[0].target).to.deep.eq({ + type: "SpecificResource", + source: { + id: `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif`, + type: "Canvas", + partOf: [ + { + id: 
`${process.env.DC_API_ENDPOINT}/works/work-1234?as=iiif`, + type: "Manifest", + }, + ], + }, + }); }); it("returns IIIF annotation page with empty items when no annotations", async () => { @@ -100,6 +113,53 @@ describe("Annotation routes", () => { const body = JSON.parse(result.body); expect(body.data.id).to.eq("36a47020-5410-4dda-a7ca-967fe3885bcd"); + expect(body.data.file_set_id).to.eq("1234"); + expect(body.data.work_id).to.eq("work-1234"); + }); + + it("returns a IIIF contentState Annotation for ?as=iiif", async () => { + mock + .post("/dc-v2-file-set/_search", () => true) + .reply(200, helpers.testFixture("mocks/annotation-search-hit.json")); + + mock + .get("/dc-v2-file-set/_doc/1234") + .reply(200, helpers.testFixture("mocks/fileset-annotated-1234.json")); + + const event = helpers + .mockEvent("GET", "/annotations/{id}") + .pathParams({ id: "36a47020-5410-4dda-a7ca-967fe3885bcd" }) + .queryParams({ as: "iiif" }) + .render(); + const result = await handler(event); + expect(result.statusCode).to.eq(200); + + const body = JSON.parse(result.body); + expect(body["@context"]).to.eq( + "http://iiif.io/api/presentation/3/context.json" + ); + expect(body.type).to.eq("Annotation"); + expect(body.motivation).to.deep.eq(["contentState", "commenting"]); + expect(body.body.type).to.eq("TextualBody"); + expect(body.body.format).to.eq("text/plain"); + expect(body.body.value).to.be.a("string").and.not.empty; + expect(body.body.language).to.exist; + expect(body.id).to.include( + "/annotations/36a47020-5410-4dda-a7ca-967fe3885bcd?as=iiif" + ); + expect(body.target).to.deep.eq({ + type: "SpecificResource", + source: { + id: `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif`, + type: "Canvas", + partOf: [ + { + id: `${process.env.DC_API_ENDPOINT}/works/work-1234?as=iiif`, + type: "Manifest", + }, + ], + }, + }); }); }); }); diff --git a/api/test/integration/get-file-set-search.test.js b/api/test/integration/get-file-set-search.test.js index 9a459bab..8817f8c1 100644 --- 
a/api/test/integration/get-file-set-search.test.js +++ b/api/test/integration/get-file-set-search.test.js @@ -34,15 +34,28 @@ describe("IIIF Search 2.0 for a file set", () => { expect(body.items).to.have.lengthOf(1); const item = body.items[0]; + expect(item.id).to.eq( + `${process.env.DC_API_ENDPOINT}/annotations/36a47020-5410-4dda-a7ca-967fe3885bcd?as=iiif` + ); expect(item.type).to.eq("Annotation"); - expect(item.motivation).to.eq("supplementing"); + expect(item.motivation).to.eq("commenting"); expect(item.body.type).to.eq("TextualBody"); expect(item.body.value).to.include("Lorem"); expect(item.body.format).to.eq("text/plain"); expect(item.body.language).to.deep.eq(["lg", "en"]); - expect(item.target).to.eq( - `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif` - ); + expect(item.target).to.deep.eq({ + type: "SpecificResource", + source: { + id: `${process.env.DC_API_ENDPOINT}/file-sets/1234?as=iiif`, + type: "Canvas", + partOf: [ + { + id: `${process.env.DC_API_ENDPOINT}/works/work-1234?as=iiif`, + type: "Manifest", + }, + ], + }, + }); }); it("returns an empty items array when no annotations match", async () => { diff --git a/api/test/integration/get-work-search.test.js b/api/test/integration/get-work-search.test.js index c1e94f2d..1f49caac 100644 --- a/api/test/integration/get-work-search.test.js +++ b/api/test/integration/get-work-search.test.js @@ -97,14 +97,24 @@ describe("IIIF Search 2.0 for a work", () => { const item = body.items[0]; expect(item.type).to.eq("Annotation"); - expect(item.motivation).to.eq("supplementing"); + expect(item.motivation).to.eq("commenting"); expect(item.body.type).to.eq("TextualBody"); expect(item.body.value).to.include("Lorem"); expect(item.body.format).to.eq("text/plain"); expect(item.body.language).to.eq("en"); - expect(item.target).to.eq( - `${process.env.DC_API_ENDPOINT}/file-sets/076dcbd8-8c57-40e8-bdf7-dc9153c87a36?as=iiif` - ); + expect(item.target).to.deep.eq({ + type: "SpecificResource", + source: { + id: 
`${process.env.DC_API_ENDPOINT}/file-sets/076dcbd8-8c57-40e8-bdf7-dc9153c87a36?as=iiif`, + type: "Canvas", + partOf: [ + { + id: `${process.env.DC_API_ENDPOINT}/works/1234?as=iiif`, + type: "Manifest", + }, + ], + }, + }); }); it("targets the correct file-set canvas from the manifest ordering, not sequential search result order", async () => { @@ -127,9 +137,19 @@ describe("IIIF Search 2.0 for a work", () => { const body = JSON.parse(result.body); expect(body.items).to.have.lengthOf(1); // Second Access file set in work-1234.json must map to its standalone Canvas URI - expect(body.items[0].target).to.eq( - `${process.env.DC_API_ENDPOINT}/file-sets/51862c1c-c024-45dc-ab26-694bd8ebc16c?as=iiif` - ); + expect(body.items[0].target).to.deep.eq({ + type: "SpecificResource", + source: { + id: `${process.env.DC_API_ENDPOINT}/file-sets/51862c1c-c024-45dc-ab26-694bd8ebc16c?as=iiif`, + type: "Canvas", + partOf: [ + { + id: `${process.env.DC_API_ENDPOINT}/works/1234?as=iiif`, + type: "Manifest", + }, + ], + }, + }); }); it("returns an empty items array when no annotations match", async () => { diff --git a/api/test/unit/api/response/iiif/canvas.test.js b/api/test/unit/api/response/iiif/canvas.test.js index cd95c389..31765e87 100644 --- a/api/test/unit/api/response/iiif/canvas.test.js +++ b/api/test/unit/api/response/iiif/canvas.test.js @@ -210,6 +210,81 @@ describe("FileSet as IIIF Canvas response transformer", () => { }); }); + it("adds annotations reference for Image Access file sets with transcriptions", async () => { + const responseBody = JSON.parse( + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + responseBody._source.annotations = [ + { type: "transcription", content: "some text" }, + ]; + + const result = await transformer.transform({ + statusCode: 200, + body: JSON.stringify(responseBody), + }); + const canvas = JSON.parse(result.body); + + expect(canvas.annotations).to.deep.eq([ + { + id: `${dcApiEndpoint()}/file-sets/${ + responseBody._source.id 
+ }/annotations?as=iiif`, + type: "AnnotationPage", + }, + ]); + }); + + it("does not add annotations reference when mime_type is not image/", async () => { + const responseBody = JSON.parse( + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + responseBody._source.mime_type = "video/mp4"; + responseBody._source.annotations = [ + { type: "transcription", content: "some text" }, + ]; + + const result = await transformer.transform({ + statusCode: 200, + body: JSON.stringify(responseBody), + }); + const canvas = JSON.parse(result.body); + + expect(canvas.annotations).to.be.undefined; + }); + + it("does not add annotations reference when role is not Access", async () => { + const responseBody = JSON.parse( + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + responseBody._source.role = "Auxiliary"; + responseBody._source.annotations = [ + { type: "transcription", content: "some text" }, + ]; + + const result = await transformer.transform({ + statusCode: 200, + body: JSON.stringify(responseBody), + }); + const canvas = JSON.parse(result.body); + + expect(canvas.annotations).to.be.undefined; + }); + + it("does not add annotations reference when there are no transcription annotations", async () => { + const responseBody = JSON.parse( + helpers.testFixture("mocks/fileset-image-canvas-1234.json") + ); + responseBody._source.annotations = [{ type: "other", content: "nope" }]; + + const result = await transformer.transform({ + statusCode: 200, + body: JSON.stringify(responseBody), + }); + const canvas = JSON.parse(result.body); + + expect(canvas.annotations).to.be.undefined; + }); + it("passes non-200 responses through error transformation", async () => { const result = await transformer.transform({ statusCode: 404 }); const body = JSON.parse(result.body); From cbf8222061dcadfe33998d5d98caf8ae7245c780 Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Tue, 19 May 2026 18:27:13 +0000 Subject: [PATCH 11/11] Bump version to 2.11.0 --- 
api/dependencies/package-lock.json | 4 ++-- api/dependencies/package.json | 2 +- api/package-lock.json | 4 ++-- api/package.json | 2 +- api/src/package-lock.json | 4 ++-- api/src/package.json | 2 +- av-download/lambdas/package-lock.json | 4 ++-- av-download/lambdas/package.json | 2 +- chat/pyproject.toml | 2 +- chat/uv.lock | 5 ++++- docs/pyproject.toml | 2 +- docs/uv.lock | 2 +- mcp/apps/mcp/package.json | 2 +- mcp/package-lock.json | 4 ++-- mcp/package.json | 2 +- mcp/server.json | 6 +++--- 16 files changed, 26 insertions(+), 23 deletions(-) diff --git a/api/dependencies/package-lock.json b/api/dependencies/package-lock.json index 13581b22..f56ede72 100644 --- a/api/dependencies/package-lock.json +++ b/api/dependencies/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api-dependencies", - "version": "2.10.11", + "version": "2.11.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api-dependencies", - "version": "2.10.11", + "version": "2.11.0", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "^2.0.1", diff --git a/api/dependencies/package.json b/api/dependencies/package.json index e2af173d..a73a82d3 100644 --- a/api/dependencies/package.json +++ b/api/dependencies/package.json @@ -1,6 +1,6 @@ { "name": "dc-api-dependencies", - "version": "2.10.11", + "version": "2.11.0", "description": "NUL Digital Collections API Dependencies", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/package-lock.json b/api/package-lock.json index c6b8365e..d4897a1b 100644 --- a/api/package-lock.json +++ b/api/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api-build", - "version": "2.10.11", + "version": "2.11.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api-build", - "version": "2.10.11", + "version": "2.11.0", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { diff --git a/api/package.json b/api/package.json index 58fcc27e..f75ba364 100644 --- 
a/api/package.json +++ b/api/package.json @@ -1,6 +1,6 @@ { "name": "dc-api-build", - "version": "2.10.11", + "version": "2.11.0", "description": "NUL Digital Collections API Build Environment", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/src/package-lock.json b/api/src/package-lock.json index ba60495e..d9c2906f 100644 --- a/api/src/package-lock.json +++ b/api/src/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api", - "version": "2.10.11", + "version": "2.11.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api", - "version": "2.10.11", + "version": "2.11.0", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "^2.0.1", diff --git a/api/src/package.json b/api/src/package.json index 7286011a..cbf32adb 100644 --- a/api/src/package.json +++ b/api/src/package.json @@ -1,6 +1,6 @@ { "name": "dc-api", - "version": "2.10.11", + "version": "2.11.0", "description": "NUL Digital Collections API", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/av-download/lambdas/package-lock.json b/av-download/lambdas/package-lock.json index 66116294..425e2f38 100644 --- a/av-download/lambdas/package-lock.json +++ b/av-download/lambdas/package-lock.json @@ -1,12 +1,12 @@ { "name": "lambdas", - "version": "2.10.11", + "version": "2.11.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lambdas", - "version": "2.10.11", + "version": "2.11.0", "license": "Apache-2.0", "dependencies": { "fluent-ffmpeg": "2.1.3" diff --git a/av-download/lambdas/package.json b/av-download/lambdas/package.json index f3339b77..62e82003 100644 --- a/av-download/lambdas/package.json +++ b/av-download/lambdas/package.json @@ -1,6 +1,6 @@ { "name": "lambdas", - "version": "2.10.11", + "version": "2.11.0", "description": "Non-API handler lambdas", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" diff --git a/chat/pyproject.toml b/chat/pyproject.toml 
index 99cd2aaf..135d891e 100644 --- a/chat/pyproject.toml +++ b/chat/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dc-api-v2-chat" -version = "2.10.11" +version = "2.11.0" requires-python = ">=3.12" dependencies = [ "boto3~=1.34", diff --git a/chat/uv.lock b/chat/uv.lock index 5d95dc07..563e72db 100644 --- a/chat/uv.lock +++ b/chat/uv.lock @@ -302,7 +302,7 @@ wheels = [ [[package]] name = "dc-api-v2-chat" -version = "2.10.11" +version = "2.11.0" source = { virtual = "." } dependencies = [ { name = "boto3" }, @@ -383,6 +383,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, { url = 
"https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, @@ -393,6 +394,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -403,6 +405,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = 
"https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, diff --git a/docs/pyproject.toml b/docs/pyproject.toml index 17c4caef..b7b5f1cb 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dc-api-v2-docs" -version = "2.10.11" +version = "2.11.0" requires-python = ">=3.12" dependencies = [ "mkdocs>=1.1.2,<2.0.0", diff --git a/docs/uv.lock b/docs/uv.lock index 
5d9d770d..e1b10b6c 100644 --- a/docs/uv.lock +++ b/docs/uv.lock @@ -123,7 +123,7 @@ wheels = [ [[package]] name = "dc-api-v2-docs" -version = "2.10.11" +version = "2.11.0" source = { virtual = "." } dependencies = [ { name = "diagrams" }, diff --git a/mcp/apps/mcp/package.json b/mcp/apps/mcp/package.json index c968b9cc..ad57b2ce 100644 --- a/mcp/apps/mcp/package.json +++ b/mcp/apps/mcp/package.json @@ -1,5 +1,5 @@ { "name": "mcp", - "version": "2.10.11", + "version": "2.11.0", "type": "module" } diff --git a/mcp/package-lock.json b/mcp/package-lock.json index 1498aab3..6ff52d00 100644 --- a/mcp/package-lock.json +++ b/mcp/package-lock.json @@ -1,12 +1,12 @@ { "name": "@nulib/dc-api-mcp", - "version": "2.10.11", + "version": "2.11.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@nulib/dc-api-mcp", - "version": "2.10.11", + "version": "2.11.0", "license": "MIT", "dependencies": { "@aws-sdk/client-secrets-manager": "^3.563.0", diff --git a/mcp/package.json b/mcp/package.json index 9b82a35d..ea558c6f 100644 --- a/mcp/package.json +++ b/mcp/package.json @@ -1,7 +1,7 @@ { "name": "@nulib/dc-api-mcp", "mcpName": "io.github.nulib/dc-api", - "version": "2.10.11", + "version": "2.11.0", "description": "Agent integration with the Northwestern University Libraries Digital Collections API", "repository": { "type": "git", diff --git a/mcp/server.json b/mcp/server.json index 380991d0..07b06661 100644 --- a/mcp/server.json +++ b/mcp/server.json @@ -1,7 +1,7 @@ { "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", "name": "io.github.nulib/dc-api", - "version": "2.10.11", + "version": "2.11.0", "title": "Northwestern University Libraries Digital Collections API", "description": "Agent integration with the Northwestern University Libraries Digital Collections API", "repository": { @@ -13,14 +13,14 @@ { "registryType": "npm", "identifier": "@nulib/dc-api-mcp", - "version": "2.10.11", + "version": "2.11.0", "transport": { 
"type": "stdio" } }, { "registryType": "oci", - "identifier": "ghcr.io/nulib/dc-api-mcp:2.10.11", + "identifier": "ghcr.io/nulib/dc-api-mcp:2.11.0", "transport": { "type": "stdio" }