From e040e917c6227434112da3250268c11557548238 Mon Sep 17 00:00:00 2001 From: b Date: Fri, 13 Mar 2026 23:01:40 +1100 Subject: [PATCH 1/4] feat: split runtime-lite and ingest-full capabilities --- .dockerignore | 5 +- .github/workflows/docker-build.yml | 77 +++++++++++++++++ Dockerfile | 55 ++++++++++-- README-reqif-ingest-cli.md | 9 ++ README.md | 5 +- ado/templates/ci.yml | 2 +- ado/templates/selftest.yml | 2 +- azure-pipelines.yml | 2 + justfile | 19 ++++- pyproject.toml | 14 ++- reqif_ingest_cli/docling_adapter.py | 33 ++++++-- tests/test_reqif_ingest_cli_foundry.py | 4 + uv.lock | 113 +++++++++++++++---------- 13 files changed, 267 insertions(+), 73 deletions(-) diff --git a/.dockerignore b/.dockerignore index 09c8482..2458bcc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -28,10 +28,11 @@ build/ *~ .DS_Store -# Documentation (keep LICENSE and README) +# Documentation (keep runtime docs copied by Dockerfile) docs/ *.md !README.md +!README-reqif-ingest-cli.md !LICENSE # CI/CD @@ -52,7 +53,7 @@ evidence_store/ !evidence_store/.gitkeep # Development tools -justfile +!justfile Dockerfile .dockerignore docker-compose.yml diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 9308fb3..eedf683 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -9,7 +9,15 @@ on: pull_request: branches: - main # Test builds on PRs + schedule: + - cron: '0 6 * * 1' workflow_dispatch: # Allow manual triggers + inputs: + build_ingest_full: + description: "Build ingest-full image" + required: false + default: false + type: boolean env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true @@ -67,9 +75,12 @@ jobs: - name: Build and push Docker image id: build + env: + BUILD_START: ${{ github.run_id }}-${{ github.run_attempt }}-${{ github.job }} uses: docker/build-push-action@v6 with: context: . 
+ target: runtime-lite platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} @@ -79,6 +90,24 @@ jobs: build-args: | PYTHON_VERSION=3.10 OPA_VERSION=0.71.0 + + - name: Build runtime-lite image locally for size metrics + if: always() + run: | + set -euo pipefail + START_SECONDS=$(date +%s) + docker buildx build \ + --platform linux/amd64 \ + --target runtime-lite \ + --load \ + -t local/reqif-opa-mcp:runtime-lite \ + . + END_SECONDS=$(date +%s) + SIZE_BYTES=$(docker image inspect local/reqif-opa-mcp:runtime-lite --format '{{.Size}}') + DURATION_SECONDS=$((END_SECONDS - START_SECONDS)) + echo "## Runtime-lite build metrics" >> "$GITHUB_STEP_SUMMARY" + echo "- Duration (s): ${DURATION_SECONDS}" >> "$GITHUB_STEP_SUMMARY" + echo "- Size (bytes): ${SIZE_BYTES}" >> "$GITHUB_STEP_SUMMARY" - name: Generate artifact attestation if: github.event_name != 'pull_request' && steps.build.outputs.digest != '' @@ -104,3 +133,51 @@ jobs: echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY echo "docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITHUB_STEP_SUMMARY echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + + build-ingest-full: + runs-on: ubuntu-latest + if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.build_ingest_full) + permissions: + contents: read + packages: write + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push ingest-full image + uses: docker/build-push-action@v6 + with: + context: . 
+ target: ingest-full + platforms: linux/amd64,linux/arm64 + push: true + tags: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:ingest-full + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }}-ingest-full + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: | + PYTHON_VERSION=3.10 + OPA_VERSION=0.71.0 + + - name: Smoke test ingest-full docling import + run: | + set -euo pipefail + docker buildx build \ + --platform linux/amd64 \ + --target ingest-full \ + --load \ + -t local/reqif-opa-mcp:ingest-full \ + . + docker run --rm local/reqif-opa-mcp:ingest-full \ + python -c "import docling; print(docling.__version__)" diff --git a/Dockerfile b/Dockerfile index 123e83c..635b822 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,8 +2,8 @@ ARG PYTHON_VERSION=3.10 ARG OPA_VERSION=0.71.0 -# Builder: Install Python deps -FROM python:${PYTHON_VERSION}-slim AS builder +# Builder: install runtime-lite dependencies only. +FROM python:${PYTHON_VERSION}-slim AS deps-lite COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ WORKDIR /build COPY pyproject.toml uv.lock ./ @@ -11,7 +11,7 @@ RUN uv sync --frozen --no-dev --no-install-project COPY reqif_mcp/ reqif_mcp/ COPY reqif_ingest_cli/ reqif_ingest_cli/ COPY README.md LICENSE ./ -RUN uv sync --frozen --no-dev +RUN uv sync --frozen --no-dev --extra ingest-lite # OPA binary FROM alpine:latest AS opa-downloader @@ -23,16 +23,16 @@ RUN apk add --no-cache curl && \ curl -L -o /opa/opa "https://openpolicyagent.org/downloads/v${OPA_VERSION}/opa_linux_${ARCH}_static" && \ chmod +x /opa/opa -# Runtime -FROM python:${PYTHON_VERSION}-slim +# Runtime-lite image: default CI/runtime target. 
+FROM python:${PYTHON_VERSION}-slim AS runtime-lite LABEL org.opencontainers.image.title="ReqIF-OPA-MCP" \ - org.opencontainers.image.description="ReqIF compliance gate with OPA and SARIF" \ + org.opencontainers.image.description="ReqIF compliance gate with OPA and SARIF (runtime-lite)" \ org.opencontainers.image.vendor="PromptExecution" \ org.opencontainers.image.source="https://github.com/PromptExecution/reqif-opa-mcp" RUN groupadd -r reqif && useradd -r -g reqif -u 1000 reqif WORKDIR /app -COPY --from=builder --chown=reqif:reqif /build/.venv /app/.venv +COPY --from=deps-lite --chown=reqif:reqif /build/.venv /app/.venv COPY --from=opa-downloader /opa/opa /usr/local/bin/opa COPY --chown=reqif:reqif reqif_mcp/ /app/reqif_mcp/ COPY --chown=reqif:reqif reqif_ingest_cli/ /app/reqif_ingest_cli/ @@ -53,3 +53,44 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD python -c "import sys, urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read(); sys.exit(0)" EXPOSE 8000 CMD ["python", "-m", "reqif_mcp", "--http", "--host", "0.0.0.0", "--port", "8000"] + +# Builder: install full ingestion plus optional LLM review dependencies. +FROM deps-lite AS deps-full +RUN uv sync --frozen --no-dev --extra ingest-full --extra llm-review + +# Ingest-full image for richer docling extraction. 
+FROM python:${PYTHON_VERSION}-slim AS ingest-full
+LABEL org.opencontainers.image.title="ReqIF-OPA-MCP" \
+      org.opencontainers.image.description="ReqIF compliance gate with OPA and SARIF (ingest-full)" \
+      org.opencontainers.image.vendor="PromptExecution" \
+      org.opencontainers.image.source="https://github.com/PromptExecution/reqif-opa-mcp"
+
+RUN groupadd -r reqif && useradd -r -g reqif -u 1000 reqif
+WORKDIR /app
+COPY --from=deps-full --chown=reqif:reqif /build/.venv /app/.venv
+COPY --from=opa-downloader /opa/opa /usr/local/bin/opa
+COPY --chown=reqif:reqif reqif_mcp/ /app/reqif_mcp/
+COPY --chown=reqif:reqif reqif_ingest_cli/ /app/reqif_ingest_cli/
+COPY --chown=reqif:reqif agents/ /app/agents/
+COPY --chown=reqif:reqif schemas/ /app/schemas/
+COPY --chown=reqif:reqif samples/ /app/samples/
+COPY --chown=reqif:reqif opa-bundles/ /app/opa-bundles/
+COPY --chown=reqif:reqif justfile pyproject.toml README.md README-reqif-ingest-cli.md LICENSE /app/
+# Paths are spelled out: RUN uses /bin/sh (dash on this Debian-based image), which has no {a,b} brace expansion.
+RUN mkdir -p /app/evidence_store/events /app/evidence_store/sarif /app/evidence_store/decision_logs /app/artifacts/tests /app/artifacts/selftest /app/artifacts/demo && chown -R reqif:reqif /app
+
+ENV PATH="/app/.venv/bin:$PATH" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+USER reqif
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD python -c "import sys, urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read(); sys.exit(0)"
+EXPOSE 8000
+CMD ["python", "-m", "reqif_mcp", "--http", "--host", "0.0.0.0", "--port", "8000"]
+
+# Demo-full can carry sample/selftest assets while sharing ingest-full runtime.
+FROM ingest-full AS demo-full
+
+# Default build target stays lean.
+FROM runtime-lite AS default diff --git a/README-reqif-ingest-cli.md b/README-reqif-ingest-cli.md index 71650d6..ddb756e 100644 --- a/README-reqif-ingest-cli.md +++ b/README-reqif-ingest-cli.md @@ -13,6 +13,15 @@ Engineer view: - use this surface when the starting point is an artifact such as XLSX, PDF, DOCX, or Markdown - use `reqif_mcp` when the starting point is already ReqIF +Install profiles: + +- `uv sync --extra ingest-lite` + - XLSX and text-layer PDF extraction (`openpyxl`, `pypdf`) +- `uv sync --extra ingest-full` + - richer DOCX/Markdown/PDF extraction (`docling` stack) +- `uv sync --extra llm-review` + - optional Foundry/OpenAI-compatible review adapter + ```mermaid flowchart LR ART[Source artifact] --> EXT[Deterministic extraction] diff --git a/README.md b/README.md index 6792eb8..0637d75 100644 --- a/README.md +++ b/README.md @@ -233,8 +233,11 @@ Current implementation notes: - PDF prefers `pypdf` for offline text-layer extraction - `docling` remains the richer path for DOCX and Markdown - Azure Foundry integration is optional and not part of the deterministic first pass +- Install `ingest-lite` with `uv sync --extra ingest-lite` for XLSX plus text-layer PDF support +- Install `ingest-full` with `uv sync --extra ingest-full` for docling-backed extraction +- Install `llm-review` with `uv sync --extra llm-review` for optional Foundry quality-eval hooks -See `README-reqif-ingest-cli.md` for command details. +See [README-reqif-ingest-cli.md](README-reqif-ingest-cli.md) for command details. ## Samples and Fixtures diff --git a/ado/templates/ci.yml b/ado/templates/ci.yml index 2d38204..8d7a765 100644 --- a/ado/templates/ci.yml +++ b/ado/templates/ci.yml @@ -36,7 +36,7 @@ jobs: mkdir -p ${{ parameters.artifactRoot }}/tests export PATH="$HOME/.local/bin:$PATH" export UV_CACHE_DIR=.uv-cache - uv sync + uv sync --extra ingest-lite just ci-check "${{ parameters.artifactRoot }}/tests/junit.xml" uv run ruff check . 
> "${{ parameters.artifactRoot }}/tests/ruff.txt" uv run mypy reqif_mcp reqif_ingest_cli > "${{ parameters.artifactRoot }}/tests/mypy.txt" diff --git a/ado/templates/selftest.yml b/ado/templates/selftest.yml index 9eb54ec..1a358f2 100644 --- a/ado/templates/selftest.yml +++ b/ado/templates/selftest.yml @@ -38,7 +38,7 @@ jobs: set -euo pipefail export PATH="$HOME/.local/bin:$PATH" export UV_CACHE_DIR=.uv-cache - uv sync + uv sync --extra ingest-lite just demo-artifacts "${{ parameters.artifactRoot }}/demo" "${{ parameters.artifactRoot }}/selftest" "${{ parameters.enforceGateFailures }}" continueOnError: ${{ eq(parameters.enforceGateFailures, 'false') }} displayName: Build self-test and demo artifacts diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b341881..a3af4a6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,6 +20,7 @@ variables: containerAppEnvironmentName: 'reqif-opa-demo-env' containerAppName: 'reqif-opa-mcp-demo' containerPort: '8000' + imageTarget: 'runtime-lite' stages: - stage: ci @@ -61,6 +62,7 @@ stages: set -euo pipefail az acr login --name $(acrName) docker build \ + --target $(imageTarget) \ -t $(acrName).azurecr.io/$(imageRepository):$(Build.SourceVersion) \ -t $(acrName).azurecr.io/$(imageRepository):latest \ . diff --git a/justfile b/justfile index 757bad1..d0d3515 100644 --- a/justfile +++ b/justfile @@ -7,7 +7,7 @@ default: # Dev install: - uv sync + uv sync --extra ingest-lite dev: uv run python -m reqif_mcp @@ -216,8 +216,14 @@ demo-artifacts out="artifacts/demo" selftest_out="artifacts/selftest" enforce="f exit 0 # Docker -docker-build tag="latest": - docker build -t ghcr.io/promptexecution/reqif-opa-mcp:{{tag}} . +docker-build tag="latest" target="runtime-lite": + docker build --target {{target}} -t ghcr.io/promptexecution/reqif-opa-mcp:{{tag}} . 
+ +build-runtime-lite tag="runtime-lite": + just docker-build {{tag}} runtime-lite + +build-ingest-full tag="ingest-full": + just docker-build {{tag}} ingest-full docker-run tag="latest" port="8000": docker run --rm -p {{port}}:8000 \ @@ -225,6 +231,13 @@ docker-run tag="latest" port="8000": -v $(pwd)/opa-bundles:/app/opa-bundles \ ghcr.io/promptexecution/reqif-opa-mcp:{{tag}} +smoke-runtime-lite tag="runtime-lite": + docker run --rm -d --name reqif-runtime-lite -p 18000:8000 \ + ghcr.io/promptexecution/reqif-opa-mcp:{{tag}} + sleep 3 + curl -fsSL http://127.0.0.1:18000/health + docker rm -f reqif-runtime-lite + docker-push tag="latest": docker push ghcr.io/promptexecution/reqif-opa-mcp:{{tag}} diff --git a/pyproject.toml b/pyproject.toml index 28ec94f..b5135e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,12 +8,22 @@ dependencies = [ "python-ulid>=3.1.0", "returns>=0.26.0", "fastmcp==3.0.0b1", - "docling>=2.78.0", "reqif>=0.0.48", - "azure-ai-inference>=1.0.0b9", +] + +[project.optional-dependencies] +ingest-lite = [ + "openpyxl>=3.1.5", + "pypdf>=6.8.0", +] +ingest-full = [ + "docling>=2.79.0", "openpyxl>=3.1.5", "pypdf>=6.8.0", ] +llm-review = [ + "azure-ai-inference>=1.0.0b9", +] [dependency-groups] dev = [ diff --git a/reqif_ingest_cli/docling_adapter.py b/reqif_ingest_cli/docling_adapter.py index 84e954f..987c187 100644 --- a/reqif_ingest_cli/docling_adapter.py +++ b/reqif_ingest_cli/docling_adapter.py @@ -47,19 +47,21 @@ def extract_docling_document( return artifact_result artifact = artifact_result.unwrap() - from docling.datamodel.base_models import InputFormat - from docling.datamodel.pipeline_options import PdfPipelineOptions - from docling.document_converter import DocumentConverter, PdfFormatOption - if suffix == ".pdf": - pdf_graph = None try: pdf_graph = _extract_pdf_with_pypdf(resolved, artifact, None) - except Exception: - pdf_graph = None - else: if pdf_graph.nodes: return Success(pdf_graph) + except Exception: + pass + + try: + from 
docling.datamodel.base_models import InputFormat + from docling.datamodel.pipeline_options import PdfPipelineOptions + from docling.document_converter import DocumentConverter, PdfFormatOption + except Exception as exc: + return Failure(_missing_docling_error(resolved, exc)) + try: converter = DocumentConverter( allowed_formats=[InputFormat.PDF], @@ -80,6 +82,10 @@ def extract_docling_document( except Exception as exc: return Success(_extract_pdf_with_pypdf(resolved, artifact, exc)) else: + try: + from docling.document_converter import DocumentConverter + except Exception as exc: + return Failure(_missing_docling_error(resolved, exc)) converter = DocumentConverter() conversion = converter.convert(resolved) document = conversion.document @@ -218,7 +224,7 @@ def _extract_pdf_with_pypdf( path: Path, artifact: ArtifactRecord, docling_error: Exception | None, -) -> DocumentGraph: + ) -> DocumentGraph: """Fallback PDF extractor when Docling cannot initialize offline models.""" from pypdf import PdfReader @@ -267,3 +273,12 @@ def _extract_pdf_with_pypdf( "fallback_reason": str(docling_error) if docling_error is not None else None, }, ) + + +def _missing_docling_error(path: Path, import_error: Exception) -> ValueError: + """Return a guided install error when docling extras are missing.""" + return ValueError( + f"Docling extractor is not available for {path.name}. " + f"Install optional dependency group 'ingest-full' via 'uv sync --extra ingest-full'. 
" + f"Import error: {import_error}" + ) diff --git a/tests/test_reqif_ingest_cli_foundry.py b/tests/test_reqif_ingest_cli_foundry.py index ed73181..e410a58 100644 --- a/tests/test_reqif_ingest_cli_foundry.py +++ b/tests/test_reqif_ingest_cli_foundry.py @@ -2,6 +2,9 @@ from __future__ import annotations +from importlib.util import find_spec + +import pytest from returns.result import Failure, Success from reqif_ingest_cli.foundry_adapter import ( @@ -22,6 +25,7 @@ def test_load_foundry_chat_config_requires_expected_env_keys() -> None: assert "REQIF_INGEST_FOUNDRY_ENDPOINT" in str(error) +@pytest.mark.skipif(find_spec("azure.ai.inference") is None, reason="Install extra 'llm-review' for Foundry client tests.") def test_load_foundry_chat_config_and_create_client() -> None: """Foundry client creation should not require a live network call.""" result = load_foundry_chat_config( diff --git a/uv.lock b/uv.lock index 4191dd1..b271469 100644 --- a/uv.lock +++ b/uv.lock @@ -107,15 +107,15 @@ wheels = [ [[package]] name = "azure-core" -version = "1.38.2" +version = "1.38.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/fe/5c7710bc611a4070d06ba801de9a935cc87c3d4b689c644958047bdf2cba/azure_core-1.38.2.tar.gz", hash = "sha256:67562857cb979217e48dc60980243b61ea115b77326fa93d83b729e7ff0482e7", size = 363734 } +sdist = { url = "https://files.pythonhosted.org/packages/c8/29/9641b73248745774a52c7ce7f965ed1febbdea787ec21caad3ae6891d18a/azure_core-1.38.3.tar.gz", hash = "sha256:a7931fd445cb4af8802c6f39c6a326bbd1e34b115846550a8245fa656ead6f8e", size = 367267 } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/23/6371a551800d3812d6019cd813acd985f9fac0fedc1290129211a73da4ae/azure_core-1.38.2-py3-none-any.whl", hash = "sha256:074806c75cf239ea284a33a66827695ef7aeddac0b4e19dda266a93e4665ead9", size = 217957 }, + { url = 
"https://files.pythonhosted.org/packages/9a/3d/ac86083efa45a439d0bbfb7947615227813d368b9e1e93d23fd30de6fec0/azure_core-1.38.3-py3-none-any.whl", hash = "sha256:bf59d29765bf4748ab9edf25f98a30b7ea9797f43e367c06d846a30b29c1f845", size = 218231 }, ] [[package]] @@ -511,7 +511,7 @@ wheels = [ [[package]] name = "docling" -version = "2.78.0" +version = "2.79.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, @@ -548,9 +548,9 @@ dependencies = [ { name = "tqdm" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/02/e8/2147c9963efea2698b8fd983ae7f2acf290ce5246120e6e662a545ae4b21/docling-2.78.0.tar.gz", hash = "sha256:46ac9fb208cbbbbc9feba7bc650b660df4706a92f1ed6b26a952a9bcaf72cf04", size = 385012 } +sdist = { url = "https://files.pythonhosted.org/packages/78/85/263ccbb2b448c56ddad4636d7432db6a4b9a94001aabb7aaac31654a5b02/docling-2.79.0.tar.gz", hash = "sha256:448015bfb5da535f9e21232a07d1ae28914ee2613614cf82d7a6a2161daf6fd6", size = 386508 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/26/bdc2dff2e8be4b178f2994b5a1eca61250ea0d234521d6f1b50ef87dbffd/docling-2.78.0-py3-none-any.whl", hash = "sha256:237ba66a253962c87e9278c15d872154c750e08ea424bccf8bc91adebaf45ddc", size = 415390 }, + { url = "https://files.pythonhosted.org/packages/0e/2b/d700d0489a594c65e4a03c290363a706d7ce660e16b511fb5e35790d5bc9/docling-2.79.0-py3-none-any.whl", hash = "sha256:bbca4bf7fc7b42ec5cab066db718ce8240a93eb4c16ad0233101d59ce7ef440d", size = 421713 }, ] [[package]] @@ -752,11 +752,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.25.1" +version = "3.25.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b3/8b/4c32ecde6bea6486a2a5d05340e695174351ff6b06cf651a74c005f9df00/filelock-3.25.1.tar.gz", hash = "sha256:b9a2e977f794ef94d77cdf7d27129ac648a61f585bff3ca24630c1629f701aa9", size = 40319 } +sdist = { url = 
"https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/b8/2f664b56a3b4b32d28d3d106c71783073f712ba43ff6d34b9ea0ce36dc7b/filelock-3.25.1-py3-none-any.whl", hash = "sha256:18972df45473c4aa2c7921b609ee9ca4925910cc3a0fb226c96b92fc224ef7bf", size = 26720 }, + { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759 }, ] [[package]] @@ -788,34 +788,34 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.3.2" +version = "1.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8b/cb/9bb543bd987ffa1ee48202cc96a756951b734b79a542335c566148ade36c/hf_xet-1.3.2.tar.gz", hash = "sha256:e130ee08984783d12717444e538587fa2119385e5bd8fc2bb9f930419b73a7af", size = 643646 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/75/462285971954269432aad2e7938c5c7ff9ec7d60129cec542ab37121e3d6/hf_xet-1.3.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:335a8f36c55fd35a92d0062f4e9201b4015057e62747b7e7001ffb203c0ee1d2", size = 3761019 }, - { url = "https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8", size = 3521565 }, - { url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d", size = 4176494 }, - { 
url = "https://files.pythonhosted.org/packages/6b/b3/71e856bf9d9a69b3931837e8bf22e095775f268c8edcd4a9e8c355f92484/hf_xet-1.3.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c1980abfb68ecf6c1c7983379ed7b1e2b49a1aaf1a5aca9acc7d48e5e2e0a961", size = 3955601 }, - { url = "https://files.pythonhosted.org/packages/63/d7/aecf97b3f0a981600a67ff4db15e2d433389d698a284bb0ea5d8fcdd6f7f/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1c88fbd90ad0d27c46b77a445f0a436ebaa94e14965c581123b68b1c52f5fd30", size = 4154770 }, - { url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508", size = 4394161 }, - { url = "https://files.pythonhosted.org/packages/a1/c3/859509bade9178e21b8b1db867b8e10e9f817ab9ac1de77cb9f461ced765/hf_xet-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:31612ba0629046e425ba50375685a2586e11fb9144270ebabd75878c3eaf6378", size = 3637377 }, - { url = "https://files.pythonhosted.org/packages/05/7f/724cfbef4da92d577b71f68bf832961c8919f36c60d28d289a9fc9d024d4/hf_xet-1.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:433c77c9f4e132b562f37d66c9b22c05b5479f243a1f06a120c1c06ce8b1502a", size = 3497875 }, - { url = "https://files.pythonhosted.org/packages/ba/75/9d54c1ae1d05fb704f977eca1671747babf1957f19f38ae75c5933bc2dc1/hf_xet-1.3.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:c34e2c7aefad15792d57067c1c89b2b02c1bbaeabd7f8456ae3d07b4bbaf4094", size = 3761076 }, - { url = "https://files.pythonhosted.org/packages/f2/8a/08a24b6c6f52b5d26848c16e4b6d790bb810d1bf62c3505bed179f7032d3/hf_xet-1.3.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4bc995d6c41992831f762096020dc14a65fdf3963f86ffed580b596d04de32e3", size = 3521745 }, - { url = 
"https://files.pythonhosted.org/packages/b5/db/a75cf400dd8a1a8acf226a12955ff6ee999f272dfc0505bafd8079a61267/hf_xet-1.3.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:959083c89dee30f7d6f890b36cdadda823386c4de63b1a30384a75bfd2ae995d", size = 4176301 }, - { url = "https://files.pythonhosted.org/packages/01/40/6c4c798ffdd83e740dd3925c4e47793b07442a9efa3bc3866ba141a82365/hf_xet-1.3.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:cfa760888633b08c01b398d212ce7e8c0d7adac6c86e4b20dfb2397d8acd78ee", size = 3955437 }, - { url = "https://files.pythonhosted.org/packages/0c/09/9a3aa7c5f07d3e5cc57bb750d12a124ffa72c273a87164bd848f9ac5cc14/hf_xet-1.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3155a02e083aa21fd733a7485c7c36025e49d5975c8d6bda0453d224dd0b0ac4", size = 4154535 }, - { url = "https://files.pythonhosted.org/packages/ae/e0/831f7fa6d90cb47a230bc23284b502c700e1483bbe459437b3844cdc0776/hf_xet-1.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:91b1dc03c31cbf733d35dc03df7c5353686233d86af045e716f1e0ea4a2673cf", size = 4393891 }, - { url = "https://files.pythonhosted.org/packages/ab/96/6ed472fdce7f8b70f5da6e3f05be76816a610063003bfd6d9cea0bbb58a3/hf_xet-1.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:211f30098512d95e85ad03ae63bd7dd2c4df476558a5095d09f9e38e78cbf674", size = 3637583 }, - { url = "https://files.pythonhosted.org/packages/8b/e8/a069edc4570b3f8e123c0b80fadc94530f3d7b01394e1fc1bb223339366c/hf_xet-1.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:4a6817c41de7c48ed9270da0b02849347e089c5ece9a0e72ae4f4b3a57617f82", size = 3497977 }, - { url = "https://files.pythonhosted.org/packages/d8/28/dbb024e2e3907f6f3052847ca7d1a2f7a3972fafcd53ff79018977fcb3e4/hf_xet-1.3.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f93b7595f1d8fefddfede775c18b5c9256757824f7f6832930b49858483cd56f", size = 3763961 }, - { url = 
"https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259", size = 3526171 }, - { url = "https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633", size = 4180750 }, - { url = "https://files.pythonhosted.org/packages/8c/ad/bc7f41f87173d51d0bce497b171c4ee0cbde1eed2d7b4216db5d0ada9f50/hf_xet-1.3.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:06b724a361f670ae557836e57801b82c75b534812e351a87a2c739f77d1e0635", size = 3961035 }, - { url = "https://files.pythonhosted.org/packages/73/38/600f4dda40c4a33133404d9fe644f1d35ff2d9babb4d0435c646c63dd107/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:305f5489d7241a47e0458ef49334be02411d1d0f480846363c1c8084ed9916f7", size = 4161378 }, - { url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e", size = 4398020 }, - { url = "https://files.pythonhosted.org/packages/2b/0b/99bfd948a3ed3620ab709276df3ad3710dcea61976918cce8706502927af/hf_xet-1.3.2-cp37-abi3-win_amd64.whl", hash = "sha256:9298b47cce6037b7045ae41482e703c471ce36b52e73e49f71226d2e8e5685a1", size = 3641624 }, - { url = "https://files.pythonhosted.org/packages/cc/02/9a6e4ca1f3f73a164c0cd48e41b3cc56585dcc37e809250de443d673266f/hf_xet-1.3.2-cp37-abi3-win_arm64.whl", hash = "sha256:83d8ec273136171431833a6957e8f3af496bee227a0fe47c7b8b39c106d1749a", size = 3503976 }, +sdist = { url = 
"https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/06/e8cf74c3c48e5485c7acc5a990d0d8516cdfb5fdf80f799174f1287cc1b5/hf_xet-1.4.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ac8202ae1e664b2c15cdfc7298cbb25e80301ae596d602ef7870099a126fcad4", size = 3796125 }, + { url = "https://files.pythonhosted.org/packages/66/d4/b73ebab01cbf60777323b7de9ef05550790451eb5172a220d6b9845385ec/hf_xet-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d2f8ee39fa9fba9af929f8c0d0482f8ee6e209179ad14a909b6ad78ffcb7c81", size = 3555985 }, + { url = "https://files.pythonhosted.org/packages/ff/e7/ded6d1bd041c3f2bca9e913a0091adfe32371988e047dd3a68a2463c15a2/hf_xet-1.4.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4642a6cf249c09da8c1f87fe50b24b2a3450b235bf8adb55700b52f0ea6e2eb6", size = 4212085 }, + { url = "https://files.pythonhosted.org/packages/97/c1/a0a44d1f98934f7bdf17f7a915b934f9fca44bb826628c553589900f6df8/hf_xet-1.4.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:769431385e746c92dc05492dde6f687d304584b89c33d79def8367ace06cb555", size = 3988266 }, + { url = "https://files.pythonhosted.org/packages/7a/82/be713b439060e7d1f1d93543c8053d4ef2fe7e6922c5b31642eaa26f3c4b/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c9dd1c1bc4cc56168f81939b0e05b4c36dd2d28c13dc1364b17af89aa0082496", size = 4188513 }, + { url = "https://files.pythonhosted.org/packages/21/a6/cbd4188b22abd80ebd0edbb2b3e87f2633e958983519980815fb8314eae5/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fca58a2ae4e6f6755cc971ac6fcdf777ea9284d7e540e350bb000813b9a3008d", size = 4428287 }, + { url = 
"https://files.pythonhosted.org/packages/b2/4e/84e45b25e2e3e903ed3db68d7eafa96dae9a1d1f6d0e7fc85120347a852f/hf_xet-1.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:163aab46854ccae0ab6a786f8edecbbfbaa38fcaa0184db6feceebf7000c93c0", size = 3665574 }, + { url = "https://files.pythonhosted.org/packages/ee/71/c5ac2b9a7ae39c14e91973035286e73911c31980fe44e7b1d03730c00adc/hf_xet-1.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:09b138422ecbe50fd0c84d4da5ff537d27d487d3607183cd10e3e53f05188e82", size = 3528760 }, + { url = "https://files.pythonhosted.org/packages/1e/0f/fcd2504015eab26358d8f0f232a1aed6b8d363a011adef83fe130bff88f7/hf_xet-1.4.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:949dcf88b484bb9d9276ca83f6599e4aa03d493c08fc168c124ad10b2e6f75d7", size = 3796493 }, + { url = "https://files.pythonhosted.org/packages/82/56/19c25105ff81731ca6d55a188b5de2aa99d7a2644c7aa9de1810d5d3b726/hf_xet-1.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:41659966020d59eb9559c57de2cde8128b706a26a64c60f0531fa2318f409418", size = 3555797 }, + { url = "https://files.pythonhosted.org/packages/bf/e3/8933c073186849b5e06762aa89847991d913d10a95d1603eb7f2c3834086/hf_xet-1.4.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c588e21d80010119458dd5d02a69093f0d115d84e3467efe71ffb2c67c19146", size = 4212127 }, + { url = "https://files.pythonhosted.org/packages/eb/01/f89ebba4e369b4ed699dcb60d3152753870996f41c6d22d3d7cac01310e1/hf_xet-1.4.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a296744d771a8621ad1d50c098d7ab975d599800dae6d48528ba3944e5001ba0", size = 3987788 }, + { url = "https://files.pythonhosted.org/packages/84/4d/8a53e5ffbc2cc33bbf755382ac1552c6d9af13f623ed125fe67cc3e6772f/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f563f7efe49588b7d0629d18d36f46d1658fe7e08dce3fa3d6526e1c98315e2d", size = 4188315 }, + { url = 
"https://files.pythonhosted.org/packages/d1/b8/b7a1c1b5592254bd67050632ebbc1b42cc48588bf4757cb03c2ef87e704a/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5b2e0132c56d7ee1bf55bdb638c4b62e7106f6ac74f0b786fed499d5548c5570", size = 4428306 }, + { url = "https://files.pythonhosted.org/packages/a0/0c/40779e45b20e11c7c5821a94135e0207080d6b3d76e7b78ccb413c6f839b/hf_xet-1.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:2f45c712c2fa1215713db10df6ac84b49d0e1c393465440e9cb1de73ecf7bbf6", size = 3665826 }, + { url = "https://files.pythonhosted.org/packages/51/4c/e2688c8ad1760d7c30f7c429c79f35f825932581bc7c9ec811436d2f21a0/hf_xet-1.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:6d53df40616f7168abfccff100d232e9d460583b9d86fa4912c24845f192f2b8", size = 3529113 }, + { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339 }, + { url = "https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664 }, + { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422 }, + { url = "https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847 }, + { url = 
"https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843 }, + { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751 }, + { url = "https://files.pythonhosted.org/packages/cd/71/193eabd7e7d4b903c4aa983a215509c6114915a5a237525ec562baddb868/hf_xet-1.4.2-cp37-abi3-win_amd64.whl", hash = "sha256:ad185719fb2e8ac26f88c8100562dbf9dbdcc3d9d2add00faa94b5f106aea53f", size = 3671149 }, + { url = "https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426 }, ] [[package]] @@ -1053,11 +1053,11 @@ wheels = [ [[package]] name = "latex2mathml" -version = "3.78.1" +version = "3.79.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1a/26/57b1034c08922d0aefea79430a5e0006ffaee4f0ec59d566613f667ab2f7/latex2mathml-3.78.1.tar.gz", hash = "sha256:f941db80bf41db33f31df87b304e8b588f8166b813b0257c11c98f7a9d0aac71", size = 74030 } +sdist = { url = "https://files.pythonhosted.org/packages/dd/8d/2161f46485d9c36c0fa0e1c997faf08bb7843027e59b549598e49f55f8bf/latex2mathml-3.79.0.tar.gz", hash = "sha256:11bde318c2d2d6fcdd105a07509d867cee2208f653278eb80243dec7ea77a0ce", size = 151103 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/76/d661ea2e529c3d464f9efd73f9ac31626b45279eb4306e684054ea20e3d4/latex2mathml-3.78.1-py3-none-any.whl", hash = "sha256:f089b6d75e85b937f99693c93e8c16c0804008672c3dd2a3d25affd36f238100", size = 73892 }, + { url = 
"https://files.pythonhosted.org/packages/fd/92/56a954dd59637dd2ee013581fa3beea0821f17f2c07f818fc51dcc11fd10/latex2mathml-3.79.0-py3-none-any.whl", hash = "sha256:9f10720d4fcf6b22d1b81f6628237832419a7a29783c13aa92fa8d680165e63d", size = 73945 }, ] [[package]] @@ -3015,17 +3015,27 @@ name = "reqif-mcp" version = "0.1.0" source = { virtual = "." } dependencies = [ - { name = "azure-ai-inference" }, - { name = "docling" }, { name = "fastmcp" }, { name = "jsonschema" }, - { name = "openpyxl" }, - { name = "pypdf" }, { name = "python-ulid" }, { name = "reqif" }, { name = "returns" }, ] +[package.optional-dependencies] +ingest-full = [ + { name = "docling" }, + { name = "openpyxl" }, + { name = "pypdf" }, +] +ingest-lite = [ + { name = "openpyxl" }, + { name = "pypdf" }, +] +llm-review = [ + { name = "azure-ai-inference" }, +] + [package.dev-dependencies] dev = [ { name = "mypy" }, @@ -3036,12 +3046,14 @@ dev = [ [package.metadata] requires-dist = [ - { name = "azure-ai-inference", specifier = ">=1.0.0b9" }, - { name = "docling", specifier = ">=2.78.0" }, + { name = "azure-ai-inference", marker = "extra == 'llm-review'", specifier = ">=1.0.0b9" }, + { name = "docling", marker = "extra == 'ingest-full'", specifier = ">=2.79.0" }, { name = "fastmcp", specifier = "==3.0.0b1" }, { name = "jsonschema", specifier = ">=4.26.0" }, - { name = "openpyxl", specifier = ">=3.1.5" }, - { name = "pypdf", specifier = ">=6.8.0" }, + { name = "openpyxl", marker = "extra == 'ingest-full'", specifier = ">=3.1.5" }, + { name = "openpyxl", marker = "extra == 'ingest-lite'", specifier = ">=3.1.5" }, + { name = "pypdf", marker = "extra == 'ingest-full'", specifier = ">=6.8.0" }, + { name = "pypdf", marker = "extra == 'ingest-lite'", specifier = ">=6.8.0" }, { name = "python-ulid", specifier = ">=3.1.0" }, { name = "reqif", specifier = ">=0.0.48" }, { name = "returns", specifier = ">=0.26.0" }, @@ -3744,6 +3756,13 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/0f/8b/4b61d6e13f7108f36910df9ab4b58fd389cc2520d54d81b88660804aad99/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:418997cb02d0a0f1497cf6a09f63166f9f5df9f3e16c8a716ab76a72127c714f", size = 79423467 }, { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202 }, { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254 }, + { url = "https://files.pythonhosted.org/packages/16/ee/efbd56687be60ef9af0c9c0ebe106964c07400eade5b0af8902a1d8cd58c/torch-2.10.0-3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a1ff626b884f8c4e897c4c33782bdacdff842a165fee79817b1dd549fdda1321", size = 915510070 }, + { url = "https://files.pythonhosted.org/packages/36/ab/7b562f1808d3f65414cd80a4f7d4bb00979d9355616c034c171249e1a303/torch-2.10.0-3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac5bdcbb074384c66fa160c15b1ead77839e3fe7ed117d667249afce0acabfac", size = 915518691 }, + { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781 }, + { url = "https://files.pythonhosted.org/packages/ab/c6/4dfe238342ffdcec5aef1c96c457548762d33c40b45a1ab7033bb26d2ff2/torch-2.10.0-3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80b1b5bfe38eb0e9f5ff09f206dcac0a87aadd084230d4a36eea5ec5232c115b", size = 915627275 }, + { url = 
"https://files.pythonhosted.org/packages/d8/f0/72bf18847f58f877a6a8acf60614b14935e2f156d942483af1ffc081aea0/torch-2.10.0-3-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:46b3574d93a2a8134b3f5475cfb98e2eb46771794c57015f6ad1fb795ec25e49", size = 915523474 }, + { url = "https://files.pythonhosted.org/packages/f4/39/590742415c3030551944edc2ddc273ea1fdfe8ffb2780992e824f1ebee98/torch-2.10.0-3-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b1d5e2aba4eb7f8e87fbe04f86442887f9167a35f092afe4c237dfcaaef6e328", size = 915632474 }, + { url = "https://files.pythonhosted.org/packages/b6/8e/34949484f764dde5b222b7fe3fede43e4a6f0da9d7f8c370bb617d629ee2/torch-2.10.0-3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0228d20b06701c05a8f978357f657817a4a63984b0c90745def81c18aedfa591", size = 915523882 }, { url = "https://files.pythonhosted.org/packages/0c/1a/c61f36cfd446170ec27b3a4984f072fd06dab6b5d7ce27e11adb35d6c838/torch-2.10.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5276fa790a666ee8becaffff8acb711922252521b28fbce5db7db5cf9cb2026d", size = 145992962 }, { url = "https://files.pythonhosted.org/packages/b5/60/6662535354191e2d1555296045b63e4279e5a9dbad49acf55a5d38655a39/torch-2.10.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:aaf663927bcd490ae971469a624c322202a2a1e68936eb952535ca4cd3b90444", size = 915599237 }, { url = "https://files.pythonhosted.org/packages/40/b8/66bbe96f0d79be2b5c697b2e0b187ed792a15c6c4b8904613454651db848/torch-2.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:a4be6a2a190b32ff5c8002a0977a25ea60e64f7ba46b1be37093c141d9c49aeb", size = 113720931 }, From bb63c6dd1576d6f2e1b7302bacaa57b013590248 Mon Sep 17 00:00:00 2001 From: Brian Horakh <35611074+elasticdotventures@users.noreply.github.com> Date: Fri, 13 Mar 2026 23:28:10 +1100 Subject: [PATCH 2/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Signed-off-by: Brian Horakh 
<35611074+elasticdotventures@users.noreply.github.com> --- reqif_ingest_cli/docling_adapter.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/reqif_ingest_cli/docling_adapter.py b/reqif_ingest_cli/docling_adapter.py index 987c187..a5151e4 100644 --- a/reqif_ingest_cli/docling_adapter.py +++ b/reqif_ingest_cli/docling_adapter.py @@ -52,7 +52,10 @@ def extract_docling_document( pdf_graph = _extract_pdf_with_pypdf(resolved, artifact, None) if pdf_graph.nodes: return Success(pdf_graph) + except (ModuleNotFoundError, ImportError) as exc: + return Failure(_missing_pypdf_error(resolved, exc)) except Exception: + # Fall back to Docling-based extraction on unexpected PyPDF errors. pass try: @@ -178,6 +181,21 @@ def extract_docling_document( return Failure(exc) +def _missing_pypdf_error(path: Path, exc: Exception) -> Exception: + """ + Build a helpful error explaining that the PyPDF-based extractor is unavailable. + + This typically means the optional 'ingest-lite' extra (which provides 'pypdf') + is not installed. + """ + return RuntimeError( + "PyPDF-based PDF extraction failed for " + f"'{path}': {exc}. 
" + "The 'pypdf' dependency is optional; install the 'ingest-lite' extra, e.g.:\n" + " pip install 'reqif-ingest[ingest-lite]'" + ) + + def distill_docling_graph(graph: DocumentGraph) -> list[RequirementCandidate]: """Distill normative paragraph candidates from a Docling graph.""" candidates: list[RequirementCandidate] = [] From 610c004b024a104ab04302b36f3ea7a41e3e832b Mon Sep 17 00:00:00 2001 From: Brian Horakh <35611074+elasticdotventures@users.noreply.github.com> Date: Fri, 13 Mar 2026 23:28:24 +1100 Subject: [PATCH 3/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Signed-off-by: Brian Horakh <35611074+elasticdotventures@users.noreply.github.com> --- reqif_ingest_cli/docling_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reqif_ingest_cli/docling_adapter.py b/reqif_ingest_cli/docling_adapter.py index a5151e4..1ce6bb2 100644 --- a/reqif_ingest_cli/docling_adapter.py +++ b/reqif_ingest_cli/docling_adapter.py @@ -62,7 +62,7 @@ def extract_docling_document( from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption - except Exception as exc: + except (ImportError, ModuleNotFoundError) as exc: return Failure(_missing_docling_error(resolved, exc)) try: From 9d870e9ed9464cd02e2de66060f142c603a0f027 Mon Sep 17 00:00:00 2001 From: b Date: Mon, 16 Mar 2026 00:11:14 +1100 Subject: [PATCH 4/4] fix: limit docling import guidance to missing module --- reqif_ingest_cli/docling_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reqif_ingest_cli/docling_adapter.py b/reqif_ingest_cli/docling_adapter.py index 1ce6bb2..0190a40 100644 --- a/reqif_ingest_cli/docling_adapter.py +++ b/reqif_ingest_cli/docling_adapter.py @@ -87,7 +87,7 @@ def extract_docling_document( else: try: from docling.document_converter import 
DocumentConverter - except Exception as exc: + except (ImportError, ModuleNotFoundError) as exc: return Failure(_missing_docling_error(resolved, exc)) converter = DocumentConverter() conversion = converter.convert(resolved)