NVIDIA · WhereIs38 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/contrib/multilingual/.env.example b/contrib/multilingual/.env.example
@@ -0,0 +1,27 @@
+# SkillSpector Contrib Batch Scanner — Environment Configuration
+#
+# Copy to the repository root as .env:
+#   cp contrib/multilingual/.env.example .env
+#
+# The scanner also respects the upstream .env.example keys
+# (OPENAI_API_KEY, SKILLSPECTOR_PROVIDER, SKILLSPECTOR_MODEL).
+
+# Provider configuration
+SKILLSPECTOR_PROVIDER=openai
+SKILLSPECTOR_MODEL=deepseek-v4-flash
+
+# Single-key mode (standard OpenAI-compatible)
+OPENAI_API_KEY=sk-or-xxxxxxxxxxxxxxxxxxxxxxxx
+OPENAI_BASE_URL=https://api.deepseek.com/v1
+
+# Multi-key pool (recommended for batch scans).
+# Pipe-delimited: key|base_url|model.  Separate entries with newlines
+# or semicolons.  Supports up to 10 keys.  Leave unset to use
+# single-key mode above.
+# SKILLSPECTOR_API_KEYS="
+#   sk-or-xxx1|https://api.deepseek.com/v1|deepseek-v4-flash
+#   sk-or-xxx2|https://api.deepseek.com/v1|deepseek-v4-flash
+# "
+
+# Logging (DEBUG | INFO | WARNING | ERROR)
+SKILLSPECTOR_LOG_LEVEL=WARNING
diff --git a/contrib/multilingual/CONTRIBUTING.md b/contrib/multilingual/CONTRIBUTING.md
@@ -0,0 +1,149 @@
+# Contributing — Multilingual Batch Scanner
+
+> For developers who want to set up, test, and extend this module.
+
+---
+
+## Quick Start
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e .
+cp contrib/multilingual/.env.example .env   # edit with your API keys
+```
+
+Verify everything works:
+```bash
+python -m contrib.multilingual.batch_scan ./tests/fixtures/ -f terminal --workers 8
+```
+
+---
+
+## Project Map
+
+```
+contrib/multilingual/
+├── batch_scan.py          # CLI entry + ThreadPoolExecutor (start here)
+├── runner.py              # graph.invoke() wrapper + 7 patches + pool wiring (core)
+├── gap_fill.py            # GapFillAnalyzer — LLM pass for 8 uncovered rules
+├── api_pool.py            # ApiKeyPool — multi-key scheduler + 429 backoff
+├── detection.py           # Unicode script-ratio language detection
+├── annotation.py          # Finding language-compatibility labels
+├── discovery.py           # Recursive SKILL.md finder
+├── reports.py             # Terminal / JSON / Markdown formatters
+├── CONTRIBUTING.md        # this file
+│
+├── docs/
+│   ├── README.md          # user guide — all commands, test commands, reviewer index
+│   ├── DESIGN.md          # architecture — concurrency, patches, dual-patch mechanism
+│   ├── REVIEW_RESPONSE.md # PR #100 review response
+│   └── archive/           # deep dives, history, future work, pitfalls
+│
+└── tests/
+    ├── test_pool_wiring.py            # smoke — 3-path pool verification
+    ├── test_monkeypatch_invasiveness.py # thread isolation, scoping (14 tests)
+    ├── test_monkeypatch_fragility.py    # guard verification, deep deps (26 tests)
+    ├── docs/
+    │   ├── TEST_DESIGN.md             # WHY each suite was designed
+    │   ├── TEST_GUIDE.md              # WHAT each file covers + run commands
+    │   └── BUGS_FOUND.md              # 16 bugs found & fixed
+    └── tests-pro/
+        ├── test_api_pool.py           # 45 tests — acquire/release/backoff
+        ├── test_gap_fill.py           # 41 tests — JSON parsing, prompt building
+        ├── test_runner_patches.py     # 24 tests — context manager, patches
+        ├── test_annotation.py         # 10 tests — language compatibility
+        ├── random_numbered.py         # main entry point (seed=42)
+        └── mutation_max.py            # 30-bug injection framework
+```
+
+---
+
+## Running Tests
+
+```bash
+# All 164 tests
+python contrib/multilingual/tests/tests-pro/random_numbered.py       # 120 unit (seed=42)
+python contrib/multilingual/tests/test_pool_wiring.py                 # 4 smoke checks
+python contrib/multilingual/tests/test_monkeypatch_invasiveness.py    # 14 thematic
+python contrib/multilingual/tests/test_monkeypatch_fragility.py       # 26 thematic
+
+# Review-themed only
+python -m unittest \
+  contrib.multilingual.tests.test_monkeypatch_invasiveness \
+  contrib.multilingual.tests.test_monkeypatch_fragility -v
+python contrib/multilingual/tests/test_pool_wiring.py
+
+# Mutation test
+python contrib/multilingual/tests/tests-pro/mutation_max.py
+
+# End-to-end (fixture suite)
+python -m contrib.multilingual.batch_scan ./tests/fixtures/ -f terminal --workers 8
+python -m contrib.multilingual.batch_scan ./tests/fixtures/ -f terminal --workers 8 --no-llm
+```
+
+**Three commands catch most regressions:**
+```bash
+python contrib/multilingual/tests/tests-pro/random_numbered.py
+python contrib/multilingual/tests/test_pool_wiring.py
+python -m contrib.multilingual.batch_scan ./tests/fixtures/ -f terminal --workers 8
+```
+
+---
+
+## Code Conventions
+
+Match SkillSpector upstream exactly:
+
+- **SPDX header** on every `.py` file
+- `from __future__ import annotations` as first import
+- Imports: stdlib → third-party → `skillspector.*` → relative (`.`)
+- `| None` syntax (not `Optional[X]`)
+- `frozenset` / `Final` for module-level constants (`UPPER_SNAKE_CASE`)
+- Private helpers: `_lower_snake_case`
+- `logger = get_logger(__name__)` in every module
+- Comments explain **why**, not what
+- Docstrings on all public functions and classes
+
+---
+
+## Commit Style
+
+```
+fix: wire ApiKeyPool into llm_analyzer_base graph path
+feat: add multilingual batch scanner with parallel execution
+docs: document dual-patch pool wiring fix
+```
+
+- Present-tense, imperative mood
+- `Signed-off-by` trailer required (NVIDIA DCO)
+- `Co-authored-by` trailer for joint work
+
+---
+
+## Key Design Points
+
+Before modifying code, understand these three:
+
+1. **Dual-patch pool wiring.** `set_api_pool()` patches both `llm_utils.get_chat_model` AND `llm_analyzer_base.get_chat_model`. The latter is necessary because `llm_analyzer_base` imports via `from ... import`, creating a local reference that single-module patching misses. See `docs/archive/PITFALLS.md`.
+
+2. **Instance-attribute injection (not class-attribute).** Patch 1 writes `self.response_schema = None` to instance `__dict__`, not class `__dict__`. Python attribute lookup checks the instance `__dict__` before plain class attributes, so each instance sees its own value. This is what makes patches thread-safe. Mutating the class attribute causes cross-thread races (this killed V1).
+
+3. **Guard before apply.** `_verify_patch_targets()` checks all 7 patch assumptions before `_apply_patches()` runs. If upstream changes a signature or removes a dependency, the guard raises immediately — patches fail closed, never silently.
+
+Full architecture: `docs/DESIGN.md`.
+All pitfalls: `docs/archive/PITFALLS.md`.
+
+---
+
+## Where to Contribute
+
+See `docs/archive/FUTURE_WORK.md` for 12 future directions with effort estimates. High-impact items:
+- Checkpoint/resume (prevents data loss on large scans)
+- Language detection expansion (9+ languages)
+- SARIF output format
+- Non-English ground-truth fixtures
+
+---
+
+**Next:** [docs/README.md](docs/README.md) — user guide · [docs/DESIGN.md](docs/DESIGN.md) — architecture · [docs/REVIEW_RESPONSE.md](docs/REVIEW_RESPONSE.md) — PR #100 review response
diff --git a/contrib/multilingual/__init__.py b/contrib/multilingual/__init__.py
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Multilingual batch scan for SkillSpector.
+
+Community-contributed tool for scanning directories of AI agent skills
+in non-English languages.  Extends SkillSpector's built-in analyzers
+with targeted LLM gap-fill for vulnerability categories that static
+English-keyword regex rules cannot detect.
+
+Public API
+----------
+- :func:`~.discovery.discover_skills`
+- :func:`~.detection.detect_language`
+- :func:`~.detection.detect_skill_language`
+- :func:`~.annotation.is_language_compatible`
+- :func:`~.annotation.annotate_findings`
+- :func:`~.gap_fill.run_gap_fill`
+- :func:`~.runner.run_one`
+"""
+
+from __future__ import annotations
+
+# -- .env MUST load before any skillspector import: Python runs this
+#    __init__.py before the batch_scan module body, and without the
+#    early load constants.py resolves the provider with stale env vars.
+#    override=True makes .env values win over already-exported ones.
+try:
+    import dotenv as _dotenv
+except ImportError:
+    pass  # dotenv is optional; fall back to the existing environment
+else:
+    _dotenv.load_dotenv(_dotenv.find_dotenv(usecwd=True), override=True)
+
+from .annotation import annotate_findings, is_language_compatible
+from .api_pool import ApiKey, ApiKeyPool, PooledChatModel, create_api_key_pool_from_env
+from .detection import detect_language, detect_skill_language
+from .discovery import discover_skills
+from .gap_fill import GapFillAnalyzer, GapFillFinding, GapFillResult, run_gap_fill
+from .runner import run_one
+
+__all__ = [
+    "annotate_findings",
+    "ApiKey",
+    "ApiKeyPool",
+    "create_api_key_pool_from_env",
+    "detect_language",
+    "detect_skill_language",
+    "discover_skills",
+    "GapFillAnalyzer",
+    "GapFillFinding",
+    "GapFillResult",
+    "is_language_compatible",
+    "PooledChatModel",
+    "run_gap_fill",
+    "run_one",
+]
diff --git a/contrib/multilingual/annotation.py b/contrib/multilingual/annotation.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Finding language-compatibility annotation.
+
+Classifies each finding's ``rule_id`` against known buckets so downstream
+reports can flag which findings are reliable for non-English skills.
+"""
+
+from __future__ import annotations
+
+# ---------------------------------------------------------------------------
+# Rule classification
+# ---------------------------------------------------------------------------
+
+# Rule IDs from LLM-based semantic analyzers — inherently multilingual.
+_SEMANTIC_RULES: frozenset[str] = frozenset(
+    {
+        "SSD1", "SSD2", "SSD3", "SSD4",
+        "SDI1", "SDI2", "SDI3", "SDI4",
+        "SQP1", "SQP2", "SQP3",
+        "TP4",
+    }
+)
+
+# Rule IDs from the gap-fill pass (P5 / P6-P8 / MP1-MP3 / RA1-RA2) —
+# these are LLM-generated for non-English skills.
+_GAP_FILL_RULES: frozenset[str] = frozenset(
+    {"P5", "P6", "P7", "P8", "MP1", "MP2", "MP3", "RA1", "RA2"}
+)
+
+# Rule IDs from code-level analyzers — language-independent by design.
+_CODE_RULES: frozenset[str] = frozenset(
+    {
+        "AST1", "AST2", "AST3", "AST4", "AST5", "AST6", "AST7", "AST8",
+        "TT1", "TT2", "TT3", "TT4", "TT5",
+        "YR1", "YR2", "YR3", "YR4",
+        "SC1", "SC2", "SC3", "SC4", "SC5", "SC6",
+        "LP1", "LP2", "LP3", "LP4",
+        "TP1", "TP2", "TP3",
+        "TM1", "TM2", "TM3",
+    }
+)
+
+# English-keyword static rules that have semantic-equivalent coverage
+# via SSD / SDI / SQP for non-English skills.  Listed for documentation
+# only: is_language_compatible() never reads this set — any rule outside
+# the three sets above is flagged incompatible for non-English skills.
+_ENGLISH_KEYWORD_RULES: frozenset[str] = frozenset(
+    {
+        "P1", "P2", "P3", "P4",
+        "E1", "E2", "E3", "E4",
+        "PE1", "PE2", "PE3",
+        "EA1", "EA2", "EA3", "EA4",
+        "OH1", "OH2", "OH3",
+        "TR1", "TR2", "TR3",
+    }
+)
+
+
+def is_language_compatible(rule_id: str, detected_language: str) -> bool:
+    """Return ``True`` when *rule_id* is reliable for *detected_language*.
+
+    Code-level rules are always compatible.  Semantic rules are always
+    compatible.  English-keyword rules are only compatible when the skill
+    is English.  Gap-fill rules are compatible (they were generated by
+    an LLM specifically for this language).
+    """
+    if detected_language == "en":
+        return True
+    return rule_id in _SEMANTIC_RULES | _CODE_RULES | _GAP_FILL_RULES
+
+
+def annotate_findings(
+    issues: list[dict[str, object]],
+    detected_language: str,
+) -> list[dict[str, object]]:
+    """Add a ``language_compatible`` field to each issue dict.
+
+    Returns a new list — the input *issues* list is not mutated.
+    """
+    annotated: list[dict[str, object]] = []
+    for issue in issues:
+        rule_id = str(issue.get("id", ""))
+        entry = dict(issue)
+        entry["language_compatible"] = is_language_compatible(rule_id, detected_language)
+        annotated.append(entry)
+    return annotated