Skip to content

chore: add dependency audit gate #143

chore: add dependency audit gate

chore: add dependency audit gate #143

Workflow file for this run

# Book QA: content checks (Unicode, textlint, links, layout, structure)
# followed by a Jekyll build and a smoke test of the generated site.
name: Book QA (Unicode + Links + Textlint + Layout Risk)

# Runs for every pull request and for pushes to the main branch.
on:
  pull_request:
  push:
    branches:
      - main

env:
  # Quoted so the value stays the string "true" rather than a YAML boolean.
  # Going by its name, this asks the runner to execute JavaScript actions on
  # Node 24 -- confirm against the runner documentation.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'

jobs:
  qa:
    runs-on: ubuntu-latest
    # The job token only gets read access to repository contents and Pages.
    permissions:
      contents: read
      pages: read
    steps:
# Book sources go into the workspace root.
- name: Checkout book
  uses: actions/checkout@v6
  with:
    # No step visible in this workflow runs an authenticated git command,
    # so the token does not need to stay in the local git configuration.
    persist-credentials: false
# QA tooling is checked out next to the book at a fixed commit so that a
# change in the tool repository cannot silently alter the results.
- name: Checkout book-formatter (pinned)
  uses: actions/checkout@v6
  with:
    repository: itdojp/book-formatter
    ref: 'da2a49e7d2dcd9e1fa885e910c458130fe8d73a4'
    path: book-formatter
    persist-credentials: false
- name: Setup Node.js
  uses: actions/setup-node@v6
  with:
    # Quoted: a version must stay a string, never a YAML number.
    node-version: '20'
    cache: npm
    # The npm cache is keyed on the tool's lockfile.
    cache-dependency-path: book-formatter/package-lock.json
# These two steps run in the book repository root, before the tool's
# dependencies are installed.
- name: Root dependency security audit
  run: npm run check:security
- name: Book metadata consistency check
  run: node scripts/check-metadata-consistency.js
- name: Install dependencies (book-formatter)
  working-directory: book-formatter
  run: npm ci
# Publishes the step output `dir`: "docs" when that directory exists,
# otherwise "." (the repository root).
- name: Determine scan directory
  id: scan
  shell: bash
  run: |
    scan_dir=.
    if [[ -d docs ]]; then
      scan_dir=docs
    fi
    printf 'dir=%s\n' "$scan_dir" >> "$GITHUB_OUTPUT"
# Fails the job when a bidi control or zero-width character is found in the
# workflow files or the book's Markdown.
- name: Invisible Unicode check (bidi / zero-width; fail)
  shell: bash
  env:
    # Handed over through the environment instead of being pasted into the
    # Python source, so the value is always treated as data, never as code.
    SCAN_DIR: ${{ steps.scan.outputs.dir }}
  run: |
    python3 - << 'PY'
    import os
    import re
    import sys
    import unicodedata
    from pathlib import Path

    # Absolute path of the directory whose Markdown files are scanned.
    scan_dir = Path(os.environ["SCAN_DIR"]).resolve()

    # Characters that render as nothing (or reorder text) and are therefore
    # rejected wherever they appear.
    banned = [
        "\u061c",  # ARABIC LETTER MARK
        "\u00ad",  # SOFT HYPHEN
        "\u180e",  # MONGOLIAN VOWEL SEPARATOR (deprecated)
        "\u200b",  # ZERO WIDTH SPACE
        "\u200c",  # ZERO WIDTH NON-JOINER
        "\u200d",  # ZERO WIDTH JOINER
        "\u200e",  # LEFT-TO-RIGHT MARK
        "\u200f",  # RIGHT-TO-LEFT MARK
        "\u2060",  # WORD JOINER
        "\u202a",  # LEFT-TO-RIGHT EMBEDDING
        "\u202b",  # RIGHT-TO-LEFT EMBEDDING
        "\u202c",  # POP DIRECTIONAL FORMATTING
        "\u202d",  # LEFT-TO-RIGHT OVERRIDE
        "\u202e",  # RIGHT-TO-LEFT OVERRIDE
        "\u2066",  # LEFT-TO-RIGHT ISOLATE
        "\u2067",  # RIGHT-TO-LEFT ISOLATE
        "\u2068",  # FIRST STRONG ISOLATE
        "\u2069",  # POP DIRECTIONAL ISOLATE
        "\ufeff",  # ZERO WIDTH NO-BREAK SPACE (BOM)
    ]
    # One character class matching any single banned code point.
    pattern = re.compile("[" + "".join(banned) + "]")
def file_candidates(root=None):
    """Yield every workflow and Markdown file to scan, each exactly once.

    Sources are ``.github/**/*.yml`` and ``*.yaml``, ``<root>/**/*.md`` and
    ``src/**/*.md``. When the scan root is the repository root, the walk would
    otherwise descend into the ``book-formatter`` tool checkout and into
    ``node_modules`` trees, and would see ``src`` twice (once through the
    absolute root, once through the relative ``src`` path). Third-party files
    are skipped and results are de-duplicated by resolved location.

    Args:
        root: Directory whose Markdown files are scanned. Defaults to the
            script-level ``scan_dir``.

    Yields:
        pathlib.Path: Matching paths, relative to the working directory when
        they live under it. Entries are not guaranteed to be regular files;
        the caller filters with ``is_file()``.
    """
    base = scan_dir if root is None else Path(root)
    cwd = Path.cwd().resolve()

    def sources():
        yield from Path(".github").rglob("*.yml")
        yield from Path(".github").rglob("*.yaml")
        if base.exists():
            yield from base.rglob("*.md")
        if Path("src").exists():
            yield from Path("src").rglob("*.md")

    seen = set()
    for found in sources():
        real = found.resolve()
        try:
            shown = real.relative_to(cwd)
        except ValueError:
            # Outside the working directory: report the path as discovered.
            shown = found
        parts = shown.parts
        # Tooling and vendored packages are not book content.
        if "node_modules" in parts or parts[:1] == ("book-formatter",):
            continue
        if real in seen:
            continue
        seen.add(real)
        yield shown
def find_hits(limit=50):
    """Collect banned characters from all candidate files, stopping at ``limit``.

    Returns a list of ``(posix_path, line, col, code_point, name)`` tuples.
    A file that is not valid UTF-8 aborts the whole run with exit status 1.
    """
    found = []
    for path in sorted({p for p in file_candidates() if p.is_file()}):
        try:
            text = path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            print(f"::error file={path}::Failed to decode as UTF-8")
            sys.exit(1)
        for m in pattern.finditer(text):
            ch = m.group(0)
            pos = m.start()
            # rfind() gives -1 on the first line, so this is the 1-based
            # column whether or not a newline precedes the match.
            column = pos - text.rfind("\n", 0, pos)
            found.append((
                path.as_posix(),
                text.count("\n", 0, pos) + 1,
                column,
                ord(ch),
                unicodedata.name(ch, "UNKNOWN"),
            ))
            if len(found) >= limit:
                return found
    return found


hits = find_hits()
if hits:
    # One annotation per offending character, then fail the step.
    for path, line, col, cp, name in hits:
        print(f"::error file={path},line={line},col={col}::Invisible Unicode detected: U+{cp:04X} {name}")
    sys.exit(1)
print("OK: no bidi/hidden Unicode characters detected")
PY
# Each step below runs one book-formatter script against the scan directory.
# `>-` folds the lines into a single command line with no trailing newline.
- name: Unicode check (fail on warnings)
  run: >-
    node book-formatter/scripts/check-unicode.js
    "${{ steps.scan.outputs.dir }}"
    --allowlist .book-formatter/unicode-allowlist.json
    --fail-on warn
- name: Textlint (PRH dictionary; fail on errors)
  run: >-
    node book-formatter/scripts/check-textlint.js
    "${{ steps.scan.outputs.dir }}"
    --fail-on error
- name: Link check (internal + anchors)
  run: >-
    node book-formatter/scripts/check-links.js
    "${{ steps.scan.outputs.dir }}"
# The next two checks also write a JSON report into the runner's temp
# directory.
- name: Layout risk scan (long lines / tables / images)
  run: >-
    node book-formatter/scripts/check-layout-risk.js
    "${{ steps.scan.outputs.dir }}"
    --fail-on error
    --output "${{ runner.temp }}/layout-risk-report.json"
- name: Markdown structure check (front matter / headings / fences)
  run: >-
    node book-formatter/scripts/check-markdown-structure.js
    "${{ steps.scan.outputs.dir }}"
    --fail-on error
    --output "${{ runner.temp }}/markdown-structure-report.json"
# Reports are uploaded even when an earlier step failed (`always()`); a
# report that was never written is skipped silently.
- name: Upload markdown structure report
  uses: actions/upload-artifact@v7
  if: always()
  with:
    name: markdown-structure-report
    path: '${{ runner.temp }}/markdown-structure-report.json'
    if-no-files-found: ignore
- name: Upload layout risk report
  uses: actions/upload-artifact@v7
  if: always()
  with:
    name: layout-risk-report
    path: '${{ runner.temp }}/layout-risk-report.json'
    if-no-files-found: ignore
- name: Setup Pages
  uses: actions/configure-pages@v5
# Builds the scan directory into ./_site, which the smoke check reads.
- name: Build (Jekyll; GitHub Pages compatible)
  uses: actions/jekyll-build-pages@v1
  with:
    source: './${{ steps.scan.outputs.dir }}'
    destination: './_site'
# Verifies that the built site contains the top page, every navigation
# target and the core assets.
- name: Smoke check built site (top + navigation + assets)
  shell: bash
  env:
    # Handed over through the environment instead of being pasted into the
    # Python source, so the value is always treated as data, never as code.
    SCAN_DIR: ${{ steps.scan.outputs.dir }}
  run: |
    python3 -m pip install --user pyyaml
    python3 - << 'PY'
    import os
    import sys
    from pathlib import Path

    import yaml

    # Book sources and the build output, both as absolute paths.
    scan_dir = Path(os.environ["SCAN_DIR"]).resolve()
    site_dir = Path("_site").resolve()
    if not site_dir.exists():
        print(f"::error::Built site directory not found: {site_dir}")
        sys.exit(1)
def normalize_path(p):
    """Turn a navigation ``path`` value into a site-absolute path.

    Returns ``None`` for non-strings, blank strings and external links
    (``http://``, ``https://``, ``mailto:``). Otherwise the result always
    starts with ``/``; paths with a known file extension are kept as they
    are, everything else is treated as a directory and ends with ``/``.
    """
    if not isinstance(p, str):
        return None
    cleaned = p.strip()
    if cleaned == "" or cleaned.startswith(("http://", "https://", "mailto:")):
        return None
    rooted = cleaned if cleaned.startswith("/") else "/" + cleaned
    file_like = rooted.lower().endswith((".md", ".html", ".htm", ".pdf", ".txt"))
    if file_like or rooted.endswith("/"):
        return rooted
    return rooted + "/"
def read_nav_paths():
    """Return the normalized page paths listed in ``_data/navigation.yml``.

    Returns an empty list when the file does not exist. A file that cannot be
    decoded or parsed, or whose top level is not a mapping, is reported as an
    error annotation and ends the run with exit status 1 instead of surfacing
    as a raw traceback.

    For each known section, an entry with an ``items`` (or ``children``) list
    contributes the paths of those children and not its own; any other entry
    contributes its own ``path``.
    """
    nav = scan_dir / "_data" / "navigation.yml"
    if not nav.exists():
        return []

    def fail(message):
        # Prefer a path relative to the working directory in the annotation.
        try:
            shown = nav.relative_to(Path.cwd())
        except ValueError:
            shown = nav
        print(f"::error file={shown}::{message}")
        sys.exit(1)

    try:
        data = yaml.safe_load(nav.read_text(encoding="utf-8")) or {}
    except UnicodeDecodeError:
        fail("Failed to decode as UTF-8")
    except yaml.YAMLError as e:
        fail(f"Failed to parse navigation YAML: {e}")
    if not isinstance(data, dict):
        fail(f"Navigation YAML must be a mapping at the top level, got {type(data).__name__}")

    paths = []

    def add(entry):
        p = normalize_path(entry.get("path"))
        if p:
            paths.append(p)

    # Keep this segment list broad to support different book structures.
    for key in ["introduction", "chapters", "additional", "resources", "appendices", "afterword"]:
        section = data.get(key)
        if not isinstance(section, list):
            # Missing or malformed section: nothing to collect.
            continue
        for item in section:
            if not isinstance(item, dict):
                continue
            nested = item.get("items")
            if not isinstance(nested, list):
                nested = item.get("children")
            if isinstance(nested, list):
                for sub in nested:
                    if isinstance(sub, dict):
                        add(sub)
                continue
            add(item)
    return paths
def discover_paths():
    """List ``/<segment>/<subdirectory>/`` for each known segment directory.

    Only direct subdirectories of segment directories that exist under
    ``scan_dir`` are reported, in sorted order within each segment.
    """
    segments = ("introduction", "chapters", "additional", "resources", "appendices", "afterword")
    return [
        f"/{seg}/{entry.name}/"
        for seg in segments
        if (scan_dir / seg).is_dir()
        for entry in sorted((scan_dir / seg).iterdir())
        if entry.is_dir()
    ]
# Navigation entries win; otherwise fall back to discovered directories, and
# finally to the top page alone.
paths = read_nav_paths() or discover_paths() or ["/"]
# The top page is always checked, and checked first.
if "/" not in paths:
    paths = ["/", *paths]
# dict.fromkeys() drops repeats while keeping the first occurrence in order.
paths = list(dict.fromkeys(paths))
def exists_any(candidates):
    """Return True when at least one candidate path is a non-empty regular file."""
    return any(
        c.exists() and c.is_file() and c.stat().st_size > 0
        for c in candidates
    )
def candidates_for(path_str):
    """Return the files under ``site_dir`` that would satisfy ``path_str``.

    ``/`` maps to the top ``index.html``. File-like paths map to themselves,
    with Markdown also accepted as its ``.html`` counterpart. Anything else
    is treated as a directory-style URL.
    """
    if path_str == "/":
        return [site_dir / "index.html"]

    rel = path_str.lstrip("/")
    lowered = path_str.lower()

    if lowered.endswith(".md"):
        # Jekyll usually converts Markdown to HTML, but Markdown without
        # front matter may be copied as-is.
        return [site_dir / rel, site_dir / (rel[:-3] + ".html")]
    if lowered.endswith((".html", ".htm", ".pdf", ".txt")):
        return [site_dir / rel]

    # Directory-like (pretty permalink): both `/<dir>/index.html` and
    # `/<dir>.html` are accepted.
    bare = rel[:-1] if rel.endswith("/") else rel
    return [site_dir / (bare + "/") / "index.html", site_dir / (bare + ".html")]
# Pages for which none of the accepted output files exists.
missing = [p for p in paths if not exists_any(candidates_for(p))]

# Ensure core assets exist in the built output.
required_assets = [
    "assets/css/main.css",
    "assets/css/syntax-highlighting.css",
    "assets/js/theme.js",
    "assets/js/search.js",
    "assets/js/code-copy-lightweight.js",
]


def asset_present(rel):
    """Return True when the asset is a non-empty regular file in the site."""
    target = site_dir / rel
    return target.exists() and target.is_file() and target.stat().st_size > 0


missing_assets = [a for a in required_assets if not asset_present(a)]

# Report both groups before failing so a single run shows everything.
for heading, entries in (
    ("::error::Missing pages in built site:", missing),
    ("::error::Missing assets in built site:", missing_assets),
):
    if entries:
        print(heading)
        for entry in entries:
            print(f"  - {entry}")
if missing or missing_assets:
    sys.exit(1)
# Detect Liquid-eaten GitHub Actions expressions after Jekyll build.
# Every Actions expression found in the book's Markdown must still appear
# verbatim in the text of the rendered HTML.
import html as html_mod
import re

expr_re = re.compile(r"\$\{\{[^}]+\}\}")

# When the scan directory is the repository root, the tree also holds the
# build output, the tool checkout and installed packages. None of those is
# book content, and Jekyll's default exclude list leaves node_modules out of
# the site, so expressions found there could never be matched.
tool_dir = Path("book-formatter").resolve()


def is_book_source(md):
    """Return True unless ``md`` is inside the site, the tool or node_modules."""
    if "node_modules" in md.parts:
        return False
    return site_dir not in md.parents and tool_dir not in md.parents


expressions = set()
for md in scan_dir.rglob("*.md"):
    if not is_book_source(md):
        continue
    try:
        src = md.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        continue
    expressions.update(expr_re.findall(src))

# Collect the pages in a list and join once instead of growing one string.
pages = []
for f in site_dir.rglob("*.html"):
    try:
        pages.append(f.read_text(encoding="utf-8", errors="replace"))
    except OSError:
        # Best effort: an unreadable page is skipped, not fatal.
        continue

# Strip tags and decode entities so the comparison runs on visible text.
# Pages are joined with a newline so a tag cannot span two files.
rendered_text = html_mod.unescape(re.sub(r"<[^>]+>", "", "\n".join(pages)))
missing_expr = [e for e in sorted(expressions) if e and e not in rendered_text]

# What a pull-request ref looks like once the expression inside it was
# swallowed during the build.
broken_patterns = [
    "refs/pull/$/merge",
    "refs/pull/$/head",
]
found_broken = [p for p in broken_patterns if p in rendered_text]

if found_broken or missing_expr:
    if found_broken:
        print("::error::Broken GitHub Actions expression rendering detected:")
        for p in found_broken:
            print(f"  - {p}")
    if missing_expr:
        print(f"::error::GitHub Actions expressions missing in built site: {len(missing_expr)}")
        # Cap the listing so the log stays readable.
        for e in missing_expr[:20]:
            print(f"  - {e}")
        if len(missing_expr) > 20:
            print("  - ...")
    sys.exit(1)
print(f"OK: built site smoke check passed ({len(paths)} paths, {len(required_assets)} assets)")
PY