Book QA (Unicode + Links + Textlint + Layout Risk)

chore: add dependency audit gate #143

Workflow file for this run

.github/workflows/book-qa.yml at 9cbbc87

	# Quality gate for the book sources: Unicode hygiene, textlint, links, layout
	# risk and Markdown structure checks, then a Jekyll build plus a smoke check.
	name: Book QA (Unicode + Links + Textlint + Layout Risk)

	# Triggered by every pull request and by pushes to `main`.
	on:
	  pull_request:
	  push:
	    branches:
	      - main

	env:
	  # Quoted so the value is the string 'true' rather than a YAML boolean.
	  # NOTE(review): presumably opts JavaScript actions into the Node 24 runtime;
	  # confirm against the runner documentation.
	  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'

	jobs:
	  qa:
	    runs-on: ubuntu-latest
	    # Token scopes are read-only for this job.
	    permissions:
	      contents: read
	      pages: read

	    steps:
	# The book repository itself (default checkout location).
	- name: Checkout book
	  uses: actions/checkout@v6

	# The QA tooling comes from a second repository, pinned to an exact commit
	# and placed under ./book-formatter.
	- name: Checkout book-formatter (pinned)
	  uses: actions/checkout@v6
	  with:
	    path: book-formatter
	    repository: itdojp/book-formatter
	    ref: 'da2a49e7d2dcd9e1fa885e910c458130fe8d73a4'

	# Node toolchain for the book-formatter scripts; the npm cache is keyed on
	# the formatter's lockfile.
	- name: Setup Node.js
	  uses: actions/setup-node@v6
	  with:
	    cache: npm
	    cache-dependency-path: book-formatter/package-lock.json
	    # Quoted so the version is passed as a string.
	    node-version: '20'

	# Runs the repository's own `check:security` npm script.
	# NOTE(review): this executes before any `npm ci` at the repository root;
	# presumably the script does not need installed packages - confirm.
	- name: Root dependency security audit
	  run: npm run check:security

	# Executes the repository-local metadata consistency script with Node.
	- name: Book metadata consistency check
	  run: node scripts/check-metadata-consistency.js

	# Installs the formatter's dependencies from its lockfile, inside its checkout.
	- name: Install dependencies (book-formatter)
	  run: npm ci
	  working-directory: book-formatter

	# Chooses the content root used by every later step: `docs` when that
	# directory exists, otherwise the repository root (`.`). The result is
	# exposed as `steps.scan.outputs.dir`.
	- name: Determine scan directory
	  id: scan
	  shell: bash
	  # `|` (literal block scalar) keeps the script's newlines intact.
	  run: |
	    if [ -d docs ]; then
	      echo "dir=docs" >> "$GITHUB_OUTPUT"
	    else
	      echo "dir=." >> "$GITHUB_OUTPUT"
	    fi

	# Fails the job when bidi-control or zero-width characters are found in
	# workflow files or Markdown sources. Reports at most MAX_HITS findings as
	# `::error` annotations with file, line and column.
	- name: Invisible Unicode check (bidi / zero-width; fail)
	  shell: bash
	  run: |
	    python3 - << 'PY'
	    import re
	    import sys
	    import unicodedata
	    from pathlib import Path

	    # Upper bound on reported findings so one bad file cannot flood the log.
	    MAX_HITS = 50

	    workspace = Path.cwd().resolve()
	    scan_dir = Path("${{ steps.scan.outputs.dir }}").resolve()

	    # Kept as escape sequences on purpose: workflow files under .github are
	    # scanned too, so literal characters here would flag this file itself.
	    banned = [
	        "\u061c",  # ARABIC LETTER MARK
	        "\u00ad",  # SOFT HYPHEN
	        "\u180e",  # MONGOLIAN VOWEL SEPARATOR (deprecated)
	        "\u200b",  # ZERO WIDTH SPACE
	        "\u200c",  # ZERO WIDTH NON-JOINER
	        "\u200d",  # ZERO WIDTH JOINER
	        "\u200e",  # LEFT-TO-RIGHT MARK
	        "\u200f",  # RIGHT-TO-LEFT MARK
	        "\u2060",  # WORD JOINER
	        "\u202a",  # LEFT-TO-RIGHT EMBEDDING
	        "\u202b",  # RIGHT-TO-LEFT EMBEDDING
	        "\u202c",  # POP DIRECTIONAL FORMATTING
	        "\u202d",  # LEFT-TO-RIGHT OVERRIDE
	        "\u202e",  # RIGHT-TO-LEFT OVERRIDE
	        "\u2066",  # LEFT-TO-RIGHT ISOLATE
	        "\u2067",  # RIGHT-TO-LEFT ISOLATE
	        "\u2068",  # FIRST STRONG ISOLATE
	        "\u2069",  # POP DIRECTIONAL ISOLATE
	        "\ufeff",  # ZERO WIDTH NO-BREAK SPACE (BOM)
	    ]

	    pattern = re.compile("[" + "".join(banned) + "]")

	    def file_candidates():
	        """Yield workflow YAML files plus Markdown under the scan dir and src/."""
	        yield from Path(".github").rglob("*.yml")
	        yield from Path(".github").rglob("*.yaml")
	        if scan_dir.exists():
	            yield from scan_dir.rglob("*.md")
	        if Path("src").exists():
	            yield from Path("src").rglob("*.md")

	    def display(path):
	        """Return a workspace-relative POSIX path when possible, for annotations."""
	        try:
	            return path.relative_to(workspace).as_posix()
	        except ValueError:
	            return path.as_posix()

	    # Resolve before de-duplicating: when the scan dir is ".", src/*.md is
	    # yielded both as an absolute and as a relative path, and would otherwise
	    # be scanned (and reported) twice.
	    targets = sorted({p.resolve() for p in file_candidates() if p.is_file()})

	    hits = []
	    for path in targets:
	        try:
	            text = path.read_text(encoding="utf-8")
	        except UnicodeDecodeError:
	            print(f"::error file={display(path)}::Failed to decode as UTF-8")
	            sys.exit(1)

	        for m in pattern.finditer(text):
	            ch = m.group(0)
	            # 1-based line and column of the offending character.
	            line = text.count("\n", 0, m.start()) + 1
	            line_start = text.rfind("\n", 0, m.start()) + 1
	            col = m.start() - line_start + 1
	            hits.append((display(path), line, col, ord(ch), unicodedata.name(ch, "UNKNOWN")))
	            if len(hits) >= MAX_HITS:
	                break
	        if len(hits) >= MAX_HITS:
	            break

	    if hits:
	        for path, line, col, cp, name in hits:
	            print(f"::error file={path},line={line},col={col}::Invisible Unicode detected: U+{cp:04X} {name}")
	        sys.exit(1)

	    print("OK: no bidi/hidden Unicode characters detected")
	    PY

	# Each step below runs one book-formatter script against the scan directory.
	# Commands use folded scalars (`>-`): the lines are joined with single
	# spaces, so the shell receives one command line.
	- name: Unicode check (fail on warnings)
	  run: >-
	    node book-formatter/scripts/check-unicode.js
	    "${{ steps.scan.outputs.dir }}"
	    --allowlist .book-formatter/unicode-allowlist.json
	    --fail-on warn

	- name: Textlint (PRH dictionary; fail on errors)
	  run: >-
	    node book-formatter/scripts/check-textlint.js
	    "${{ steps.scan.outputs.dir }}"
	    --fail-on error

	- name: Link check (internal + anchors)
	  run: >-
	    node book-formatter/scripts/check-links.js
	    "${{ steps.scan.outputs.dir }}"

	# Writes a JSON report into the runner's temp directory for later upload.
	- name: Layout risk scan (long lines / tables / images)
	  run: >-
	    node book-formatter/scripts/check-layout-risk.js
	    "${{ steps.scan.outputs.dir }}"
	    --fail-on error
	    --output "${{ runner.temp }}/layout-risk-report.json"

	# Writes a JSON report into the runner's temp directory for later upload.
	- name: Markdown structure check (front matter / headings / fences)
	  run: >-
	    node book-formatter/scripts/check-markdown-structure.js
	    "${{ steps.scan.outputs.dir }}"
	    --fail-on error
	    --output "${{ runner.temp }}/markdown-structure-report.json"

	# Both uploads run even when an earlier step failed (`always()`), and a
	# missing report file is ignored rather than treated as an error.
	- name: Upload markdown structure report
	  uses: actions/upload-artifact@v7
	  if: always()
	  with:
	    name: markdown-structure-report
	    if-no-files-found: ignore
	    path: ${{ runner.temp }}/markdown-structure-report.json

	- name: Upload layout risk report
	  uses: actions/upload-artifact@v7
	  if: always()
	  with:
	    name: layout-risk-report
	    if-no-files-found: ignore
	    path: ${{ runner.temp }}/layout-risk-report.json
	# Build the site from the scan directory into ./_site with the Jekyll
	# Pages action, so the following smoke check can inspect the output.
	- name: Setup Pages
	  uses: actions/configure-pages@v5

	- name: Build (Jekyll; GitHub Pages compatible)
	  uses: actions/jekyll-build-pages@v1
	  with:
	    destination: ./_site
	    source: ./${{ steps.scan.outputs.dir }}

	# Verifies the Jekyll output in ./_site:
	#   1. the landing page and every navigable page exist and are non-empty,
	#   2. the core theme assets exist and are non-empty,
	#   3. GitHub Actions expressions written in the Markdown sources still
	#      appear in the rendered HTML (i.e. Liquid did not consume them).
	- name: Smoke check built site (top + navigation + assets)
	  shell: bash
	  run: |
	    python3 -m pip install --user pyyaml
	    python3 - << 'PY'
	    import html as html_mod
	    import re
	    import sys
	    from pathlib import Path

	    import yaml

	    # Keep this segment list broad to support different book structures.
	    SEGMENTS = [
	        "introduction",
	        "chapters",
	        "additional",
	        "resources",
	        "appendices",
	        "afterword",
	    ]

	    scan_dir = Path("${{ steps.scan.outputs.dir }}").resolve()
	    site_dir = Path("_site").resolve()

	    if not site_dir.exists():
	        print(f"::error::Built site directory not found: {site_dir}")
	        sys.exit(1)

	    def normalize_path(p):
	        """Turn a navigation `path` value into a site-absolute path.

	        Returns None for non-strings, blank values and external links
	        (http/https/mailto). File-like paths are returned as-is; anything
	        else is treated as a directory and gets a trailing slash.
	        """
	        if not isinstance(p, str):
	            return None
	        p = p.strip()
	        if not p:
	            return None
	        if p.startswith(("http://", "https://", "mailto:")):
	            return None
	        if not p.startswith("/"):
	            p = "/" + p
	        if p.lower().endswith((".md", ".html", ".htm", ".pdf", ".txt")):
	            return p
	        return p if p.endswith("/") else p + "/"

	    def read_nav_paths():
	        """Collect page paths from _data/navigation.yml (empty list if absent).

	        Exits with an error annotation when the file is not valid YAML.
	        """
	        nav = scan_dir / "_data" / "navigation.yml"
	        if not nav.exists():
	            return []
	        try:
	            nav_display = nav.relative_to(Path.cwd())
	        except ValueError:
	            nav_display = nav
	        try:
	            data = yaml.safe_load(nav.read_text(encoding="utf-8")) or {}
	        except yaml.YAMLError as e:
	            print(f"::error file={nav_display}::Failed to parse navigation YAML: {e}")
	            sys.exit(1)
	        if not isinstance(data, dict):
	            # A list or scalar at the top level has no sections to read;
	            # warn and let the caller fall back to directory discovery.
	            print(f"::warning file={nav_display}::Navigation YAML root is not a mapping; ignoring it")
	            return []

	        paths = []
	        for key in SEGMENTS:
	            section = data.get(key)
	            if not isinstance(section, list):
	                continue
	            for item in section:
	                if not isinstance(item, dict):
	                    continue
	                # A group lists its pages under `items` (or `children`);
	                # in that case only the nested entries are checked.
	                nested = item.get("items")
	                if not isinstance(nested, list):
	                    nested = item.get("children")
	                entries = nested if isinstance(nested, list) else [item]
	                for entry in entries:
	                    if not isinstance(entry, dict):
	                        continue
	                    p = normalize_path(entry.get("path"))
	                    if p:
	                        paths.append(p)
	        return paths

	    def discover_paths():
	        """Fallback: one path per sub-directory of each known segment directory."""
	        paths = []
	        for seg in SEGMENTS:
	            d = scan_dir / seg
	            if d.is_dir():
	                for child in sorted(d.iterdir()):
	                    if child.is_dir():
	                        paths.append(f"/{seg}/{child.name}/")
	        return paths

	    paths = read_nav_paths() or discover_paths() or ["/"]
	    if "/" not in paths:
	        paths.insert(0, "/")

	    # De-dup while keeping order
	    paths = list(dict.fromkeys(paths))

	    def exists_any(candidates):
	        """True when at least one candidate is a non-empty regular file."""
	        return any(c.is_file() and c.stat().st_size > 0 for c in candidates)

	    def candidates_for(path_str):
	        """Return the built files that would satisfy the given site path."""
	        if path_str == "/":
	            return [site_dir / "index.html"]
	        rel = path_str.lstrip("/")
	        lower = path_str.lower()
	        if lower.endswith((".html", ".htm", ".pdf", ".txt")):
	            return [site_dir / rel]
	        if lower.endswith(".md"):
	            # Jekyll usually converts Markdown to HTML, but Markdown without front matter may be copied as-is.
	            return [site_dir / rel, site_dir / (rel[:-3] + ".html")]
	        # Directory-like (pretty permalink)
	        rel_dir = rel if rel.endswith("/") else rel + "/"
	        # Both `/<dir>/index.html` and `/<dir>.html` are accepted.
	        return [site_dir / rel_dir / "index.html", site_dir / (rel_dir[:-1] + ".html")]

	    missing = [p for p in paths if not exists_any(candidates_for(p))]

	    # Ensure core assets exist in the built output.
	    required_assets = [
	        "assets/css/main.css",
	        "assets/css/syntax-highlighting.css",
	        "assets/js/theme.js",
	        "assets/js/search.js",
	        "assets/js/code-copy-lightweight.js",
	    ]
	    missing_assets = [a for a in required_assets if not exists_any([site_dir / a])]

	    if missing or missing_assets:
	        if missing:
	            print("::error::Missing pages in built site:")
	            for p in missing:
	                print(f" - {p}")
	        if missing_assets:
	            print("::error::Missing assets in built site:")
	            for a in missing_assets:
	                print(f" - {a}")
	        sys.exit(1)

	    # Detect Liquid-eaten GitHub Actions expressions after Jekyll build.
	    expr_re = re.compile(r"\$\{\{[^}]+\}\}")
	    expressions = set()
	    for md in scan_dir.rglob("*.md"):
	        try:
	            src = md.read_text(encoding="utf-8")
	        except UnicodeDecodeError:
	            continue
	        expressions.update(expr_re.findall(src))

	    # Collect chunks and join once instead of repeated string concatenation.
	    chunks = []
	    for f in site_dir.rglob("*.html"):
	        try:
	            chunks.append(f.read_text(encoding="utf-8", errors="replace"))
	        except OSError:
	            # Unreadable file: skip it (decoding cannot fail with errors="replace").
	            continue
	    site_html = "".join(chunks)

	    # Strip tags, then decode entities, to compare against the source text.
	    rendered_text = html_mod.unescape(re.sub(r"<[^>]+>", "", site_html))

	    missing_expr = [e for e in sorted(expressions) if e and e not in rendered_text]
	    broken_patterns = [
	        "refs/pull/$/merge",
	        "refs/pull/$/head",
	    ]
	    found_broken = [p for p in broken_patterns if p in rendered_text]

	    if found_broken or missing_expr:
	        if found_broken:
	            print("::error::Broken GitHub Actions expression rendering detected:")
	            for p in found_broken:
	                print(f" - {p}")
	        if missing_expr:
	            print(f"::error::GitHub Actions expressions missing in built site: {len(missing_expr)}")
	            for e in missing_expr[:20]:
	                print(f" - {e}")
	            if len(missing_expr) > 20:
	                print(" - ...")
	        sys.exit(1)

	    print(f"OK: built site smoke check passed ({len(paths)} paths, {len(required_assets)} assets)")
	    PY

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

chore: add dependency audit gate #143

Workflow file

chore: add dependency audit gate #143

Uh oh!

Workflow file for this run