vitali87 · vitali87 · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026
diff --git a/.github/workflows/split-score.yml b/.github/workflows/split-score.yml
@@ -0,0 +1,22 @@
+# Scores every pull request that targets main using the action defined at the
+# root of this repository.
+name: PR Split Score
+
+on:
+  pull_request:
+    branches: [main]
+
+# Listing any permission sets every unlisted scope to "none", so the read
+# access that actions/checkout relies on has to be granted explicitly.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  score:
+    runs-on: ubuntu-latest
+    steps:
+      # Full history so the scoring script can diff the base and head refs.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Run the composite action from this checkout.
+      - uses: ./
+        with:
+          max-loc: "400"
+          partition-strategy: "graph"
+          threshold-groups: "2"
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@
 
 ## Latest News 🔥
 
+- GitHub Action — add pr-split to any repo as a CI check. Scores every PR and posts a split plan comment when it's too large. No API key needed.
 - Smart LOC Bounds — set `--min-loc` and `--max-loc` to control sub-PR size across all three backends (LLM, graph, CP-SAT). Undersized groups get merged, oversized groups get penalised.
 - LLM Refinement Loop — enable `--max-refinement-iterations` and pr-split will automatically feed LOC violations back to the LLM until every group fits within your configured bounds.
 - Auto-derived Minimum LOC — when refinement is on and no `--min-loc` is set, pr-split picks a sensible default (25% of `--max-loc`) so you get well-sized groups out of the box.
@@ -185,6 +186,57 @@ Settings can be set via environment variables with the `PR_SPLIT_` prefix:
 | `PR_SPLIT_PARTITION_STRATEGY` | `llm` | Hunk-to-PR partition backend |
 | `PR_SPLIT_WEBHOOK_URL` | (none) | Webhook URL for merge notifications |
 
+## GitHub Action
+
+Add pr-split as a CI check that scores every PR and posts a split plan when it's too large. Uses the `graph` backend by default — no API key needed.
+
+```yaml
+# .github/workflows/split-score.yml
+name: PR Split Score
+
+on:
+  pull_request:
+    branches: [main]
+
+# Unlisted scopes become "none" once any permission is declared, so the
+# checkout step needs contents: read spelled out (required for private repos).
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  score:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: vitali87/pr-split@main
+        with:
+          max-loc: "400"
+          partition-strategy: "graph"
+          threshold-groups: "2"
+```
+
+### Action inputs
+
+| Input | Default | Description |
+|-------|---------|-------------|
+| `max-loc` | `400` | Maximum target diff lines per sub-PR |
+| `min-loc` | (unset) | Minimum target diff lines per sub-PR |
+| `partition-strategy` | `graph` | Backend for partitioning (`graph` or `cp_sat`) |
+| `priority` | `orthogonal` | Grouping priority (`orthogonal` or `logical`) |
+| `threshold-groups` | `2` | Minimum suggested groups before posting the split plan |
+| `python-version` | `3.12` | Python version to use |
+| `post-comment` | `true` | Whether to post a PR comment with the results |
+
+### Action outputs
+
+| Output | Description |
+|--------|-------------|
+| `total-loc` | Total lines of code in the PR diff |
+| `total-groups` | Number of suggested groups |
+| `objective` | Plan objective score (lower is better) |
+| `should-split` | Whether the PR should be split (`true`/`false`) |
+
 ## Planning backends
 
 `pr-split` now separates two optimization layers:

diff --git a/action.yml b/action.yml
@@ -0,0 +1,101 @@
+# Action metadata.
+name: 'pr-split score'
+description: "Score a PR's complexity and suggest splits when it's too large"
+branding:
+  icon: 'git-pull-request'
+  color: 'blue'
+
+# Every input carries a default, so callers may omit any of them.
+# Values are strings; the scoring step receives them as environment variables.
+inputs:
+  max-loc:
+    description: 'Maximum target diff lines per sub-PR'
+    default: '400'
+  min-loc:
+    # Empty means "not set".
+    description: 'Minimum target diff lines per sub-PR'
+    default: ''
+  partition-strategy:
+    description: 'Backend for partitioning (graph or cp_sat)'
+    default: 'graph'
+  priority:
+    description: 'Grouping priority (orthogonal or logical)'
+    default: 'orthogonal'
+  threshold-groups:
+    description: 'Minimum suggested groups before showing the split plan'
+    default: '2'
+  python-version:
+    description: 'Python version to use'
+    default: '3.12'
+  post-comment:
+    description: 'Whether to post a PR comment with the results'
+    default: 'true'
+
+# Each output forwards the matching output of the step with id "score".
+outputs:
+  total-loc:
+    description: 'Total lines of code in the PR diff'
+    value: ${{ steps.score.outputs.total_loc }}
+  total-groups:
+    description: 'Number of suggested groups'
+    value: ${{ steps.score.outputs.total_groups }}
+  objective:
+    description: 'Plan objective score (lower is better)'
+    value: ${{ steps.score.outputs.objective }}
+  should-split:
+    description: 'Whether the PR should be split (true/false)'
+    value: ${{ steps.score.outputs.should_split }}
+
+# Composite steps: install uv and Python, install pr-split from this action's
+# checkout, score the PR, then create or update a single marker comment.
+# Context values are handed to shells and scripts through env rather than
+# being interpolated into the command text.
+runs:
+  using: composite
+  steps:
+    - name: Install uv
+      uses: astral-sh/setup-uv@v4
+
+    - name: Set up Python
+      shell: bash
+      env:
+        UV_PYTHON_VERSION: ${{ inputs.python-version }}
+      run: uv python install "$UV_PYTHON_VERSION"
+
+    - name: Install pr-split
+      shell: bash
+      env:
+        UV_PYTHON_VERSION: ${{ inputs.python-version }}
+        ACTION_PATH: ${{ github.action_path }}
+      # --python pins the tool environment to the interpreter requested by
+      # the python-version input instead of whichever one uv discovers first.
+      run: uv tool install --python "$UV_PYTHON_VERSION" "$ACTION_PATH"
+
+    - name: Score PR
+      id: score
+      shell: bash
+      env:
+        UV_PYTHON_VERSION: ${{ inputs.python-version }}
+        ACTION_PATH: ${{ github.action_path }}
+        MAX_LOC: ${{ inputs.max-loc }}
+        MIN_LOC: ${{ inputs.min-loc }}
+        PARTITION_STRATEGY: ${{ inputs.partition-strategy }}
+        PRIORITY: ${{ inputs.priority }}
+        THRESHOLD_GROUPS: ${{ inputs.threshold-groups }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+        BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
+        HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
+      # Run the script with the uv-managed interpreter; a bare `python` would
+      # resolve to whatever the runner image has on PATH. --no-project keeps
+      # uv from treating the checked-out repository as a project to sync.
+      run: |
+        uv run --no-project --python "$UV_PYTHON_VERSION" -- \
+          python "$ACTION_PATH/scripts/score_pr.py"
+
+    - name: Post comment
+      if: inputs.post-comment == 'true' && steps.score.outputs.should_split == 'true'
+      uses: actions/github-script@v7
+      env:
+        COMMENT_PATH: ${{ steps.score.outputs.comment_path }}
+      with:
+        script: |
+          const fs = require('fs');
+          const marker = '<!-- pr-split-score -->';
+          const body = fs.readFileSync(process.env.COMMENT_PATH, 'utf8');
+          const target = {
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            issue_number: context.payload.pull_request.number,
+          };
+          // Walk every page: the default listing returns only the first page,
+          // so on a busy PR the marker comment would be missed and duplicated.
+          const comments = await github.paginate(
+            github.rest.issues.listComments,
+            { ...target, per_page: 100 },
+          );
+          const existing = comments.find(c => c.body && c.body.includes(marker));
+          if (existing) {
+            await github.rest.issues.updateComment({
+              owner: target.owner,
+              repo: target.repo,
+              comment_id: existing.id,
+              body,
+            });
+          } else {
+            await github.rest.issues.createComment({ ...target, body });
+          }
diff --git a/scripts/score_pr.py b/scripts/score_pr.py
@@ -0,0 +1,193 @@
+"""Score a PR and generate a markdown comment with the split plan."""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+
+def _run(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
+    """Execute ``cmd`` with captured text output and return the completed process.
+
+    When ``check`` is true and the command exits non-zero, the command line and
+    its stderr are printed to stderr and the interpreter exits with status 1.
+    """
+    completed = subprocess.run(cmd, capture_output=True, text=True)
+    succeeded = completed.returncode == 0
+    if succeeded or not check:
+        return completed
+    print(f"Command failed: {' '.join(cmd)}", file=sys.stderr)
+    print(completed.stderr, file=sys.stderr)
+    sys.exit(1)
+
+
+def _set_output(name: str, value: str) -> None:
+    """Append one ``name=value`` line to the file named by ``GITHUB_OUTPUT``.
+
+    Raises:
+        KeyError: if ``GITHUB_OUTPUT`` is not set in the environment.
+    """
+    output_file = os.environ["GITHUB_OUTPUT"]
+    entry = f"{name}={value}\n"
+    with open(output_file, "a") as sink:
+        sink.write(entry)
+
+
+def _skip(reason: str) -> None:
+    """Print ``reason`` and publish the outputs that mean "no split suggested"."""
+    print(reason)
+    no_split_outputs = (
+        ("total_groups", "1"),
+        ("objective", "0"),
+        ("should_split", "false"),
+    )
+    for key, val in no_split_outputs:
+        _set_output(key, val)
+
+
+def _md_escape(s: str) -> str:
+    """Make ``s`` safe to place inside a single Markdown table cell.
+
+    Line breaks are collapsed to a space because a table row must stay on one
+    line, and pipes are backslash-escaped so they are not read as column
+    separators. Text without line breaks is escaped exactly as before.
+    """
+    single_line = " ".join(s.splitlines())
+    return single_line.replace("|", "\\|")
+
+
+def _parse_int_env(name: str, default: int) -> int:
+    """Read environment variable ``name`` as an integer.
+
+    An unset, empty, or whitespace-only value yields ``default``; an action
+    input explicitly set to an empty string arrives here as ``""`` and should
+    not abort the run. Any other non-integer value prints an error to stderr
+    and exits with status 1.
+    """
+    raw = os.environ.get(name, "").strip()
+    if not raw:
+        return default
+    try:
+        return int(raw)
+    except ValueError:
+        print(f"Error: {name} must be an integer, got '{raw}'.", file=sys.stderr)
+        sys.exit(1)
+
+
+def main() -> None:
+    """Score the pull request described by the environment and publish outputs.
+
+    Reads MAX_LOC, MIN_LOC, PARTITION_STRATEGY, PRIORITY, THRESHOLD_GROUPS,
+    PR_NUMBER, BASE_BRANCH and HEAD_BRANCH. Always writes ``total_loc``; then
+    writes either the skip outputs (PR within ``max_loc``, pr-split failed, or
+    no plan file) or the plan metrics ``total_groups``, ``objective``,
+    ``should_split`` and ``comment_path``.
+
+    Raises:
+        KeyError: if BASE_BRANCH, HEAD_BRANCH or GITHUB_OUTPUT is not set.
+        SystemExit: if a git command fails or an integer setting is malformed.
+    """
+    max_loc = _parse_int_env("MAX_LOC", 400)
+    min_loc_raw = os.environ.get("MIN_LOC", "").strip()
+    strategy = os.environ.get("PARTITION_STRATEGY", "graph")
+    priority = os.environ.get("PRIORITY", "orthogonal")
+    threshold = _parse_int_env("THRESHOLD_GROUPS", 2)
+    pr_number = os.environ.get("PR_NUMBER", "")
+    base_branch = os.environ["BASE_BRANCH"]
+    head_branch = os.environ["HEAD_BRANCH"]
+
+    # Fetch refs — use refs/pull/{n}/head for fork compatibility
+    _run(["git", "fetch", "origin", base_branch])
+    if pr_number:
+        pr_ref = f"refs/pull/{pr_number}/head"
+        local_head = f"pr-split/head-{pr_number}"
+        _run(["git", "fetch", "origin", f"{pr_ref}:{local_head}"])
+    else:
+        _run(["git", "fetch", "origin", head_branch])
+        local_head = f"origin/{head_branch}"
+
+    # Compute diff stats
+    result = _run(["git", "diff", "--numstat", f"origin/{base_branch}...{local_head}"])
+
+    total_added = 0
+    total_removed = 0
+    file_count = 0
+    for line in result.stdout.strip().splitlines():
+        parts = line.split("\t")
+        if len(parts) >= 3:
+            # numstat reports "-" instead of a count for binary files.
+            added = int(parts[0]) if parts[0] != "-" else 0
+            removed = int(parts[1]) if parts[1] != "-" else 0
+            total_added += added
+            total_removed += removed
+            file_count += 1
+
+    total_loc = total_added + total_removed
+    _set_output("total_loc", str(total_loc))
+
+    if total_loc <= max_loc:
+        _skip(f"PR has {total_loc} LOC — under the {max_loc} threshold, no split needed.")
+        return
+
+    # Create local branch refs for pr-split. A PR from a fork can reuse the
+    # base branch's name (e.g. main -> main); pointing that one local name at
+    # the head would overwrite the base ref created just before it and leave
+    # pr-split comparing a branch with itself, so the head then gets its own
+    # local name.
+    split_head = head_branch
+    if split_head == base_branch:
+        split_head = f"pr-split-head-{pr_number or 'local'}"
+    _run(["git", "branch", "-f", base_branch, f"origin/{base_branch}"])
+    _run(["git", "branch", "-f", split_head, local_head])
+
+    # Run pr-split in dry-run mode
+    cmd = [
+        "pr-split", "split", split_head,
+        "--base", base_branch,
+        "--partition-strategy", strategy,
+        "--priority", priority,
+        "--max-loc", str(max_loc),
+        "--dry-run",
+    ]
+    if min_loc_raw:
+        cmd.extend(["--min-loc", min_loc_raw])
+
+    # "done\n" on stdin presumably answers an interactive prompt in pr-split;
+    # confirm against the CLI.
+    result = subprocess.run(cmd, capture_output=True, text=True, input="done\n")
+    if result.returncode != 0:
+        # Best effort: a planner failure is reported as "no split" rather
+        # than failing the workflow.
+        print(f"pr-split failed:\n{result.stderr}", file=sys.stderr)
+        _skip("pr-split failed to generate a plan.")
+        return
+
+    plan_path = ".pr-split/plan.json"
+    if not os.path.exists(plan_path):
+        _skip("No plan file generated.")
+        return
+
+    with open(plan_path, encoding="utf-8") as f:
+        plan = json.load(f)
+
+    groups = plan.get("groups", [])
+    total_groups = len(groups)
+
+    group_locs = [g["estimated_loc"] for g in groups]
+    max_group_loc = max(group_locs, default=0)
+    # Lines by which groups exceed max_loc, summed over all groups.
+    overflow = sum(max(0, loc - max_loc) for loc in group_locs)
+    file_groups: dict[str, set[str]] = {}
+    for g in groups:
+        for a in g.get("assignments", []):
+            file_groups.setdefault(a["file_path"], set()).add(g["id"])
+    # Extra groups each file is spread over beyond its first.
+    file_scatter = sum(max(0, len(gids) - 1) for gids in file_groups.values())
+
+    objective = overflow * 1000 + file_scatter * 50 + total_groups
+    should_split = total_groups >= threshold
+
+    _set_output("total_groups", str(total_groups))
+    _set_output("objective", str(objective))
+    _set_output("should_split", str(should_split).lower())
+
+    print(f"PR: {total_loc} LOC across {file_count} files")
+    print(f"Split plan: {total_groups} groups, objective={objective}")
+    print(f"Should split: {should_split}")
+
+    # Generate markdown comment; the first line is the marker used to find
+    # and update an earlier comment.
+    lines = [
+        "<!-- pr-split-score -->",
+        "## pr-split analysis",
+        "",
+        "| Metric | Value |",
+        "|--------|-------|",
+        f"| Total LOC | {total_loc:,} |",
+        f"| Files changed | {file_count} |",
+        f"| Suggested groups | {total_groups} |",
+        f"| Largest group | {max_group_loc:,} LOC |",
+        f"| LOC overflow | {overflow:,} |",
+        f"| File scatter | {file_scatter} |",
+        "",
+    ]
+
+    if should_split:
+        lines.append(
+            f"This PR has **{total_loc:,} LOC** and could be split into "
+            f"**{total_groups} smaller PRs**:"
+        )
+        lines.append("")
+        lines.append("| Group | Title | Diff | Depends On | Files |")
+        lines.append("|-------|-------|------|------------|-------|")
+        for g in groups:
+            # A group may hold several assignments for one file; list each
+            # path once, in first-seen order.
+            unique_paths = dict.fromkeys(
+                a["file_path"] for a in g.get("assignments", [])
+            )
+            files = ", ".join(f"`{_md_escape(p)}`" for p in unique_paths)
+            deps = ", ".join(_md_escape(d) for d in g.get("depends_on", [])) or "—"
+            diff_str = (
+                f"+{g.get('estimated_added', 0)}/-{g.get('estimated_removed', 0)}"
+            )
+            title = _md_escape(g["title"])
+            gid = _md_escape(g["id"])
+            lines.append(f"| {gid} | {title} | {diff_str} | {deps} | {files} |")
+        lines.append("")
+        lines.append(
+            "*Run `pr-split split` locally to create these sub-PRs, "
+            "or adjust `--max-loc` to change the target size.*"
+        )
+    else:
+        lines.append("This PR is within acceptable size limits.")
+
+    comment = "\n".join(lines)
+    tmp_dir = os.environ.get("RUNNER_TEMP", tempfile.gettempdir())
+    comment_path = Path(tmp_dir) / "pr-split-comment.md"
+    # The comment contains non-ASCII characters, so do not rely on the
+    # locale's default encoding.
+    comment_path.write_text(comment, encoding="utf-8")
+    _set_output("comment_path", str(comment_path))
+
+
+# Run the scorer only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()