diff --git a/.github/workflows/split-score.yml b/.github/workflows/split-score.yml new file mode 100644 index 0000000..ceb22cf --- /dev/null +++ b/.github/workflows/split-score.yml @@ -0,0 +1,22 @@ +name: PR Split Score + +on: + pull_request: + branches: [main] + +permissions: + pull-requests: write + +jobs: + score: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: ./ + with: + max-loc: "400" + partition-strategy: "graph" + threshold-groups: "2" diff --git a/README.md b/README.md index fb02957..567686e 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ ## Latest News 🔥 +- GitHub Action — add pr-split to any repo as a CI check. Scores every PR and posts a split plan comment when it's too large. No API key needed. - Smart LOC Bounds — set `--min-loc` and `--max-loc` to control sub-PR size across all three backends (LLM, graph, CP-SAT). Undersized groups get merged, oversized groups get penalised. - LLM Refinement Loop — enable `--max-refinement-iterations` and pr-split will automatically feed LOC violations back to the LLM until every group fits within your configured bounds. - Auto-derived Minimum LOC — when refinement is on and no `--min-loc` is set, pr-split picks a sensible default (25% of `--max-loc`) so you get well-sized groups out of the box. @@ -185,6 +186,57 @@ Settings can be set via environment variables with the `PR_SPLIT_` prefix: | `PR_SPLIT_PARTITION_STRATEGY` | `llm` | Hunk-to-PR partition backend | | `PR_SPLIT_WEBHOOK_URL` | (none) | Webhook URL for merge notifications | +## GitHub Action + +Add pr-split as a CI check that scores every PR and posts a split plan when it's too large. Uses the `graph` backend by default — no API key needed. 
+ +```yaml +# .github/workflows/split-score.yml +name: PR Split Score + +on: + pull_request: + branches: [main] + +permissions: + pull-requests: write + +jobs: + score: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: vitali87/pr-split@main + with: + max-loc: "400" + partition-strategy: "graph" + threshold-groups: "2" +``` + +### Action inputs + +| Input | Default | Description | +|-------|---------|-------------| +| `max-loc` | `400` | Maximum target diff lines per sub-PR | +| `min-loc` | (unset) | Minimum target diff lines per sub-PR | +| `partition-strategy` | `graph` | Backend for partitioning (`graph` or `cp_sat`) | +| `priority` | `orthogonal` | Grouping priority (`orthogonal` or `logical`) | +| `threshold-groups` | `2` | Minimum suggested groups before posting the split plan | +| `python-version` | `3.12` | Python version to use | +| `post-comment` | `true` | Whether to post a PR comment with the results | + +### Action outputs + +| Output | Description | +|--------|-------------| +| `total-loc` | Total lines of code in the PR diff | +| `total-groups` | Number of suggested groups | +| `objective` | Plan objective score (lower is better) | +| `should-split` | Whether the PR should be split (`true`/`false`) | + ## Planning backends `pr-split` now separates two optimization layers: diff --git a/action.yml b/action.yml new file mode 100644 index 0000000..dc53a20 --- /dev/null +++ b/action.yml @@ -0,0 +1,101 @@ +name: "pr-split score" +description: "Score a PR's complexity and suggest splits when it's too large" +branding: + icon: git-pull-request + color: blue + +inputs: + max-loc: + description: "Maximum target diff lines per sub-PR" + default: "400" + min-loc: + description: "Minimum target diff lines per sub-PR" + default: "" + partition-strategy: + description: "Backend for partitioning (graph or cp_sat)" + default: "graph" + priority: + description: "Grouping priority (orthogonal or logical)" + default: 
"orthogonal" + threshold-groups: + description: "Minimum suggested groups before showing the split plan" + default: "2" + python-version: + description: "Python version to use" + default: "3.12" + post-comment: + description: "Whether to post a PR comment with the results" + default: "true" + +outputs: + total-loc: + description: "Total lines of code in the PR diff" + value: ${{ steps.score.outputs.total_loc }} + total-groups: + description: "Number of suggested groups" + value: ${{ steps.score.outputs.total_groups }} + objective: + description: "Plan objective score (lower is better)" + value: ${{ steps.score.outputs.objective }} + should-split: + description: "Whether the PR should be split (true/false)" + value: ${{ steps.score.outputs.should_split }} + +runs: + using: composite + steps: + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python + shell: bash + env: + UV_PYTHON_VERSION: ${{ inputs.python-version }} + run: uv python install "$UV_PYTHON_VERSION" + + - name: Install pr-split + shell: bash + run: uv tool install "${{ github.action_path }}" + + - name: Score PR + id: score + shell: bash + env: + MAX_LOC: ${{ inputs.max-loc }} + MIN_LOC: ${{ inputs.min-loc }} + PARTITION_STRATEGY: ${{ inputs.partition-strategy }} + PRIORITY: ${{ inputs.priority }} + THRESHOLD_GROUPS: ${{ inputs.threshold-groups }} + PR_NUMBER: ${{ github.event.pull_request.number }} + BASE_BRANCH: ${{ github.event.pull_request.base.ref }} + HEAD_BRANCH: ${{ github.event.pull_request.head.ref }} + run: python "${{ github.action_path }}/scripts/score_pr.py" + + - name: Post comment + if: inputs.post-comment == 'true' && steps.score.outputs.should_split == 'true' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const body = fs.readFileSync('${{ steps.score.outputs.comment_path }}', 'utf8'); + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: 
"""Score a PR and generate a markdown comment with the split plan."""

import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path

# Weights of the plan objective reported to the workflow (lower is better).
_OVERFLOW_WEIGHT = 1000
_SCATTER_WEIGHT = 50

# Plan file this script reads after running `pr-split split --dry-run`.
_PLAN_PATH = ".pr-split/plan.json"


def _run(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run *cmd* with captured text output.

    When *check* is true and the command exits non-zero, the command line and
    its stderr are printed to stderr and the script exits with status 1.
    """
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    if check and result.returncode != 0:
        print(f"Command failed: {' '.join(cmd)}", file=sys.stderr)
        print(result.stderr, file=sys.stderr)
        sys.exit(1)
    return result


def _set_output(name: str, value: str) -> None:
    """Append ``name=value`` to the file named by ``GITHUB_OUTPUT``.

    Raises:
        KeyError: if ``GITHUB_OUTPUT`` is not set in the environment.
    """
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
        f.write(f"{name}={value}\n")


def _skip(reason: str) -> None:
    """Print *reason* and publish the "nothing to split" outputs."""
    print(reason)
    _set_output("total_groups", "1")
    _set_output("objective", "0")
    _set_output("should_split", "false")


def _md_escape(s: str) -> str:
    """Make *s* safe for a single markdown table cell.

    Pipes are backslash-escaped and line breaks are collapsed to spaces,
    because either one would otherwise terminate the cell or the row.
    """
    return " ".join(s.splitlines()).replace("|", "\\|")


def _parse_int_env(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    A missing or blank value yields *default*; the action always exports the
    variable, so an input left empty arrives as an empty string rather than
    as an unset variable. Any other non-integer value prints an error to
    stderr and exits with status 1.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"Error: {name} must be an integer, got '{raw}'.", file=sys.stderr)
        sys.exit(1)


def _parse_numstat(output: str) -> tuple[int, int, int]:
    """Sum ``git diff --numstat`` output into ``(added, removed, file_count)``.

    Lines with fewer than three tab-separated fields are ignored. A ``-``
    count (what git prints for binary files) contributes zero lines but the
    file is still counted.
    """
    added = removed = files = 0
    for line in output.splitlines():
        parts = line.split("\t")
        if len(parts) < 3:
            continue
        added += 0 if parts[0] == "-" else int(parts[0])
        removed += 0 if parts[1] == "-" else int(parts[1])
        files += 1
    return added, removed, files


def _plan_metrics(groups: list[dict], max_loc: int) -> tuple[int, int, int]:
    """Return ``(max_group_loc, overflow, file_scatter)`` for a plan.

    *overflow* is the total number of lines by which groups exceed *max_loc*;
    *file_scatter* counts, per file, every group beyond the first that
    touches it.
    """
    max_group_loc = max((g["estimated_loc"] for g in groups), default=0)
    overflow = sum(max(0, g["estimated_loc"] - max_loc) for g in groups)
    file_groups: dict[str, set[str]] = {}
    for g in groups:
        for a in g.get("assignments", []):
            file_groups.setdefault(a["file_path"], set()).add(g["id"])
    file_scatter = sum(max(0, len(gids) - 1) for gids in file_groups.values())
    return max_group_loc, overflow, file_scatter


def _render_comment(
    groups: list[dict],
    *,
    total_loc: int,
    file_count: int,
    max_group_loc: int,
    overflow: int,
    file_scatter: int,
    should_split: bool,
) -> str:
    """Build the markdown body of the PR comment."""
    total_groups = len(groups)
    # NOTE(review): the body starts with an empty line, and action.yml finds
    # the comment to update via `c.body.includes('')`, which matches every
    # comment. Confirm whether a marker string was meant to be here.
    lines = [
        "",
        "## pr-split analysis",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Total LOC | {total_loc:,} |",
        f"| Files changed | {file_count} |",
        f"| Suggested groups | {total_groups} |",
        f"| Largest group | {max_group_loc:,} LOC |",
        f"| LOC overflow | {overflow:,} |",
        f"| File scatter | {file_scatter} |",
        "",
    ]

    if not should_split:
        lines.append("This PR is within acceptable size limits.")
        return "\n".join(lines)

    lines.append(
        f"This PR has **{total_loc:,} LOC** and could be split into "
        f"**{total_groups} smaller PRs**:"
    )
    lines.append("")
    lines.append("| Group | Title | Diff | Depends On | Files |")
    lines.append("|-------|-------|------|------------|-------|")
    for g in groups:
        files = ", ".join(
            f"`{_md_escape(a['file_path'])}`" for a in g.get("assignments", [])
        )
        # Dependency ids are escaped like every other cell of the row.
        deps = ", ".join(_md_escape(d) for d in g.get("depends_on", [])) or "—"
        diff_str = f"+{g.get('estimated_added', 0)}/-{g.get('estimated_removed', 0)}"
        title = _md_escape(g["title"])
        gid = _md_escape(g["id"])
        lines.append(f"| {gid} | {title} | {diff_str} | {deps} | {files} |")
    lines.append("")
    lines.append(
        "*Run `pr-split split` locally to create these sub-PRs, "
        "or adjust `--max-loc` to change the target size.*"
    )
    return "\n".join(lines)


def main() -> None:
    """Score the pull request described by the environment.

    Reads ``MAX_LOC``, ``MIN_LOC``, ``PARTITION_STRATEGY``, ``PRIORITY``,
    ``THRESHOLD_GROUPS``, ``PR_NUMBER``, ``BASE_BRANCH`` and ``HEAD_BRANCH``,
    publishes ``total_loc``, ``total_groups``, ``objective``, ``should_split``
    and ``comment_path`` through ``GITHUB_OUTPUT``, and writes the markdown
    comment to a file under ``RUNNER_TEMP`` (or the system temp directory).

    Raises:
        KeyError: if ``BASE_BRANCH``, ``HEAD_BRANCH`` or ``GITHUB_OUTPUT`` is
            missing from the environment.
    """
    max_loc = _parse_int_env("MAX_LOC", 400)
    min_loc_raw = os.environ.get("MIN_LOC", "")
    strategy = os.environ.get("PARTITION_STRATEGY", "graph")
    priority = os.environ.get("PRIORITY", "orthogonal")
    threshold = _parse_int_env("THRESHOLD_GROUPS", 2)
    pr_number = os.environ.get("PR_NUMBER", "")
    base_branch = os.environ["BASE_BRANCH"]
    head_branch = os.environ["HEAD_BRANCH"]

    # Fetch refs — use refs/pull/{n}/head for fork compatibility
    _run(["git", "fetch", "origin", base_branch])
    if pr_number:
        pr_ref = f"refs/pull/{pr_number}/head"
        local_head = f"pr-split/head-{pr_number}"
        _run(["git", "fetch", "origin", f"{pr_ref}:{local_head}"])
    else:
        _run(["git", "fetch", "origin", head_branch])
        local_head = f"origin/{head_branch}"

    # Compute diff stats
    numstat = _run(["git", "diff", "--numstat", f"origin/{base_branch}...{local_head}"])
    total_added, total_removed, file_count = _parse_numstat(numstat.stdout)

    total_loc = total_added + total_removed
    _set_output("total_loc", str(total_loc))

    if total_loc <= max_loc:
        _skip(f"PR has {total_loc} LOC — under the {max_loc} threshold, no split needed.")
        return

    # Create local branch refs for pr-split
    _run(["git", "branch", "-f", base_branch, f"origin/{base_branch}"])
    if pr_number and head_branch == base_branch:
        # A fork PR opened from a branch named like the base (e.g. main -> main)
        # would force-move the base branch created above onto the PR head and
        # leave pr-split with an empty diff. The fetched PR ref is already a
        # local branch, so use it directly.
        split_head = local_head
    else:
        _run(["git", "branch", "-f", head_branch, local_head])
        split_head = head_branch

    # Run pr-split in dry-run mode
    cmd = [
        "pr-split", "split", split_head,
        "--base", base_branch,
        "--partition-strategy", strategy,
        "--priority", priority,
        "--max-loc", str(max_loc),
        "--dry-run",
    ]
    if min_loc_raw:
        cmd.extend(["--min-loc", min_loc_raw])

    # Deliberately not routed through _run: a pr-split failure downgrades to
    # "no split suggested" instead of failing the job.
    result = subprocess.run(
        cmd, capture_output=True, text=True, input="done\n", check=False
    )
    if result.returncode != 0:
        print(f"pr-split failed:\n{result.stderr}", file=sys.stderr)
        _skip("pr-split failed to generate a plan.")
        return

    if not os.path.exists(_PLAN_PATH):
        _skip("No plan file generated.")
        return

    with open(_PLAN_PATH, encoding="utf-8") as f:
        plan = json.load(f)

    groups = plan.get("groups", [])
    total_groups = len(groups)
    max_group_loc, overflow, file_scatter = _plan_metrics(groups, max_loc)

    objective = overflow * _OVERFLOW_WEIGHT + file_scatter * _SCATTER_WEIGHT + total_groups
    should_split = total_groups >= threshold

    _set_output("total_groups", str(total_groups))
    _set_output("objective", str(objective))
    _set_output("should_split", str(should_split).lower())

    print(f"PR: {total_loc} LOC across {file_count} files")
    print(f"Split plan: {total_groups} groups, objective={objective}")
    print(f"Should split: {should_split}")

    # Generate markdown comment
    comment = _render_comment(
        groups,
        total_loc=total_loc,
        file_count=file_count,
        max_group_loc=max_group_loc,
        overflow=overflow,
        file_scatter=file_scatter,
        should_split=should_split,
    )
    tmp_dir = os.environ.get("RUNNER_TEMP", tempfile.gettempdir())
    comment_path = Path(tmp_dir) / "pr-split-comment.md"
    # The comment contains non-ASCII punctuation; do not depend on the locale.
    comment_path.write_text(comment, encoding="utf-8")
    _set_output("comment_path", str(comment_path))


if __name__ == "__main__":
    main()