diff --git a/.claude/commands/open-code-review.md b/.claude/commands/open-code-review.md index a58cb588..f7339cbb 100644 --- a/.claude/commands/open-code-review.md +++ b/.claude/commands/open-code-review.md @@ -15,6 +15,7 @@ ocr review --audience agent [user-args] - If the user provides `--commit` or `--c`: pass through as-is. - If the user provides `--from` and `--to`: pass through as-is. - (Optional) Provide `--background "requirement context"` to review whether the requirements are correctly implemented. +- (Optional) Provide `--background-file ./requirements.md` to load the same context from a Markdown file (sanitised and limited to 8000 characters). Combined with `--background` the inline value is given first. - Capture full stdout. Set a 5-minute timeout. - If the `ocr` command is not found, install it by running `npm i -g @alibaba-group/open-code-review`. diff --git a/README.md b/README.md index 770df031..ed77428c 100644 --- a/README.md +++ b/README.md @@ -369,6 +369,7 @@ See the [`examples/`](./examples/) directory for integration examples: | `--timeout` | — | `10` | Concurrent task timeout in minutes | | `--audience` | — | `human` | `human` (show progress) or `agent` (summary only) | | `--background` | `-b` | — | Optional requirement/business context for the review; auto-filled from commit message when using `--commit` | +| `--background-file` | `-B` | — | Optional requirement/business context from a Markdown file; Combined with `--background` the inline value is given first | | `--model` | — | — | Select or override the LLM model for this review | | `--rule` | — | — | Path to custom JSON review rules | | `--max-tools` | — | built-in | Max tool call rounds per file; only takes effect when greater than template default | @@ -430,6 +431,12 @@ ocr review --commit abc123 --model claude-sonnet-4-6 # Provide requirement context for more targeted review ocr review --background "Adding rate limiting to the login API" +# Provide requirement context from a Markdown file +ocr review --background-file ./docs/my_business_context.md + +# Combine inline context with a local context file (both are used) +ocr review --background "Focus on auth" --background-file ./docs/my_business_context.md + # Use custom review rules ocr review --rule /path/to/my-rules.json diff --git a/cmd/opencodereview/background_file.go b/cmd/opencodereview/background_file.go new file mode 100644 index 00000000..557fb626 --- /dev/null +++ b/cmd/opencodereview/background_file.go @@ -0,0 +1,130 @@ +package main + +import ( + "fmt" + "os" + "regexp" + "strings" + "unicode" +) + +const ( + backgroundSoftLimit = 2000 + backgroundHardLimit = 8000 + backgroundOpenTag = "" + backgroundCloseTag = "" + maxBackgroundFileBytes = 1 << 20 // 1 MB +) + +var multiNewline = regexp.MustCompile(`\n{3,}`) + +// mergeBackground combines the inline --background value (or an auto-populated +// commit message) with the content read from --background-file, separated by a +// blank line. The inline value is sanitised the same way as the file content so +// both portions are cleaned consistently. The file content is already wrapped +// and sanitised by loadBackgroundFile. +func mergeBackground(inline, fromFile string) string { + inline = sanitizeMarkdown(inline) + switch { + case inline == "": + return fromFile + case fromFile == "": + return inline + default: + return inline + "\n\n" + fromFile + } +} + +func loadBackgroundFile(path string) (string, error) { + info, err := os.Stat(path) + if err != nil { + return "", fmt.Errorf("read background file %q: %w", path, err) + } + if info.IsDir() { + return "", fmt.Errorf("background file %q is a directory, not a file", path) + } + if info.Size() > maxBackgroundFileBytes { + return "", fmt.Errorf( + "background file %q is %d bytes, exceeding the maximum of %d bytes; please provide a smaller file", + path, info.Size(), maxBackgroundFileBytes, + ) + } + + raw, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("read background file %q: %w", path, err) + } + + cleaned := sanitizeMarkdown(string(raw)) + if cleaned == "" { + return "", fmt.Errorf("background file %q is empty after sanitisation", path) + } + + if strings.Contains(cleaned, backgroundOpenTag) || strings.Contains(cleaned, backgroundCloseTag) { + return "", fmt.Errorf( + "background file %q must not contain the reserved delimiters %q or %q", + path, backgroundOpenTag, backgroundCloseTag, + ) + } + + wrapped := backgroundOpenTag + "\n" + cleaned + "\n" + backgroundCloseTag + + if n := len([]rune(wrapped)); n > backgroundHardLimit { + return "", fmt.Errorf( + "background content is %d characters, exceeding the hard limit of %d (aborting)", + n, backgroundHardLimit, + ) + } else if n > backgroundSoftLimit { + fmt.Fprintf(os.Stderr, + "[ocr] --background-file content is %d characters, exceeding the recommended %d (continuing but review quality might be impacted)\n", + n, backgroundSoftLimit, + ) + } + + return wrapped, nil +} + +func sanitizeMarkdown(s string) string { + var b strings.Builder + b.Grow(len(s)) + + for _, r := range s { + switch r { + case '\n', '\t': + b.WriteRune(r) + continue + case '\r': + continue + } + if isForbiddenChar(r) { + continue + } + b.WriteRune(r) + } + + collapsed := multiNewline.ReplaceAllString(b.String(), "\n\n") + return strings.TrimSpace(collapsed) +} + +func isForbiddenChar(r rune) bool { + switch { + case r <= 0x1F: // C0 control characters (includes NUL) + return true + case r >= 0x7F && r <= 0x9F: // DEL and C1 control characters + return true + } + + switch r { + case '\u200B', // zero-width space + '\u200C', // zero-width non-joiner + '\u200D', // zero-width joiner + '\u200E', // left-to-right mark + '\u200F', // right-to-left mark + '\u2060', // word joiner + '\u00AD', // soft hyphen + '\uFEFF': // BOM / zero-width no-break space + return true + } + + return unicode.Is(unicode.Cf, r) +} diff --git a/cmd/opencodereview/background_file_test.go b/cmd/opencodereview/background_file_test.go new file mode 100644 index 00000000..11a552ec --- /dev/null +++ b/cmd/opencodereview/background_file_test.go @@ -0,0 +1,325 @@ +package main + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +// writeTempFile writes content to a temporary file and returns its path. +func writeTempFile(t *testing.T, content string) string { + t.Helper() + path := filepath.Join(t.TempDir(), "background.md") + if err := os.WriteFile(path, []byte(content), 0o600); err != nil { + t.Fatalf("write temp file: %v", err) + } + return path +} + +func TestLoadBackgroundFileNotFound(t *testing.T) { + _, err := loadBackgroundFile(filepath.Join(t.TempDir(), "does-not-exist.md")) + if err == nil { + t.Fatal("expected an error for a missing file, got nil") + } +} + +func TestLoadBackgroundFileEmpty(t *testing.T) { + cases := map[string]string{ + "zero bytes": "", + "whitespace only": " \n\t \n ", + "invisible only": "\u200B\u200E\u00AD\uFEFF", + } + for name, content := range cases { + t.Run(name, func(t *testing.T) { + _, err := loadBackgroundFile(writeTempFile(t, content)) + if err == nil { + t.Fatal("expected an error for empty-after-sanitisation content, got nil") + } + if !strings.Contains(err.Error(), "empty") { + t.Errorf("error = %q, want it to mention 'empty'", err) + } + }) + } +} + +func TestLoadBackgroundFileControlCharRemoval(t *testing.T) { + // Mix in NUL, bell, DEL, a C1 control char, zero-width space, BOM and an + // LTR mark around legitimate text. + content := "Hello\x00\x07world\x7f\u0085!\u200B\uFEFF\u200E" + got, err := loadBackgroundFile(writeTempFile(t, content)) + if err != nil { + t.Fatalf("loadBackgroundFile: %v", err) + } + for _, bad := range []string{"\x00", "\x07", "\x7f", "\u0085", "\u200B", "\uFEFF", "\u200E"} { + if strings.Contains(got, bad) { + t.Errorf("result still contains control/invisible char %q: %q", bad, got) + } + } + if !strings.Contains(got, "Helloworld!") { + t.Errorf("expected cleaned text to contain %q, got %q", "Helloworld!", got) + } +} + +func TestSanitizeMarkdownPreservesNewlinesAndTabs(t *testing.T) { + got := sanitizeMarkdown("line1\n\tindented\nline3") + want := "line1\n\tindented\nline3" + if got != want { + t.Errorf("sanitizeMarkdown = %q, want %q", got, want) + } +} + +func TestSanitizeMarkdownCollapsesNewlines(t *testing.T) { + got := sanitizeMarkdown("a\n\n\n\n\nb") + want := "a\n\nb" + if got != want { + t.Errorf("sanitizeMarkdown = %q, want %q", got, want) + } +} + +func TestSanitizeMarkdownNormalizesCRLF(t *testing.T) { + got := sanitizeMarkdown("a\r\nb\r\nc") + want := "a\nb\nc" + if got != want { + t.Errorf("sanitizeMarkdown = %q, want %q", got, want) + } +} + +func TestSanitizeMarkdownTrims(t *testing.T) { + got := sanitizeMarkdown(" \n hello \n ") + if got != "hello" { + t.Errorf("sanitizeMarkdown = %q, want %q", got, "hello") + } +} + +func TestLoadBackgroundFileDelimiters(t *testing.T) { + got, err := loadBackgroundFile(writeTempFile(t, "Some requirement context.")) + if err != nil { + t.Fatalf("loadBackgroundFile: %v", err) + } + if !strings.HasPrefix(got, backgroundOpenTag+"\n") { + t.Errorf("result missing opening delimiter: %q", got) + } + if !strings.HasSuffix(got, "\n"+backgroundCloseTag) { + t.Errorf("result missing closing delimiter: %q", got) + } + want := backgroundOpenTag + "\nSome requirement context.\n" + backgroundCloseTag + if got != want { + t.Errorf("result = %q, want %q", got, want) + } +} + +func TestLoadBackgroundFileRejectsReservedDelimiters(t *testing.T) { + for _, tag := range []string{backgroundOpenTag, backgroundCloseTag} { + t.Run(tag, func(t *testing.T) { + content := "Some context " + tag + " and more text." + _, err := loadBackgroundFile(writeTempFile(t, content)) + if err == nil { + t.Fatalf("expected an error for content containing %q, got nil", tag) + } + if !strings.Contains(err.Error(), "reserved delimiters") { + t.Errorf("error = %q, want it to mention 'reserved delimiters'", err) + } + }) + } +} + +func TestMergeBackgroundSanitizesInline(t *testing.T) { + t.Run("inline only", func(t *testing.T) { + // Control char, zero-width space and surrounding whitespace must be removed. + got := mergeBackground(" \x00Inline\u200B context ", "") + if got != "Inline context" { + t.Errorf("mergeBackground = %q, want %q", got, "Inline context") + } + }) + + t.Run("inline combined with file", func(t *testing.T) { + wrapped := backgroundOpenTag + "\nfrom file\n" + backgroundCloseTag + got := mergeBackground("\x07dirty\uFEFF inline\n\n\n\nend", wrapped) + if strings.ContainsRune(got, '\x07') || strings.ContainsRune(got, '\uFEFF') { + t.Errorf("inline portion was not sanitised: %q", got) + } + // Excess blank lines in the inline portion are collapsed to one. + if strings.Contains(got, "\n\n\n") { + t.Errorf("inline newlines were not collapsed: %q", got) + } + // The file portion is preserved intact. + if !strings.Contains(got, wrapped) { + t.Errorf("file portion was altered: %q", got) + } + }) +} + +func TestMergeBackground(t *testing.T) { + wrapped := backgroundOpenTag + "\nfrom file\n" + backgroundCloseTag + + t.Run("both present are combined", func(t *testing.T) { + got := mergeBackground("inline context", wrapped) + want := "inline context\n\n" + wrapped + if got != want { + t.Errorf("mergeBackground = %q, want %q", got, want) + } + // Both inputs must survive in the result. + if !strings.Contains(got, "inline context") || !strings.Contains(got, "from file") { + t.Errorf("merged background dropped one of the inputs: %q", got) + } + }) + + t.Run("inline only", func(t *testing.T) { + if got := mergeBackground("inline only", ""); got != "inline only" { + t.Errorf("mergeBackground = %q, want %q", got, "inline only") + } + }) + + t.Run("file only", func(t *testing.T) { + if got := mergeBackground("", wrapped); got != wrapped { + t.Errorf("mergeBackground = %q, want %q", got, wrapped) + } + }) +} + +func TestLoadBackgroundFileSoftLimit(t *testing.T) { + // Just above the soft limit but below the hard size limit: must succeed. + content := strings.Repeat("a", backgroundSoftLimit+100) + got, err := loadBackgroundFile(writeTempFile(t, content)) + if err != nil { + t.Fatalf("loadBackgroundFile: %v", err) + } + if !strings.Contains(got, content) { + t.Error("expected content to be preserved past the soft limit") + } +} + +func TestLoadBackgroundFileOversized(t *testing.T) { + // A file larger than maxBackgroundFileBytes must be rejected up front, + // before its content is read into memory. + content := strings.Repeat("a", maxBackgroundFileBytes+1) + _, err := loadBackgroundFile(writeTempFile(t, content)) + if err == nil { + t.Fatal("expected an error for an oversized file, got nil") + } + if !strings.Contains(err.Error(), "maximum") { + t.Errorf("error = %q, want it to mention the byte 'maximum'", err) + } +} + +func TestLoadBackgroundFileDirectory(t *testing.T) { + if _, err := loadBackgroundFile(t.TempDir()); err == nil { + t.Fatal("expected an error when the path is a directory, got nil") + } +} + +func TestLoadBackgroundFileHardLimit(t *testing.T) { + content := strings.Repeat("a", backgroundHardLimit+1) + _, err := loadBackgroundFile(writeTempFile(t, content)) + if err == nil { + t.Fatal("expected an error when exceeding the hard size limit, got nil") + } + if !strings.Contains(err.Error(), "hard limit") { + t.Errorf("error = %q, want it to mention 'hard limit'", err) + } +} + +func TestLoadBackgroundFileLengthCountedAfterWrapping(t *testing.T) { + // Content that fits under the hard size limit on its own but exceeds it once the + // delimiters are added must be rejected. + overhead := len(backgroundOpenTag) + len(backgroundCloseTag) + 2 // two newlines + content := strings.Repeat("a", backgroundHardLimit-overhead+1) + _, err := loadBackgroundFile(writeTempFile(t, content)) + if err == nil { + t.Fatal("expected hard-cap error once wrapping pushes length over the limit") + } +} + +func TestLoadBackgroundFileMultiByteRuneCount(t *testing.T) { + // Multi-byte runes must be counted as single characters, not bytes. + // A precomposed accented letter is one rune but two bytes; a string filling + // the hard size limit in runes (minus wrapping overhead) is ~2x the byte cap and + // must still be accepted. + overhead := len(backgroundOpenTag) + len(backgroundCloseTag) + 2 // two newlines + content := strings.Repeat("\u00E9", backgroundHardLimit-overhead) + got, err := loadBackgroundFile(writeTempFile(t, content)) + if err != nil { + t.Fatalf("loadBackgroundFile rejected content within the rune limit: %v", err) + } + if !strings.Contains(got, content) { + t.Error("expected multi-byte content to be preserved") + } +} + +// initRepoWithCommit creates a real git repository with a single commit whose +// message is `message`, and returns the repo directory and the commit hash. +func initRepoWithCommit(t *testing.T, message string) (string, string) { + t.Helper() + repo := t.TempDir() + run := func(args ...string) []byte { + cmd := exec.Command("git", args...) + cmd.Dir = repo + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("git %v failed: %v\n%s", args, err, out) + } + return out + } + run("init", "-q") + run("config", "user.email", "test@example.com") + run("config", "user.name", "Test") + run("config", "commit.gpgsign", "false") + if err := os.WriteFile(filepath.Join(repo, "file.txt"), []byte("hello\n"), 0o600); err != nil { + t.Fatalf("write file: %v", err) + } + run("add", ".") + run("commit", "-q", "-m", message) + hash := strings.TrimSpace(string(run("rev-parse", "HEAD"))) + return repo, hash +} + +// TestBackgroundFromCommitThenFile reproduces the resolution order used by +// runReview when --background-file is supplied but --background is not: the +// inline background is first auto-filled from the commit message, then the +// background file is appended. Both must end up in the final background. +func TestBackgroundFromCommitThenFile(t *testing.T) { + const commitMsg = "Implement rate limiting on login" + repo, hash := initRepoWithCommit(t, commitMsg) + + // Mirror runReview: --background empty + --commit set -> use commit message. + background := "" + msg, err := getCommitMessage(repo, hash) + if err != nil { + t.Fatalf("getCommitMessage: %v", err) + } + if msg != commitMsg { + t.Fatalf("commit message = %q, want %q", msg, commitMsg) + } + if background == "" { + background = msg + } + + // Then --background-file is loaded and merged in. + fileBg, err := loadBackgroundFile(writeTempFile(t, "Extra context from a file.")) + if err != nil { + t.Fatalf("loadBackgroundFile: %v", err) + } + background = mergeBackground(background, fileBg) + + // The commit message must come first, followed by the wrapped file content. + if !strings.HasPrefix(background, commitMsg+"\n\n") { + t.Errorf("expected commit message to lead the background, got %q", background) + } + if !strings.Contains(background, "Extra context from a file.") { + t.Errorf("expected file content to be appended, got %q", background) + } + if !strings.Contains(background, backgroundOpenTag) || !strings.Contains(background, backgroundCloseTag) { + t.Errorf("expected file content to keep its delimiters, got %q", background) + } +} + +func TestLoadBackgroundFilePerformance(t *testing.T) { + // Sanitising a large (near hard-cap) input should be fast. This guards + // against accidental quadratic behaviour in the cleaning routine. + content := strings.Repeat("word \u200B\t", 1000) // ~8k bytes with invisibles + for i := 0; i < 1000; i++ { + _ = sanitizeMarkdown(content) + } +} diff --git a/cmd/opencodereview/flags.go b/cmd/opencodereview/flags.go index 4432ceb1..2824ddc8 100644 --- a/cmd/opencodereview/flags.go +++ b/cmd/opencodereview/flags.go @@ -105,6 +105,7 @@ type reviewOptions struct { outputFormat string audience string // --audience: "human" (default) or "agent" background string // --background: optional requirement context + backgroundFile string // --background-file: path to a Markdown file used as background model string // --model: override resolved LLM model for this review concurrency int perFileTimeout int @@ -131,6 +132,7 @@ func parseReviewFlags(args []string) (reviewOptions, error) { a.IntVar(&opts.perFileTimeout, "timeout", 10, "concurrent task timeout in minutes") a.StringVar(&opts.audience, "audience", "human", "output audience: human (show progress) or agent (summary only)") a.StringVarP(&opts.background, "background", "b", "", "optional requirement/business context for the review") + a.StringVarP(&opts.backgroundFile, "background-file", "B", "", "optional requirement/business context from a Markdown file (combined with --background; inline value appears first when both are set)") a.StringVar(&opts.model, "model", "", "override LLM model for this review (e.g., claude-opus-4-6)") a.IntVar(&opts.maxTools, "max-tools", 0, "max tool call rounds per file (0 = template default; min 10)") a.IntVar(&opts.maxGitProcs, "max-git-procs", 16, "max concurrent git subprocesses") @@ -214,22 +216,28 @@ Examples: ocr review --preview ocr review -c abc123 -p + # Provide requirement/business context inline, from a Markdown file, or both + ocr review --background "Adding rate limiting to the login API" + ocr review --background-file ./docs/requirements.md + ocr review --background "Focus on auth" --background-file ./docs/requirements.md + Flags: - --audience string output audience: human (show progress) or agent (summary only) (default "human") - -b, --background string optional requirement/business context for the review - -c, --commit string single commit hash or tag to review (vs its parent) - -f, --format string output format: text or json (default "text") - --concurrency int max concurrent file reviews (default 8) - --max-git-procs int max concurrent git subprocesses (default 16) - --from string source ref to start diff from (e.g., 'main') - --max-tools int max tool call rounds per file (0 = template default; min 10) - --model string override LLM model for this review (e.g., claude-opus-4-6) - -p, --preview preview which files will be reviewed without running the LLM - --repo string root directory of the git repository (default: current dir) - --rule string path to JSON file with system review rules - --timeout int concurrent task timeout in minutes (default 10) - --to string target ref to end diff at (e.g., 'feature-branch') - --tools string path to JSON tools config file (default: embedded)`) + --audience string output audience: human (show progress) or agent (summary only) (default "human") + -b, --background string optional requirement/business context for the review + -B, --background-file string path to a Markdown file used as review background (combined with --background; inline value appears first when both are set) + -c, --commit string single commit hash or tag to review (vs its parent) + -f, --format string output format: text or json (default "text") + --concurrency int max concurrent file reviews (default 8) + --max-git-procs int max concurrent git subprocesses (default 16) + --from string source ref to start diff from (e.g., 'main') + --max-tools int max tool call rounds per file (0 = template default; min 10) + --model string override LLM model for this review (e.g., claude-opus-4-6) + -p, --preview preview which files will be reviewed without running the LLM + --repo string root directory of the git repository (default: current dir) + --rule string path to JSON file with system review rules + --timeout int concurrent task timeout in minutes (default 10) + --to string target ref to end diff at (e.g., 'feature-branch') + --tools string path to JSON tools config file (default: embedded)`) } // --- config subcommand --- diff --git a/cmd/opencodereview/flags_test.go b/cmd/opencodereview/flags_test.go index 617dfa44..d1149041 100644 --- a/cmd/opencodereview/flags_test.go +++ b/cmd/opencodereview/flags_test.go @@ -2,6 +2,20 @@ package main import "testing" +func TestParseReviewFlagsBackgroundFile(t *testing.T) { + for _, flag := range []string{"--background-file", "-B"} { + t.Run(flag, func(t *testing.T) { + opts, err := parseReviewFlags([]string{flag, "./docs/req.md"}) + if err != nil { + t.Fatalf("parseReviewFlags: %v", err) + } + if opts.backgroundFile != "./docs/req.md" { + t.Errorf("backgroundFile = %q, want %q", opts.backgroundFile, "./docs/req.md") + } + }) + } +} + func TestParseReviewFlagsModelOverride(t *testing.T) { opts, err := parseReviewFlags([]string{"--model", "claude-opus-4-6"}) if err != nil { diff --git a/cmd/opencodereview/review_cmd.go b/cmd/opencodereview/review_cmd.go index 91a74912..aeb8382c 100644 --- a/cmd/opencodereview/review_cmd.go +++ b/cmd/opencodereview/review_cmd.go @@ -42,6 +42,16 @@ func runReview(args []string) error { } } + var fileBackground string + if opts.backgroundFile != "" { + fileBackground, err = loadBackgroundFile(opts.backgroundFile) + if err != nil { + return err + } + } + + opts.background = mergeBackground(opts.background, fileBackground) + if opts.preview { return runPreview(cc, opts) } diff --git a/plugins/open-code-review/commands/review.md b/plugins/open-code-review/commands/review.md index 82a1e997..1e2e19a3 100644 --- a/plugins/open-code-review/commands/review.md +++ b/plugins/open-code-review/commands/review.md @@ -17,6 +17,7 @@ ocr review --audience agent [user-args] - If the user provides `--commit` or `--c`: pass through as-is. - If the user provides `--from` and `--to`: pass through as-is. - (Optional) Provide `--background "requirement context"` to review whether the requirements are correctly implemented. +- (Optional) Provide `--background-file ./requirements.md` to load the same context from a Markdown file (sanitised and limited to 8000 characters). Combined with `--background` the inline value is given first. - Capture full stdout. Set a 5-minute timeout. - If the `ocr` command is not found, install it by running `npm i -g @alibaba-group/open-code-review`. @@ -32,4 +33,4 @@ Silently discard low-confidence comments. Display the remaining comments. ### Step 3: Fix -Automatically fix issues and suggestions that are worth adopting. \ No newline at end of file +Automatically fix issues and suggestions that are worth adopting.