diff --git a/AGENTS.md b/AGENTS.md index f76e2ecb7..0760c585f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -33,6 +33,26 @@ the Ruby VM to the Rust crate logic. - `lib`: The rest of the Ruby code - `test`: Ruby test files +### MCP launcher + +`exe/rubydex_mcp` is a thin Ruby wrapper that boots a Bundler context before `exec`ing the precompiled +`rubydex_mcp` Rust binary. Its only job is to discover index roots that require Ruby/Bundler context — the +Rust server still performs the actual indexing pass. + +`lib/rubydex/mcp_server_bridge.rb` (`Rubydex::MCPServerBridge`) holds the launcher logic: + +- It is launcher-only. **Do not require it from `lib/rubydex.rb`** — the main Ruby API owns graph construction + through `Rubydex::Graph`. +- `setup_bundler_context` activates the workspace's Bundler environment. It mutates `ENV["BUNDLE_GEMFILE"]` + (only if unset), which is acceptable in the launcher since it execs immediately, but long-lived callers + (e.g. tests) must save and restore that variable. +- `compute_index_paths` returns the workspace path(s) plus every Bundler dependency `require_path`, so the + Rust server indexes both the project and its gem dependencies. Only the first argv entry drives the Bundler + context, but all argv entries become index roots. +- Index roots may overlap (a dependency nested under the workspace, or the workspace's own gemspec resolving + back to the root). This is harmless: the graph keys documents by `UriId` and overwrites on re-index, so a + doubly-discovered file costs only wasted indexing work, never duplicate declarations. + ### Naming Conventions The C extension uses prefixed function names to distinguish between abstraction layers: diff --git a/exe/rubydex_mcp b/exe/rubydex_mcp index bca33de47..cf928ec32 100755 --- a/exe/rubydex_mcp +++ b/exe/rubydex_mcp @@ -1,11 +1,11 @@ #!/usr/bin/env ruby # frozen_string_literal: true -require "rbconfig" +$LOAD_PATH.unshift(File.expand_path("../lib", __dir__)) -host_os = RbConfig::CONFIG.fetch("host_os") -executable = host_os.match?(/mswin|mingw|cygwin/) ? "rubydex_mcp.exe" : "rubydex_mcp" -binary = File.expand_path("../lib/rubydex/bin/#{executable}", __dir__) +require "rubydex/mcp_server_bridge" + +binary = Rubydex::MCPServerBridge.binary_path unless File.executable?(binary) abort(<<~MESSAGE.chomp) @@ -14,4 +14,13 @@ unless File.executable?(binary) MESSAGE end -exec(binary, *ARGV) +args = if Rubydex::MCPServerBridge.passthrough?(ARGV) + ARGV +elsif Rubydex::MCPServerBridge.setup_bundler_context(ARGV.first || Dir.pwd) + # The wrapper only discovers paths that require Ruby/Bundler context. Rust still performs the indexing pass. + Rubydex::MCPServerBridge.compute_index_paths(ARGV) +else + ARGV.empty? ? [Dir.pwd] : ARGV +end + +exec(binary, *args) diff --git a/lib/rubydex/mcp_server_bridge.rb b/lib/rubydex/mcp_server_bridge.rb new file mode 100644 index 000000000..aec4b889a --- /dev/null +++ b/lib/rubydex/mcp_server_bridge.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require "rbconfig" + +module Rubydex + # Launcher-only bridge for `exe/rubydex_mcp`. + # + # Do not require this file from `lib/rubydex.rb`. The main Ruby API owns graph construction through + # `Rubydex::Graph`; this file exists only so the executable wrapper can inspect Bundler before it execs the Rust MCP + # server. The Rust server still performs the indexing pass. + module MCPServerBridge + PASSTHROUGH_ARGUMENTS = ["-h", "--help", "-V", "--version"].freeze + + extend self + + def executable_name + host_os = RbConfig::CONFIG.fetch("host_os") + host_os.match?(/mswin|mingw|cygwin/) ? "rubydex_mcp.exe" : "rubydex_mcp" + end + + def binary_path + File.expand_path("bin/#{executable_name}", __dir__) + end + + def passthrough?(argv) + argv.any? { |arg| PASSTHROUGH_ARGUMENTS.include?(arg) } + end + + # Activates the workspace's Bundler environment so dependency gems can be discovered. + # + # Side effect: sets ENV["BUNDLE_GEMFILE"] (only if unset) to the workspace Gemfile, since + # `bundler/setup` resolves against that variable. This permanently mutates process env — acceptable + # in the launcher, which execs the Rust binary immediately afterwards, but callers in long-lived + # processes (e.g. tests) should save and restore ENV["BUNDLE_GEMFILE"] themselves. + def setup_bundler_context(workspace_path) + root = File.expand_path(workspace_path) + root = File.dirname(root) if File.file?(root) + + gemfile = File.join(root, "Gemfile") + ENV["BUNDLE_GEMFILE"] ||= gemfile if File.file?(gemfile) + + require "bundler/setup" + require "bundler" + + true + rescue LoadError, Bundler::BundlerError => e + warn("Warning: failed to load Bundler context: #{e.message}") + false + end + + # Builds the full set of index roots: the workspace path(s) from argv plus every Bundler + # dependency require_path. + # + # Contract note: only the first argv entry drives the Bundler context (see the call to + # `setup_bundler_context(ARGV.first || Dir.pwd)` in `exe/rubydex_mcp`), but ALL argv entries + # become index roots here. So dependency paths are resolved relative to the first path's + # Gemfile, while every requested path is still indexed. + def compute_index_paths(argv) + paths = argv.empty? ? [Dir.pwd] : argv + paths = paths.map { |path| File.expand_path(path) } + + paths.concat(dependency_index_paths) + paths.uniq + end + + def dependency_index_paths + Bundler.load.specs.flat_map do |spec| + spec.require_paths.filter_map do |path| + next if File.absolute_path?(path) + + full_path = File.join(spec.full_gem_path, path) + full_path if File.directory?(full_path) + end + end.uniq + rescue StandardError => e + warn("Warning: failed to collect Bundler dependency paths: #{e.message}") + [] + end + end +end diff --git a/rust/rubydex-mcp/src/main.rs b/rust/rubydex-mcp/src/main.rs index 258e2b7df..d557793c8 100644 --- a/rust/rubydex-mcp/src/main.rs +++ b/rust/rubydex-mcp/src/main.rs @@ -10,27 +10,24 @@ mod tools; version )] struct Args { - #[arg(value_name = "PATH", default_value = ".")] - path: String, + #[arg(value_name = "PATH")] + paths: Vec, } fn main() { let args = Args::parse(); - - let root = match std::fs::canonicalize(&args.path) { - Ok(p) => p - .into_os_string() - .into_string() - .expect("Project path is not valid UTF-8"), - Err(e) => { - eprintln!("Warning: failed to canonicalize '{}': {e}", args.path); - args.path - } + let paths = if args.paths.is_empty() { + vec![".".to_string()] + } else { + args.paths }; + let paths: Vec = paths.into_iter().map(canonicalize_path).collect(); + let root = paths.first().expect("expected at least one path").clone(); + // Create the server and start indexing in the background. let server = server::RubydexServer::new(root.clone()); - server.spawn_indexer(root); + server.spawn_indexer(paths); // Serve MCP over stdio immediately while indexing runs. // We need to do this because Claude Code's default MCP server timeout is 30 seconds, @@ -46,3 +43,16 @@ fn main() { std::process::exit(1); } } + +fn canonicalize_path(path: String) -> String { + match std::fs::canonicalize(&path) { + Ok(p) => p.into_os_string().into_string().unwrap_or_else(|_| { + eprintln!("Warning: canonicalized path for '{path}' is not valid UTF-8, using original"); + path + }), + Err(e) => { + eprintln!("Warning: failed to canonicalize '{path}': {e}"); + path + } + } +} diff --git a/rust/rubydex-mcp/src/server.rs b/rust/rubydex-mcp/src/server.rs index 4d813d6d0..d59c9cb8a 100644 --- a/rust/rubydex-mcp/src/server.rs +++ b/rust/rubydex-mcp/src/server.rs @@ -44,11 +44,15 @@ impl RubydexServer { } /// Spawns a background thread that indexes the codebase and marks the server as ready. - pub fn spawn_indexer(&self, path: String) { + pub fn spawn_indexer(&self, paths: Vec) { let state = Arc::clone(&self.state); std::thread::spawn(move || { let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - let (file_paths, errors) = rubydex::listing::collect_file_paths(vec![path], &HashSet::new()); + // Paths may overlap (e.g. a dependency's require_path nested under the workspace, or the + // workspace's own gemspec resolving back to the root). This is harmless: the graph keys + // documents by UriId and overwrites on re-index, so a file discovered twice costs only + // wasted indexing work, never duplicate declarations. + let (file_paths, errors) = rubydex::listing::collect_file_paths(paths, &HashSet::new()); for error in &errors { eprintln!("Listing error: {error}"); } diff --git a/rust/rubydex-mcp/tests/mcp.rs b/rust/rubydex-mcp/tests/mcp.rs index cac8beb14..7a99a9d3d 100644 --- a/rust/rubydex-mcp/tests/mcp.rs +++ b/rust/rubydex-mcp/tests/mcp.rs @@ -300,3 +300,94 @@ fn mcp_server_e2e() { let _ = child.wait().unwrap(); }); } + +#[test] +fn mcp_server_indexes_multiple_roots() { + with_context(|context| { + context.write("workspace/app.rb", "class WorkspaceThing; end"); + context.write("dependency/lib/dependency_thing.rb", "class DependencyThing; end"); + + let mut child = Command::cargo_bin("rubydex_mcp") + .unwrap() + .args([ + context.absolute_path_to("workspace").to_str().unwrap(), + context.absolute_path_to("dependency/lib").to_str().unwrap(), + ]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let mut stdin = child.stdin.take().unwrap(); + let stdout = child.stdout.take().unwrap(); + let mut reader = BufReader::new(stdout); + + initialize_session(&mut stdin, &mut reader); + + let mut request_id = 3; + wait_for_indexing_to_complete(&mut stdin, &mut reader, &mut request_id); + + let search_response = call_next_tool( + &mut stdin, + &mut reader, + &mut request_id, + "search_declarations", + &json!({ "query": "DependencyThing" }), + ); + let results = search_response["results"].as_array().unwrap(); + let result_names = names_from_entries(results); + assert_has_name(&result_names, "DependencyThing", "search results"); + + drop(stdin); + let _ = child.wait().unwrap(); + }); +} + +#[test] +fn mcp_server_dedups_overlapping_roots() { + with_context(|context| { + // `lib/thing.rb` lives under the workspace root. Passing both the root and the nested + // `lib` directory makes the file discoverable through two roots, mirroring how a + // dependency's require_path (or the workspace's own gemspec) can nest under the root. + context.write("lib/thing.rb", "class OverlappingThing; end"); + + let mut child = Command::cargo_bin("rubydex_mcp") + .unwrap() + .args([ + context.absolute_path().to_str().unwrap(), + context.absolute_path_to("lib").to_str().unwrap(), + ]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let mut stdin = child.stdin.take().unwrap(); + let stdout = child.stdout.take().unwrap(); + let mut reader = BufReader::new(stdout); + + initialize_session(&mut stdin, &mut reader); + + let mut request_id = 3; + wait_for_indexing_to_complete(&mut stdin, &mut reader, &mut request_id); + + let decl = call_next_tool( + &mut stdin, + &mut reader, + &mut request_id, + "get_declaration", + &json!({ "name": "OverlappingThing" }), + ); + let definitions = decl["definitions"].as_array().unwrap(); + assert_eq!( + definitions.len(), + 1, + "Expected the doubly-discovered file to yield exactly one definition, got: {decl}" + ); + + drop(stdin); + let _ = child.wait().unwrap(); + }); +} diff --git a/test/mcp_server_bridge_test.rb b/test/mcp_server_bridge_test.rb new file mode 100644 index 000000000..e1cefe14c --- /dev/null +++ b/test/mcp_server_bridge_test.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +require "rubydex/mcp_server_bridge" + +class MCPServerBridgeTest < Minitest::Test + def setup + @original_bundle_gemfile = ENV.fetch("BUNDLE_GEMFILE", nil) + end + + def teardown + if @original_bundle_gemfile.nil? + ENV.delete("BUNDLE_GEMFILE") + else + ENV["BUNDLE_GEMFILE"] = @original_bundle_gemfile + end + end + + def test_compute_index_paths_include_bundler_dependency_require_paths + Rubydex::MCPServerBridge.setup_bundler_context(Dir.pwd) + + paths = Rubydex::MCPServerBridge.compute_index_paths([Dir.pwd]) + spec = Bundler.load.specs.find { |loaded_spec| loaded_spec.name == "rake" } + expected_path = File.join(spec.full_gem_path, "lib") + + assert_includes(paths, Dir.pwd) + assert_includes(paths, expected_path) + end + + def test_passthrough_detects_help_and_version_flags + assert(Rubydex::MCPServerBridge.passthrough?(["-h"])) + assert(Rubydex::MCPServerBridge.passthrough?(["--help"])) + assert(Rubydex::MCPServerBridge.passthrough?(["-V"])) + assert(Rubydex::MCPServerBridge.passthrough?(["--version"])) + assert(Rubydex::MCPServerBridge.passthrough?(["/some/path", "--help"])) + end + + def test_passthrough_is_false_for_paths + refute(Rubydex::MCPServerBridge.passthrough?([])) + refute(Rubydex::MCPServerBridge.passthrough?(["/some/path"])) + refute(Rubydex::MCPServerBridge.passthrough?(["/some/path", "/another/path"])) + end + + def test_compute_index_paths_defaults_to_cwd_when_argv_empty + Rubydex::MCPServerBridge.setup_bundler_context(Dir.pwd) + + paths = Rubydex::MCPServerBridge.compute_index_paths([]) + + assert_includes(paths, Dir.pwd) + end +end