From 9590047f32cad6b5024bd1bc10f904742199818a Mon Sep 17 00:00:00 2001 From: Brandon Ferguson Date: Thu, 14 May 2026 19:21:38 +0200 Subject: [PATCH 1/2] Exclude default ignored directories from MCP indexing The MCP server walked the entire workspace root with an empty exclusion set, so directories like .claude, .git, and node_modules were indexed and surfaced in tool results. The Ruby gem already skips these via Graph::IGNORED_DIRECTORIES, but the MCP server bypasses the Ruby layer and calls collect_file_paths directly. Mirror that list in the MCP server and pass it to collect_file_paths so spawn_indexer skips the same directories. --- rust/rubydex-mcp/src/server.rs | 35 +++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/rust/rubydex-mcp/src/server.rs b/rust/rubydex-mcp/src/server.rs index f40bc1637..deacacb93 100644 --- a/rust/rubydex-mcp/src/server.rs +++ b/rust/rubydex-mcp/src/server.rs @@ -20,6 +20,20 @@ use rubydex::model::{ }; use url::Url; +/// Directory names that are never worth descending into while indexing a workspace. +/// Mirrors `Rubydex::Graph::IGNORED_DIRECTORIES` in the Ruby gem. +const IGNORED_DIRECTORIES: &[&str] = &[ + ".bundle", + ".claude", + ".git", + ".github", + ".ruby-lsp", + ".vscode", + "log", + "node_modules", + "tmp", +]; + struct ServerState { graph: Option, error: Option, @@ -46,9 +60,10 @@ impl RubydexServer { /// Spawns a background thread that indexes the codebase and marks the server as ready. 
pub fn spawn_indexer(&self, path: String) { let state = Arc::clone(&self.state); + let excluded = default_excluded_paths(&path); std::thread::spawn(move || { let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - let (file_paths, errors) = rubydex::listing::collect_file_paths(vec![path], &HashSet::new()); + let (file_paths, errors) = rubydex::listing::collect_file_paths(vec![path], &excluded); for error in &errors { eprintln!("Listing error: {error}"); } @@ -95,6 +110,13 @@ impl RubydexServer { } } +/// Builds the set of paths to skip during file discovery: each `IGNORED_DIRECTORIES` +/// entry resolved against the workspace root. +fn default_excluded_paths(root: &str) -> HashSet<PathBuf> { + let root = Path::new(root); + IGNORED_DIRECTORIES.iter().map(|dir| root.join(dir)).collect() +} + /// Returns a structured JSON error string with a machine-readable type, message, and suggestion. fn error_json(error_type: &str, message: &str, suggestion: &str) -> String { serde_json::to_string(&serde_json::json!({ @@ -1141,4 +1163,15 @@ mod tests { } assert_error(&server.codebase_stats(), "indexing_failed"); } + + #[test] + fn default_excluded_paths_resolves_ignored_dirs_against_root() { + let excluded = default_excluded_paths("/workspace"); + + assert_eq!(excluded.len(), IGNORED_DIRECTORIES.len()); + assert!(excluded.contains(&PathBuf::from("/workspace/.claude"))); + assert!(excluded.contains(&PathBuf::from("/workspace/node_modules"))); + assert!(excluded.contains(&PathBuf::from("/workspace/tmp"))); + assert!(!excluded.contains(&PathBuf::from("/workspace/lib"))); + } } From d7ddbe397e61bdad0716a18ffd4bc0a7c294b466 Mon Sep 17 00:00:00 2001 From: Brandon Ferguson Date: Thu, 14 May 2026 23:25:35 +0200 Subject: [PATCH 2/2] Add e2e test for MCP ignored directory exclusion Boots the MCP server against a workspace with a Ruby file under .claude/worktrees and asserts it is neither counted in codebase_stats nor returned by search_declarations, while a top-level file still 
is. --- rust/rubydex-mcp/tests/mcp.rs | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/rust/rubydex-mcp/tests/mcp.rs b/rust/rubydex-mcp/tests/mcp.rs index cac8beb14..0611e2201 100644 --- a/rust/rubydex-mcp/tests/mcp.rs +++ b/rust/rubydex-mcp/tests/mcp.rs @@ -300,3 +300,58 @@ fn mcp_server_e2e() { let _ = child.wait().unwrap(); }); } + +#[test] +fn mcp_server_skips_ignored_directories() { + with_context(|context| { + context.write("app.rb", "class IndexedClass; end"); + // Lives under an ignored directory and must never be indexed. + context.write(".claude/worktrees/scratch.rb", "class IgnoredClass; end"); + + let mut child = Command::cargo_bin("rubydex_mcp") + .unwrap() + .args([context.absolute_path().to_str().unwrap()]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let mut stdin = child.stdin.take().unwrap(); + let stdout = child.stdout.take().unwrap(); + let mut reader = BufReader::new(stdout); + + initialize_session(&mut stdin, &mut reader); + + let mut request_id = 3; + let stats = wait_for_indexing_to_complete(&mut stdin, &mut reader, &mut request_id); + // app.rb plus the synthetic core document; scratch.rb under .claude is excluded. 
+ assert_eq!(stats["files"], 2, "Expected only app.rb to be indexed"); + + let search_response = call_next_tool( + &mut stdin, + &mut reader, + &mut request_id, + "search_declarations", + &json!({ "query": "IgnoredClass" }), + ); + assert_eq!( + search_response["total"].as_u64().unwrap(), + 0, + "Expected no results for a declaration under .claude, got: {search_response}" + ); + + let indexed_response = call_next_tool( + &mut stdin, + &mut reader, + &mut request_id, + "search_declarations", + &json!({ "query": "IndexedClass" }), + ); + let indexed_names = names_from_entries(indexed_response["results"].as_array().unwrap()); + assert_has_name(&indexed_names, "IndexedClass", "search results"); + + drop(stdin); + let _ = child.wait().unwrap(); + }); +}