diff --git a/exe/rdx b/exe/rdx index 38143b2ef..8954d86b3 100755 --- a/exe/rdx +++ b/exe/rdx @@ -5,40 +5,108 @@ $LOAD_PATH.unshift(File.expand_path("../lib", __dir__)) require "optparse" -options = {} +USAGE = <<~TEXT + Usage: rdx <command> [options] -OptionParser.new do |parser| - parser.on("--version", "Print the gem's version") do - require "rubydex/version" - puts "v#{Rubydex::VERSION}" - exit - end + Commands: + query Run a Cypher query against the workspace graph and print the result + schema Describe the queryable Cypher schema (labels, relationships, properties) + console Open an interactive session with a populated graph for the current workspace + help Show this help message - parser.on("-h", "--help", "Prints this help") do - puts parser - exit - end + Run `rdx <command> --help` for command-specific options. +TEXT - parser.on("-i", "--interactive", "Open an interactive session with a populated graph for the current workspace") do - options[:interactive] = true - end -end.parse! +def abort_with_usage(message) + warn(message) + warn("") + warn(USAGE) + exit(1) +end -require "rubydex" +# Top-level --version / --help / bare invocation, handled before command dispatch. +case ARGV.first +when "--version", "version" + require "rubydex/version" + puts "v#{Rubydex::VERSION}" + exit +when nil, "-h", "--help", "help" + puts USAGE + exit +end -def __with_timer(message, &block) - print(message) +command = ARGV.shift + +def with_timer(io, message) + io.print(message) start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) - block.call + yield duration = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond) - start - puts " finished in #{duration.round(2)}ms" + io.puts(" finished in #{duration.round(2)}ms") +end + +# Builds the workspace graph, sending progress messages to `progress_io`. 
+def build_graph(progress_io) + graph = Rubydex::Graph.new + with_timer(progress_io, "Indexing workspace...") { graph.index_workspace } + with_timer(progress_io, "Resolving graph...") { graph.resolve } + graph +end + +# Parses `--format`/`--help` for a command and returns the chosen format. +def parse_format(usage) + format = "table" + OptionParser.new do |parser| + parser.banner = usage + parser.on("--format FORMAT", ["table", "json"], "Output format (table or json)") { |value| format = value } + parser.on("-h", "--help", "Show this help") do + puts parser + exit + end + end.parse! + format end -graph = Rubydex::Graph.new -__with_timer("Indexing workspace...") { graph.index_workspace } -__with_timer("Resolving graph...") { graph.resolve } +case command +when "query" + format = parse_format("Usage: rdx query [options]") + query = ARGV.shift + abort_with_usage("`query` requires a Cypher query argument") if query.nil? || query.empty? + + require "rubydex" + + # Parse the query first so a malformed query fails fast, before the expensive workspace indexing. + parsed = begin + Rubydex::Query.parse(query) + rescue ArgumentError => e + abort(e.message) + end + + # Progress goes to stderr so stdout carries only the query result (e.g. for piping JSON). + graph = build_graph($stderr) + begin + print(parsed.render(graph, format)) + rescue ArgumentError => e + abort(e.message) + end +when "schema" + format = parse_format("Usage: rdx schema [options]") + + require "rubydex" + # The schema is static, so describe it without indexing the workspace. + print(Rubydex::Query.schema(format)) +when "console" + OptionParser.new do |parser| + parser.banner = "Usage: rdx console" + parser.on("-h", "--help", "Show this help") do + puts parser + exit + end + end.parse! 
+ + require "rubydex" + graph = build_graph($stdout) -if options[:interactive] begin require "irb" IRB.setup(nil) @@ -48,4 +116,6 @@ if options[:interactive] rescue LoadError abort("Interactive mode requires `irb` to be in the bundle") end +else + abort_with_usage("unknown command: #{command}") end diff --git a/ext/rubydex/graph.c b/ext/rubydex/graph.c index 15912be63..5e4414e6a 100644 --- a/ext/rubydex/graph.c +++ b/ext/rubydex/graph.c @@ -9,6 +9,7 @@ #include "utils.h" static VALUE cGraph; +static VALUE cQuery; static VALUE mRubydex; static VALUE cKeyword; static VALUE cKeywordParameter; @@ -16,6 +17,9 @@ static VALUE cKeywordParameter; // Interned once in `rdxi_initialize_graph` to avoid repeated symbol-table lookups on hot completion paths. static ID id_self_receiver; +// Coerces an optional format argument (String, Symbol, or nil) to a C string; defined below. +static const char *cypher_format_cstr(VALUE format); + // Extracts the required `self_receiver:` kwarg from `opts`. Returns NULL when the value is `nil`, // which means "no self-type to walk" (e.g., empty class body where the singleton class hasn't // been created). Raises ArgumentError if the kwarg is absent, of the wrong type, or an empty @@ -750,6 +754,99 @@ static VALUE rdxr_graph_keyword(VALUE self, VALUE name) { return rb_class_new_instance(2, argv, cKeyword); } +// Rubydex::Query.schema(format = :table) -> String +// Returns a description of the queryable Cypher schema. `format` may be "table" (default) or "json". +// The schema is static, so this is a class method and does not require a graph. +static VALUE rdxr_cypher_schema(int argc, VALUE *argv, VALUE self) { + VALUE format; + rb_scan_args(argc, argv, "01", &format); + + const char *output = rdx_cypher_schema(cypher_format_cstr(format)); + VALUE result = output == NULL ? 
rb_utf8_str_new_cstr("") : rb_utf8_str_new_cstr(output); + if (output != NULL) { + free_c_string(output); + } + + return result; +} + +// Coerces an optional format argument (String, Symbol, or nil) to a C string, defaulting to "table". +static const char *cypher_format_cstr(VALUE format) { + if (NIL_P(format)) { + return "table"; + } + if (RB_TYPE_P(format, T_SYMBOL)) { + format = rb_sym2str(format); + } + Check_Type(format, T_STRING); + return StringValueCStr(format); +} + +// Free function for Rubydex::Query: releases the parsed query allocated by Rust. +static void query_free(void *ptr) { + if (ptr) { + rdx_cypher_query_free(ptr); + } +} + +static const rb_data_type_t query_type = { + .wrap_struct_name = "Rubydex::Query", + .function = { + .dmark = NULL, + .dfree = query_free, + .dsize = NULL, + .dcompact = NULL, + }, + .parent = NULL, + .data = NULL, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +// Rubydex::Query.parse(query) -> Rubydex::Query +// Parses a Cypher query into an opaque, reusable object, without needing a graph. Raises +// ArgumentError on a syntax error, so callers can validate a query before building a graph. +static VALUE rdxr_query_parse(VALUE klass, VALUE query) { + Check_Type(query, T_STRING); + + struct CParseResult result = rdx_cypher_parse(StringValueCStr(query)); + if (result.error != NULL) { + VALUE message = rb_utf8_str_new_cstr(result.error); + free_c_string(result.error); + rb_raise(rb_eArgError, "%s", StringValueCStr(message)); + } + + return TypedData_Wrap_Struct(klass, &query_type, result.query); +} + +// Rubydex::Query#render(graph, format = :table) -> String +// Runs this parsed query against the given graph and returns the formatted output. `format` may be +// "table" (default) or "json". Raises ArgumentError on an execution or format error. 
+static VALUE rdxr_query_render(int argc, VALUE *argv, VALUE self) { + VALUE graph_obj, format; + rb_scan_args(argc, argv, "11", &graph_obj, &format); + + void *query; + TypedData_Get_Struct(self, void *, &query_type, query); + + void *graph; + TypedData_Get_Struct(graph_obj, void *, &graph_type, graph); + + struct CQueryResult result = rdx_query_run(query, graph, cypher_format_cstr(format)); + + if (result.error != NULL) { + VALUE message = rb_utf8_str_new_cstr(result.error); + free_c_string(result.error); + rb_raise(rb_eArgError, "%s", StringValueCStr(message)); + } + + VALUE output = result.output == NULL ? rb_utf8_str_new_cstr("") : rb_utf8_str_new_cstr(result.output); + if (result.output != NULL) { + free_c_string(result.output); + } + + return output; +} + void rdxi_initialize_graph(VALUE moduleRubydex) { mRubydex = moduleRubydex; cGraph = rb_define_class_under(mRubydex, "Graph", rb_cObject); @@ -784,4 +881,10 @@ void rdxi_initialize_graph(VALUE moduleRubydex) { rb_define_method(cGraph, "exclude_paths", rdxr_graph_exclude_paths, 1); rb_define_method(cGraph, "excluded_paths", rdxr_graph_excluded_paths, 0); rb_define_method(cGraph, "keyword", rdxr_graph_keyword, 1); + + cQuery = rb_define_class_under(mRubydex, "Query", rb_cObject); + rb_undef_alloc_func(cQuery); + rb_define_singleton_method(cQuery, "parse", rdxr_query_parse, 1); + rb_define_singleton_method(cQuery, "schema", rdxr_cypher_schema, -1); + rb_define_method(cQuery, "render", rdxr_query_render, -1); } diff --git a/rbi/rubydex.rbi b/rbi/rubydex.rbi index dd413eed8..618d34e62 100644 --- a/rbi/rubydex.rbi +++ b/rbi/rubydex.rbi @@ -272,6 +272,19 @@ end class Rubydex::IntegrityFailure < Rubydex::Failure; end +class Rubydex::Query + class << self + sig { params(query: String).returns(Rubydex::Query) } + def parse(query); end + + sig { params(format: T.any(String, Symbol)).returns(String) } + def schema(format = :table); end + end + + sig { params(graph: Rubydex::Graph, format: T.any(String, 
Symbol)).returns(String) } + def render(graph, format = :table); end +end + class Rubydex::Graph IGNORED_DIRECTORIES = T.let(T.unsafe(nil), T::Array[String]) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 98fab4410..70a1189d7 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -318,6 +318,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "cypher-parser" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed0d1e561d51e651bdf70f8439da293fd0f1fe34d8431059061eadaefc7abb1" + [[package]] name = "darling" version = "0.23.0" @@ -1057,6 +1063,7 @@ dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", + "cypher-parser", "glob", "libc", "line-index", diff --git a/rust/rubydex-sys/src/graph_api.rs b/rust/rubydex-sys/src/graph_api.rs index 562963f7b..99c6e8ec1 100644 --- a/rust/rubydex-sys/src/graph_api.rs +++ b/rust/rubydex-sys/src/graph_api.rs @@ -14,6 +14,7 @@ use rubydex::model::ids::{DeclarationId, NameId, UriId}; use rubydex::model::keywords; use rubydex::model::name::NameRef; use rubydex::model::visibility::Visibility; +use rubydex::query::cypher::{self, OutputFormat}; use rubydex::query::{CompletionCandidate, CompletionContext, CompletionReceiver}; use rubydex::resolution::Resolver; use rubydex::{indexing, integrity, listing, query}; @@ -977,6 +978,145 @@ pub unsafe extern "C" fn rdx_keyword_get(name: *const c_char) -> *const CKeyword } } +/// The result of running a Cypher query, carrying either the formatted output or an error message. +#[repr(C)] +pub struct CQueryResult { + /// Non-null on success; null on error. Caller must free with `free_c_string`. + pub output: *const c_char, + /// Non-null on error; null on success. Caller must free with `free_c_string`. 
+ pub error: *const c_char, +} + +impl CQueryResult { + fn success(output: &str) -> Self { + match CString::new(output) { + Ok(c_string) => Self { + output: c_string.into_raw().cast_const(), + error: ptr::null(), + }, + Err(_) => Self::error("query output contained an interior NUL byte"), + } + } + + fn error(message: &str) -> Self { + Self { + output: ptr::null(), + error: CString::new(message).map_or(ptr::null(), |s| s.into_raw().cast_const()), + } + } +} + +/// The result of parsing a Cypher query into an opaque, reusable parsed-query object. +#[repr(C)] +pub struct CParseResult { + /// Non-null on success: a heap-allocated parsed query. Free with `rdx_cypher_query_free`. + pub query: *mut c_void, + /// Non-null on error; null on success. Caller must free with `free_c_string`. + pub error: *const c_char, +} + +/// Parses a Cypher query string into an opaque parsed-query object, without needing a graph. +/// +/// On success, `query` is a heap-allocated parsed query that can be executed against a graph with +/// `rdx_query_run` and must eventually be freed with `rdx_cypher_query_free`. On failure, `error` +/// holds the message. +/// +/// # Safety +/// +/// - `query` must be a valid, null-terminated UTF-8 string. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn rdx_cypher_parse(query: *const c_char) -> CParseResult { + let Ok(query_str) = (unsafe { utils::convert_char_ptr_to_string(query) }) else { + return CParseResult { + query: ptr::null_mut(), + error: CString::new("query is not valid UTF-8").map_or(ptr::null(), |s| s.into_raw().cast_const()), + }; + }; + + match cypher::parse(&query_str) { + Ok(parsed) => CParseResult { + query: Box::into_raw(Box::new(parsed)).cast::<c_void>(), + error: ptr::null(), + }, + Err(error) => CParseResult { + query: ptr::null_mut(), + error: CString::new(error.to_string()).map_or(ptr::null(), |s| s.into_raw().cast_const()), + }, + } +} + +/// Frees a parsed query previously returned by `rdx_cypher_parse`. 
+/// +/// # Safety +/// +/// - `query` must be a pointer returned by `rdx_cypher_parse`, or null. It must not be used after. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn rdx_cypher_query_free(query: *mut c_void) { + if query.is_null() { + return; + } + let _ = unsafe { Box::from_raw(query.cast::<cypher::Query>()) }; +} + +/// Executes a previously parsed query (from `rdx_cypher_parse`) against the graph and returns the +/// formatted output or an error message. `format` must be `"table"` or `"json"`. +/// +/// # Safety +/// +/// - `query` must be a valid pointer returned by `rdx_cypher_parse`. +/// - `pointer` must be a valid `GraphPointer` previously returned by this crate. +/// - `format` must be a valid, null-terminated UTF-8 string. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn rdx_query_run( + query: *const c_void, + pointer: GraphPointer, + format: *const c_char, +) -> CQueryResult { + if query.is_null() { + return CQueryResult::error("query is null"); + } + + let Ok(format_str) = (unsafe { utils::convert_char_ptr_to_string(format) }) else { + return CQueryResult::error("format is not valid UTF-8"); + }; + + let output_format = match format_str.as_str() { + "table" => OutputFormat::Table, + "json" => OutputFormat::Json, + other => { + return CQueryResult::error(&format!("unknown query format `{other}` (expected `table` or `json`)")); + } + }; + + let parsed = unsafe { &*query.cast::<cypher::Query>() }; + + with_graph(pointer, |graph| { + match cypher::run_parsed(graph, parsed, output_format) { + Ok(output) => CQueryResult::success(&output), + Err(error) => CQueryResult::error(&error.to_string()), + } + }) +} + +/// Returns a description of the queryable Cypher schema (node labels, relationship types, and +/// properties) in the given format (`"table"` or `"json"`). The schema is static and requires no +/// graph. Caller must free the returned pointer with `free_c_string`. +/// +/// # Safety +/// +/// - `format` must be a valid, null-terminated UTF-8 string. 
+#[unsafe(no_mangle)] +pub unsafe extern "C" fn rdx_cypher_schema(format: *const c_char) -> *const c_char { + let format_str = unsafe { utils::convert_char_ptr_to_string(format) }.unwrap_or_else(|_| "table".to_string()); + let output_format = if format_str == "json" { + OutputFormat::Json + } else { + OutputFormat::Table + }; + + CString::new(cypher::schema(output_format)).map_or(ptr::null(), |s| s.into_raw().cast_const()) +} + #[repr(u8)] #[derive(Debug, Clone, Copy)] pub enum CVisibility { diff --git a/rust/rubydex/Cargo.toml b/rust/rubydex/Cargo.toml index 2d6021eca..f1bb044bd 100644 --- a/rust/rubydex/Cargo.toml +++ b/rust/rubydex/Cargo.toml @@ -22,6 +22,7 @@ crate-type = ["rlib"] test_utils = ["dep:tempfile"] [dependencies] +cypher-parser = "0.2" ruby-prism = "1.9.0" ruby-rbs = "0.3" url = "2.5.4" diff --git a/rust/rubydex/src/main.rs b/rust/rubydex/src/main.rs index d6a4c7f87..b95b65dd2 100644 --- a/rust/rubydex/src/main.rs +++ b/rust/rubydex/src/main.rs @@ -6,6 +6,7 @@ use rubydex::{ indexing::{self, IndexerBackend}, integrity, listing, model::graph::Graph, + query::cypher::{self, OutputFormat}, resolution::Resolver, stats::{ memory::MemoryStats, @@ -52,6 +53,38 @@ struct Args { help = "Write orphan definitions report to specified file" )] report_orphans: Option, + + #[arg(long = "query", value_name = "CYPHER", help = "Run a Cypher query against the graph")] + query: Option<String>, + + #[arg( + long = "schema", + help = "Describe the queryable Cypher schema (labels, relationships, properties) and exit" + )] + schema: bool, + + #[arg( + long = "format", + value_enum, + default_value = "table", + help = "Output format for --query and --schema results" + )] + format: Format, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum Format { + Table, + Json, +} + +impl From<Format> for OutputFormat { + fn from(format: Format) -> Self { + match format { + Format::Table => OutputFormat::Table, + Format::Json => OutputFormat::Json, + } + } } #[derive(Debug, Clone, ValueEnum)] @@ -88,6 
+121,21 @@ fn exit(print_stats: bool) { fn main() { let args = Args::parse(); + // The Cypher schema is static, so describe it without indexing the workspace. + if args.schema { + print!("{}", cypher::schema(args.format.into())); + std::process::exit(0); + } + + // Parse the query up front, before any indexing, so a malformed query fails fast. + let parsed_query = args.query.as_ref().map(|query| match cypher::parse(query) { + Ok(parsed) => parsed, + Err(error) => { + eprintln!("{error}"); + std::process::exit(1); + } + }); + if args.stats { Timer::set_global_timer(Timer::new()); } @@ -173,6 +221,25 @@ fn main() { } } + // Cypher query: execute the query parsed earlier against the now-built graph. + if let Some(query) = &parsed_query { + match time_it!(querying, { cypher::run_parsed(&graph, query, args.format.into()) }) { + Ok(output) => print!("{output}"), + Err(error) => { + eprintln!("{error}"); + std::process::exit(1); + } + } + + if args.stats { + Timer::print_breakdown(); + MemoryStats::print_memory_usage(); + } + + mem::forget(graph); + return; + } + // Generate visualization or print statistics if args.dot { println!("{}", dot::DotBuilder::generate(&graph, args.show_builtins)); diff --git a/rust/rubydex/src/model/document.rs b/rust/rubydex/src/model/document.rs index c99daa8e9..bd3419225 100644 --- a/rust/rubydex/src/model/document.rs +++ b/rust/rubydex/src/model/document.rs @@ -84,6 +84,39 @@ impl Document { self.diagnostics.push(diagnostic); } + /// The file-system path of this document, decoded from its URI. + /// + /// Returns `None` when the URI is not a `file://` URL (e.g. the synthetic built-in document) or + /// cannot be converted to a path. Uses `Url` so percent-encoding and platform-specific paths + /// (including Windows drive paths) are handled correctly. 
+ #[must_use] + pub fn file_path(&self) -> Option<PathBuf> { + let url = Url::parse(&self.uri).ok()?; + if url.scheme() != "file" { + return None; + } + url.to_file_path().ok() + } + + /// The base file name of this document (the last path segment), decoded from its URI. + /// + /// Prefers the platform file path, but falls back to the last URL path segment so it still works + /// for `file://` URIs that don't convert to a local path on the current platform (e.g. a + /// drive-less path like `file:///foo.rb` on Windows). Returns `None` only when the URI has no + /// usable path segment (e.g. the synthetic built-in document). + #[must_use] + pub fn file_name(&self) -> Option<String> { + if let Some(path) = self.file_path() + && let Some(name) = path.file_name() + { + return Some(name.to_string_lossy().into_owned()); + } + + let url = Url::parse(&self.uri).ok()?; + let segment = url.path_segments()?.rfind(|segment| !segment.is_empty())?; + Some(segment.to_string()) + } + /// Computes the require path for this document given load paths. /// /// Returns `None` if: @@ -97,12 +130,7 @@ impl Document { /// Panics if load path entries exceed u16. #[must_use] pub fn require_path(&self, load_paths: &[PathBuf]) -> Option<(String, u16)> { - let url = Url::parse(&self.uri).ok()?; - if url.scheme() != "file" { - return None; - } - - let file_path = url.to_file_path().ok()?; + let file_path = self.file_path()?; if file_path.extension().is_none_or(|ext| ext != "rb") { return None; } diff --git a/rust/rubydex/src/query.rs b/rust/rubydex/src/query.rs index 85f6f4ec9..dd005ec25 100644 --- a/rust/rubydex/src/query.rs +++ b/rust/rubydex/src/query.rs @@ -15,6 +15,8 @@ use crate::model::keywords::{self, Keyword}; use crate::model::name::NameRef; use crate::model::visibility::Visibility; +pub mod cypher; + /// Controls how declaration names are matched against the search query. 
#[derive(Default)] pub enum MatchMode { diff --git a/rust/rubydex/src/query/cypher.rs b/rust/rubydex/src/query/cypher.rs new file mode 100644 index 000000000..63a7c0403 --- /dev/null +++ b/rust/rubydex/src/query/cypher.rs @@ -0,0 +1,57 @@ +//! A small Cypher query engine that runs read-only queries directly against the in-memory +//! [`Graph`](crate::model::graph::Graph). +//! +//! Supported subset: +//! - `MATCH` with node patterns `(v:Label {prop: value})` — labels may be a disjunction +//! (`(v:Class|Module)` matches a node with **any** of the listed labels) — and relationship +//! patterns `-[:TYPE]->`, `<-[:TYPE]-`, `-[:TYPE]-`, including variable-length `-[:TYPE*min..max]->`. +//! - `WHERE` with `=`, `<>`, `<`, `<=`, `>`, `>=`, `CONTAINS`, `STARTS WITH`, `ENDS WITH`, +//! combined with `AND`, `OR`, `NOT`. +//! - `RETURN` with `DISTINCT`, `AS` aliases, and the aggregates `count`, `collect`, `min`, `max`, +//! `sum`, `avg`. +//! - `ORDER BY`, `SKIP`, `LIMIT`. +//! +//! See [`schema`] for the node labels and relationship types exposed to queries. + +// The whole Cypher engine — lexer, parser, AST, executor, values, and formatting — lives in the +// graph-independent `cypher-parser` crate. rubydex only provides the `GraphProvider` mapping for its +// `Graph` (in `schema`) and the static schema description (in `schema_info`). +// +// `Query` is the opaque parsed-query object: callers can `parse` a query string once (failing fast +// on syntax errors), then `run_parsed` it against a graph that was built afterwards. +pub use cypher_parser::{CypherError, OutputFormat, Query, parse}; + +pub mod schema; +pub mod schema_info; + +use crate::model::graph::Graph; + +/// Parses and executes a Cypher query against the graph, returning the formatted output. +/// +/// # Errors +/// +/// Returns a [`CypherError`] if the query cannot be parsed or executed. 
+pub fn run_query(graph: &Graph, query: &str, output_format: OutputFormat) -> Result<String, CypherError> { + cypher_parser::run_query(graph, query, output_format) +} + +/// Executes an already-parsed [`Query`] against the graph and formats the result. Pair with +/// [`parse`] to validate a query before building the graph. +/// +/// # Errors +/// +/// Returns a [`CypherError`] if the query cannot be executed. +pub fn run_parsed(graph: &Graph, query: &Query, output_format: OutputFormat) -> Result<String, CypherError> { + let result = cypher_parser::execute(graph, query)?; + Ok(cypher_parser::format::format(&result, output_format)) +} + +/// Returns a description of the queryable schema (node labels, relationship types, and properties) +/// in the requested format. The schema is static and does not require a graph. +#[must_use] +pub fn schema(output_format: OutputFormat) -> String { + schema_info::describe(output_format) +} + +#[cfg(test)] +mod tests; diff --git a/rust/rubydex/src/query/cypher/schema.rs b/rust/rubydex/src/query/cypher/schema.rs new file mode 100644 index 000000000..6bd485de9 --- /dev/null +++ b/rust/rubydex/src/query/cypher/schema.rs @@ -0,0 +1,565 @@ +//! Maps the rubydex [`Graph`] onto a property-graph schema for Cypher execution. +//! +//! Node labels: +//! - `Document` — a source file. +//! - `Definition` — a per-file occurrence of a Ruby construct. +//! - `Declaration` — the global, merged concept of a named entity. Declarations also carry +//! kind sub-labels (`Class`, `Module`, `SingletonClass`, `Method`, `Constant`, `ConstantAlias`, +//! `GlobalVariable`, `InstanceVariable`, `ClassVariable`) plus the grouping label `Namespace` +//! (any of `Class`/`Module`/`SingletonClass`). +//! +//! Relationship types mirror `dot.rs`: +//! - `DEFINES`: `Document` → `Definition` +//! - `DECLARES`: `Definition` → `Declaration` +//! - `CONTAINS`: `Definition` → `Definition` (lexical nesting in one file, e.g. a class written +//! 
inside a module; the source-level counterpart of declaration-level `OWNS`) +//! - `INHERITS`: `Declaration` → `Declaration` (superclass) +//! - `INCLUDES` / `PREPENDS` / `EXTENDS`: `Declaration` → `Declaration` (mixins) +//! - `OWNS`: `Declaration` → `Declaration` (declaration-level membership, e.g. a namespace's methods +//! and nested constants, merged across all files) +//! - `ANCESTOR`: `Declaration` → `Declaration` (linearized ancestor chain) +//! - `DESCENDANT`: `Declaration` → `Declaration` +//! - `REFERENCES`: `Document` → `Declaration` (constant references) + +use std::collections::{HashSet, VecDeque}; + +use crate::model::declaration::Declaration; +use crate::model::definitions::{Definition, Mixin}; +use crate::model::graph::Graph; +use crate::model::ids::{ConstantReferenceId, DeclarationId, DefinitionId, UriId}; + +use cypher_parser::{CypherValue, GraphProvider}; + +/// A handle to a node in the graph. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum NodeRef { + Declaration(DeclarationId), + Definition(DefinitionId), + Document(UriId), +} + +/// A relationship type. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RelType { + /// `Document` → `Definition`: a file defines a construct occurrence. + Defines, + /// `Definition` → `Declaration`: a per-file occurrence declares the global merged entity. + Declares, + /// `Definition` → `Definition`: lexical nesting within a single file, e.g. a class written + /// inside a module. This is the source-level structure; the declaration-level (merged across + /// files) counterpart is [`RelType::Owns`]. + Contains, + /// `Declaration` → `Declaration`: direct superclass (a single hop, not the full chain). + Inherits, + /// `Declaration` → `Declaration`: an included module mixin. + Includes, + /// `Declaration` → `Declaration`: a prepended module mixin. + Prepends, + /// `Declaration` → `Declaration`: an extended module mixin. 
+ Extends, + /// `Declaration` → `Declaration`: declaration-level membership (a namespace's methods and + /// nested constants), merged across all files. The per-file source counterpart is + /// [`RelType::Contains`]. + Owns, + /// `Declaration` → `Declaration`: an entry in the linearized ancestor chain (transitive + /// superclasses plus included/prepended modules). + Ancestor, + /// `Declaration` → `Declaration`: the reverse of [`RelType::Ancestor`]. + Descendant, + /// `Document` → `Declaration`: a constant reference in the file resolves to a declaration. + References, +} + +impl RelType { + /// Parses a relationship type name (case-insensitive). Returns `None` if unknown. + #[must_use] + pub fn parse(name: &str) -> Option<RelType> { + match name.to_ascii_uppercase().as_str() { + "DEFINES" => Some(RelType::Defines), + "DECLARES" => Some(RelType::Declares), + "CONTAINS" => Some(RelType::Contains), + "INHERITS" => Some(RelType::Inherits), + "INCLUDES" => Some(RelType::Includes), + "PREPENDS" => Some(RelType::Prepends), + "EXTENDS" => Some(RelType::Extends), + "OWNS" => Some(RelType::Owns), + "ANCESTOR" => Some(RelType::Ancestor), + "DESCENDANT" => Some(RelType::Descendant), + "REFERENCES" => Some(RelType::References), + _ => None, + } + } + + /// All relationship types, used when a pattern leaves the type unspecified. + #[must_use] + pub fn all() -> &'static [RelType] { + &[ + RelType::Defines, + RelType::Declares, + RelType::Contains, + RelType::Inherits, + RelType::Includes, + RelType::Prepends, + RelType::Extends, + RelType::Owns, + RelType::Ancestor, + RelType::Descendant, + RelType::References, + ] + } + + /// The canonical uppercase name of this relationship type. 
+ #[must_use] + pub fn name(self) -> &'static str { + match self { + RelType::Defines => "DEFINES", + RelType::Declares => "DECLARES", + RelType::Contains => "CONTAINS", + RelType::Inherits => "INHERITS", + RelType::Includes => "INCLUDES", + RelType::Prepends => "PREPENDS", + RelType::Extends => "EXTENDS", + RelType::Owns => "OWNS", + RelType::Ancestor => "ANCESTOR", + RelType::Descendant => "DESCENDANT", + RelType::References => "REFERENCES", + } + } +} + +/// Exposes the rubydex [`Graph`] to the `cypher-parser` executor as a property graph. This is the +/// rubydex-specific mapping; the executor itself is generic over this trait. +impl GraphProvider for Graph { + type NodeId = NodeRef; + + fn scan(&self, labels: &[String]) -> Vec<NodeRef> { + scan(self, labels) + } + + fn matches_label(&self, node: NodeRef, label: &str) -> bool { + matches_label(self, node, label) + } + + fn relationship_types(&self) -> Vec<String> { + RelType::all().iter().map(|rel| rel.name().to_string()).collect() + } + + fn expand(&self, node: NodeRef, rel_type: &str) -> Vec<NodeRef> { + RelType::parse(rel_type).map_or_else(Vec::new, |rel| expand_out(self, node, rel)) + } + + fn rel_sources(&self, rel_type: &str) -> Vec<NodeRef> { + RelType::parse(rel_type).map_or_else(Vec::new, |rel| rel_source_nodes(self, rel)) + } + + fn property(&self, node: NodeRef, prop: &str) -> CypherValue { + property(self, node, prop) + } + + fn label(&self, node: NodeRef) -> String { + node_label(self, node) + } + + fn name(&self, node: NodeRef) -> String { + node_name(self, node) + } +} + +/// Returns all nodes matching the given labels. An empty slice matches every node; otherwise a node +/// is returned if it matches **any** of the labels (label disjunction, e.g. `(:Class|Module)`). 
+#[must_use] +pub fn scan(graph: &Graph, labels: &[String]) -> Vec<NodeRef> { + if labels.is_empty() { + let mut nodes = Vec::new(); + nodes.extend(graph.documents().keys().map(|id| NodeRef::Document(*id))); + nodes.extend(graph.definitions().keys().map(|id| NodeRef::Definition(*id))); + nodes.extend(graph.declarations().keys().map(|id| NodeRef::Declaration(*id))); + return nodes; + } + + let mut seen = HashSet::new(); + let mut nodes = Vec::new(); + for label in labels { + for node in scan_label(graph, label) { + if seen.insert(node) { + nodes.push(node); + } + } + } + nodes +} + +/// Returns all nodes matching a single label. +fn scan_label(graph: &Graph, label: &str) -> Vec<NodeRef> { + match label { + "Document" => graph.documents().keys().map(|id| NodeRef::Document(*id)).collect(), + "Definition" => graph.definitions().keys().map(|id| NodeRef::Definition(*id)).collect(), + other => graph + .declarations() + .iter() + .filter(|(_, declaration)| declaration_matches_label(declaration, other)) + .map(|(id, _)| NodeRef::Declaration(*id)) + .collect(), + } +} + +/// Returns whether a node matches a single label. +#[must_use] +pub fn matches_label(graph: &Graph, node: NodeRef, label: &str) -> bool { + match node { + NodeRef::Document(_) => label == "Document", + NodeRef::Definition(_) => label == "Definition", + NodeRef::Declaration(id) => graph + .declarations() + .get(&id) + .is_some_and(|declaration| declaration_matches_label(declaration, label)), + } +} + +fn declaration_matches_label(declaration: &Declaration, label: &str) -> bool { + match label { + "Declaration" => true, + "Namespace" => declaration.as_namespace().is_some(), + other => declaration.kind() == other, + } +} + +/// Returns the top-level label name of a node, used for display and JSON output. 
+#[must_use] +pub fn node_label(graph: &Graph, node: NodeRef) -> String { + match node { + NodeRef::Document(_) => "Document".to_string(), + NodeRef::Definition(id) => graph + .definitions() + .get(&id) + .map_or_else(|| "Definition".to_string(), |definition| definition.kind().to_string()), + NodeRef::Declaration(id) => graph.declarations().get(&id).map_or_else( + || "Declaration".to_string(), + |declaration| declaration.kind().to_string(), + ), + } +} + +/// The primary display name of a node (FQN for declarations, URI basename for documents). +#[must_use] +pub fn node_name(graph: &Graph, node: NodeRef) -> String { + match node { + NodeRef::Declaration(id) => graph + .declarations() + .get(&id) + .map_or_else(String::new, |declaration| declaration.name().to_string()), + NodeRef::Definition(id) => graph + .definitions() + .get(&id) + .and_then(|definition| graph.definition_to_declaration_id(definition)) + .and_then(|decl_id| graph.declarations().get(decl_id)) + .map_or_else(String::new, |declaration| declaration.name().to_string()), + NodeRef::Document(id) => graph.documents().get(&id).map_or_else(String::new, |document| { + document.file_name().unwrap_or_else(|| document.uri().to_string()) + }), + } +} + +/// Resolves a node property to a value, where `prop` is the property name read off the node (the +/// `x` in `RETURN n.x` / `WHERE n.x = ...`). Unknown properties yield `NULL`. 
+#[must_use]
+pub fn property(graph: &Graph, node: NodeRef, prop: &str) -> CypherValue {
+    match prop {
+        // `label` and `kind` are available on every node type and share one implementation.
+        "label" | "kind" => CypherValue::Str(node_label(graph, node)),
+        _ => match node {
+            NodeRef::Declaration(id) => declaration_property(graph, id, prop),
+            NodeRef::Definition(id) => definition_property(graph, id, prop),
+            NodeRef::Document(id) => document_property(graph, id, prop),
+        },
+    }
+}
+
+/// Reads a property of a declaration node.
+///
+/// Supports `name`, `unqualified_name`, `visibility` and `definition_count`. A missing
+/// declaration, an unknown property, or a declaration without a visibility yields `NULL`.
+fn declaration_property(graph: &Graph, id: DeclarationId, prop: &str) -> CypherValue {
+    let Some(declaration) = graph.declarations().get(&id) else {
+        return CypherValue::Null;
+    };
+
+    match prop {
+        "name" => CypherValue::Str(declaration.name().to_string()),
+        "unqualified_name" => CypherValue::Str(declaration.unqualified_name()),
+        "visibility" => graph
+            .visibility(&id)
+            .map_or(CypherValue::Null, |visibility| CypherValue::Str(visibility.to_string())),
+        // Saturate instead of failing if the count does not fit in an `i64`.
+        "definition_count" => CypherValue::Int(i64::try_from(declaration.definitions().len()).unwrap_or(i64::MAX)),
+        _ => CypherValue::Null,
+    }
+}
+
+/// Reads a property of a definition node.
+///
+/// Supports `name` (same value as `node_name`), `file` (URI of the containing document) and
+/// `line` (start line of the definition in presentation form). A missing definition, a missing
+/// document, or an unknown property yields `NULL`.
+fn definition_property(graph: &Graph, id: DefinitionId, prop: &str) -> CypherValue {
+    let Some(definition) = graph.definitions().get(&id) else {
+        return CypherValue::Null;
+    };
+
+    match prop {
+        "name" => CypherValue::Str(node_name(graph, NodeRef::Definition(id))),
+        "file" => graph
+            .documents()
+            .get(definition.uri_id())
+            .map_or(CypherValue::Null, |document| {
+                CypherValue::Str(document.uri().to_string())
+            }),
+        "line" => graph
+            .documents()
+            .get(definition.uri_id())
+            .map_or(CypherValue::Null, |document| {
+                // The document is needed to turn the definition's offset into a location.
+                let location = definition.offset().to_location(document).to_presentation();
+                CypherValue::Int(i64::from(location.start_line()))
+            }),
+        _ => CypherValue::Null,
+    }
+}
+
+/// Reads a property of a document node.
+///
+/// Supports `uri`, `path` and `name`. A missing document or an unknown property yields `NULL`.
+fn document_property(graph: &Graph, id: UriId, prop: &str) -> CypherValue {
+    let Some(document) = graph.documents().get(&id) else {
+        return CypherValue::Null;
+    };
+
+    // Non-`file://` URIs (the synthetic built-in document) have no file path, so `path`/`name` fall
+    // back to the raw URI.
+    match prop {
+        // Full document URI, e.g. `file:///app/models/user.rb`.
+        "uri" => CypherValue::Str(document.uri().to_string()),
+        // File-system path, e.g. `/app/models/user.rb`.
+        "path" => CypherValue::Str(document.file_path().map_or_else(
+            || document.uri().to_string(),
+            |path| path.to_string_lossy().into_owned(),
+        )),
+        // Base file name, e.g. `user.rb`.
+        "name" => CypherValue::Str(document.file_name().unwrap_or_else(|| document.uri().to_string())),
+        _ => CypherValue::Null,
+    }
+}
+
+/// Returns the candidate source nodes for a relationship type, used to build reverse adjacency.
+///
+/// The node type returned for each relationship is the only one for which `expand_out` produces
+/// edges: documents for `Defines`/`References`, definitions for `Declares`/`Contains`, and
+/// declarations for every other relationship.
+#[must_use]
+pub fn rel_source_nodes(graph: &Graph, rel: RelType) -> Vec<NodeRef> {
+    match rel {
+        RelType::Defines | RelType::References => graph.documents().keys().map(|id| NodeRef::Document(*id)).collect(),
+        RelType::Declares | RelType::Contains => {
+            graph.definitions().keys().map(|id| NodeRef::Definition(*id)).collect()
+        }
+        RelType::Inherits
+        | RelType::Includes
+        | RelType::Prepends
+        | RelType::Extends
+        | RelType::Owns
+        | RelType::Ancestor
+        | RelType::Descendant => graph
+            .declarations()
+            .keys()
+            .map(|id| NodeRef::Declaration(*id))
+            .collect(),
+    }
+}
+
+/// Expands the outgoing edges of `node` for the given relationship type.
+#[must_use]
+pub fn expand_out(graph: &Graph, node: NodeRef, rel: RelType) -> Vec<NodeRef> {
+    // Each relationship type only originates from one node type. Any other (node, rel) pairing
+    // has no edges and falls through to the empty result.
+    match (node, rel) {
+        (NodeRef::Document(uri_id), RelType::Defines) => graph
+            .documents()
+            .get(&uri_id)
+            .map(|document| {
+                document
+                    .definitions()
+                    .iter()
+                    .map(|id| NodeRef::Definition(*id))
+                    .collect()
+            })
+            .unwrap_or_default(),
+        (NodeRef::Document(uri_id), RelType::References) => document_references(graph, uri_id),
+        (NodeRef::Definition(def_id), RelType::Declares) => graph
+            .definitions()
+            .get(&def_id)
+            .and_then(|definition| graph.definition_to_declaration_id(definition))
+            .map(|decl_id| vec![NodeRef::Declaration(*decl_id)])
+            .unwrap_or_default(),
+        (NodeRef::Definition(def_id), RelType::Contains) => definition_children(graph, def_id),
+        (NodeRef::Declaration(decl_id), RelType::Inherits) => superclasses(graph, decl_id),
+        (NodeRef::Declaration(decl_id), RelType::Includes) => mixin_targets(graph, decl_id, MixinKind::Include),
+        (NodeRef::Declaration(decl_id), RelType::Prepends) => mixin_targets(graph, decl_id, MixinKind::Prepend),
+        (NodeRef::Declaration(decl_id), RelType::Extends) => mixin_targets(graph, decl_id, MixinKind::Extend),
+        (NodeRef::Declaration(decl_id), RelType::Owns) => members(graph, decl_id),
+        (NodeRef::Declaration(decl_id), RelType::Ancestor) => ancestors(graph, decl_id),
+        (NodeRef::Declaration(decl_id), RelType::Descendant) => descendants(graph, decl_id),
+        _ => Vec::new(),
+    }
+}
+
+/// Returns the declarations targeted by the constant references of a document, each at most
+/// once. References that do not resolve to a declaration are skipped.
+fn document_references(graph: &Graph, uri_id: UriId) -> Vec<NodeRef> {
+    let Some(document) = graph.documents().get(&uri_id) else {
+        return Vec::new();
+    };
+
+    let mut seen = HashSet::new();
+    let mut targets = Vec::new();
+    for ref_id in document.constant_references() {
+        if let Some(decl_id) = resolve_ref(graph, *ref_id)
+            && seen.insert(decl_id)
+        {
+            targets.push(NodeRef::Declaration(decl_id));
+        }
+    }
+    targets
+}
+
+/// Returns the member definitions of a class, module or singleton class definition. Every other
+/// definition kind has no children.
+fn definition_children(graph: &Graph, def_id: DefinitionId) -> Vec<NodeRef> {
+    let Some(definition) = graph.definitions().get(&def_id) else {
+        return Vec::new();
+    };
+
+    let children: &[DefinitionId] = match definition {
+        Definition::Class(d) => d.members(),
+        Definition::Module(d) => d.members(),
+        Definition::SingletonClass(d) => d.members(),
+        _ => &[],
+    };
+    children.iter().map(|id| NodeRef::Definition(*id)).collect()
+}
+
+/// Returns the superclass declarations named by the class definitions of a declaration.
+///
+/// Every class definition of the declaration is inspected. Superclass references are resolved
+/// through constant aliases, and each resulting target is reported once.
+fn superclasses(graph: &Graph, decl_id: DeclarationId) -> Vec<NodeRef> {
+    let Some(declaration) = graph.declarations().get(&decl_id) else {
+        return Vec::new();
+    };
+
+    let mut seen = HashSet::new();
+    let mut targets = Vec::new();
+    for definition_id in declaration.definitions() {
+        if let Some(Definition::Class(class_def)) = graph.definitions().get(definition_id)
+            && let Some(superclass_ref) = class_def.superclass_ref()
+            && let Some(target) = resolve_ref_to_namespace(graph, *superclass_ref)
+            && seen.insert(target)
+        {
+            targets.push(NodeRef::Declaration(target));
+        }
+    }
+    targets
+}
+
+/// Which mixin operation `mixin_targets` should select.
+#[derive(Clone, Copy)]
+enum MixinKind {
+    Include,
+    Prepend,
+    Extend,
+}
+
+/// Returns the namespaces mixed into a declaration with the given kind of mixin.
+///
+/// Mixins are collected from every class, module and singleton class definition of the
+/// declaration. Targets are resolved through constant aliases and reported once each.
+fn mixin_targets(graph: &Graph, decl_id: DeclarationId, kind: MixinKind) -> Vec<NodeRef> {
+    let Some(declaration) = graph.declarations().get(&decl_id) else {
+        return Vec::new();
+    };
+
+    let mut seen = HashSet::new();
+    let mut targets = Vec::new();
+    for definition_id in declaration.definitions() {
+        let mixins: &[Mixin] = match graph.definitions().get(definition_id) {
+            Some(Definition::Class(d)) => d.mixins(),
+            Some(Definition::Module(d)) => d.mixins(),
+            Some(Definition::SingletonClass(d)) => d.mixins(),
+            _ => &[],
+        };
+
+        for mixin in mixins {
+            // Keep only the mixins whose variant corresponds to the requested kind.
+            let matches = matches!(
+                (kind, mixin),
+                (MixinKind::Include, Mixin::Include(_))
+                    | (MixinKind::Prepend, Mixin::Prepend(_))
+                    | (MixinKind::Extend, Mixin::Extend(_))
+            );
+            if matches
+                && let Some(target) = resolve_ref_to_namespace(graph, *mixin.constant_reference_id())
+                && seen.insert(target)
+            {
+                targets.push(NodeRef::Declaration(target));
+            }
+        }
+    }
+    targets
+}
+
+/// Returns the member declarations of a namespace declaration. Non-namespace or missing
+/// declarations have no members.
+fn members(graph: &Graph, decl_id: DeclarationId) -> Vec<NodeRef> {
+    graph
+        .declarations()
+        .get(&decl_id)
+        .and_then(Declaration::as_namespace)
+        .map(|namespace| {
+            namespace
+                .members()
+                .values()
+                .map(|id| NodeRef::Declaration(*id))
+                .collect()
+        })
+        .unwrap_or_default()
+}
+
+/// Returns the ancestors of a namespace declaration.
+///
+/// Only `Ancestor::Complete` entries are reported, and the declaration itself is excluded when
+/// it appears in its own ancestor list.
+fn ancestors(graph: &Graph, decl_id: DeclarationId) -> Vec<NodeRef> {
+    use crate::model::declaration::Ancestor;
+
+    graph
+        .declarations()
+        .get(&decl_id)
+        .and_then(Declaration::as_namespace)
+        .map(|namespace| {
+            namespace
+                .ancestors()
+                .iter()
+                .filter_map(|ancestor| match ancestor {
+                    Ancestor::Complete(id) if *id != decl_id => Some(NodeRef::Declaration(*id)),
+                    _ => None,
+                })
+                .collect()
+        })
+        .unwrap_or_default()
+}
+
+/// Returns the descendants recorded on a namespace declaration. Non-namespace or missing
+/// declarations have none.
+fn descendants(graph: &Graph, decl_id: DeclarationId) -> Vec<NodeRef> {
+    graph
+        .declarations()
+        .get(&decl_id)
+        .and_then(Declaration::as_namespace)
+        .map(|namespace| {
+            namespace
+                .descendants()
+                .iter()
+                .map(|id| NodeRef::Declaration(*id))
+                .collect()
+        })
+        .unwrap_or_default()
+}
+
+/// Resolves a constant reference to the declaration of the name it points to.
+fn resolve_ref(graph: &Graph, ref_id: ConstantReferenceId) -> Option<DeclarationId> {
+    let constant_ref = graph.constant_references().get(&ref_id)?;
+    graph.name_id_to_declaration_id(*constant_ref.name_id()).copied()
+}
+
+/// Resolves a constant reference to a namespace declaration, following constant aliases.
+fn resolve_ref_to_namespace(graph: &Graph, ref_id: ConstantReferenceId) -> Option<DeclarationId> {
+    resolve_to_namespace(graph, resolve_ref(graph, ref_id)?)
+}
+
+/// Walks constant-alias chains until reaching a namespace declaration.
+///
+/// The walk is breadth-first over alias targets. `seen` guards against alias cycles. The walk
+/// gives up with `None` as soon as a visited id has no declaration or an alias has no targets.
+fn resolve_to_namespace(graph: &Graph, declaration_id: DeclarationId) -> Option<DeclarationId> {
+    let mut queue = VecDeque::from([declaration_id]);
+    let mut seen = HashSet::new();
+
+    while let Some(current_id) = queue.pop_front() {
+        if !seen.insert(current_id) {
+            continue;
+        }
+
+        match graph.declarations().get(&current_id)? {
+            Declaration::Namespace(_) => return Some(current_id),
+            Declaration::ConstantAlias(_) => {
+                queue.extend(graph.alias_targets(&current_id)?);
+            }
+            // Any other declaration kind is a dead end; keep draining the queue.
+            _ => {}
+        }
+    }
+
+    None
+}
diff --git a/rust/rubydex/src/query/cypher/schema_info.rs b/rust/rubydex/src/query/cypher/schema_info.rs
new file mode 100644
index 000000000..cb0a356bd
--- /dev/null
+++ b/rust/rubydex/src/query/cypher/schema_info.rs
@@ -0,0 +1,374 @@
+//! Static, self-describing catalog of the Cypher property-graph model: the node labels,
+//! relationship types, and node properties that queries can use. This mirrors the mapping
+//! implemented in [`super::schema`] and is exposed via `--schema` for discoverability.
+
+use cypher_parser::OutputFormat;
+use cypher_parser::value::write_json_string;
+
+/// A node label and what graph entity it matches.
+struct LabelInfo {
+    label: &'static str,
+    matches: &'static str,
+    description: &'static str,
+}
+
+/// A relationship type and its endpoints.
+struct RelInfo {
+    name: &'static str,
+    from: &'static str,
+    to: &'static str,
+    description: &'static str,
+}
+
+/// A property exposed on a node type.
+struct PropInfo {
+    node_type: &'static str,
+    property: &'static str,
+    description: &'static str,
+}
+
+/// Builds one `LABELS` entry.
+const fn label_info(label: &'static str, matches: &'static str, description: &'static str) -> LabelInfo {
+    LabelInfo {
+        label,
+        matches,
+        description,
+    }
+}
+
+/// Builds one `RELATIONSHIPS` entry.
+const fn rel_info(
+    name: &'static str,
+    from: &'static str,
+    to: &'static str,
+    description: &'static str,
+) -> RelInfo {
+    RelInfo {
+        name,
+        from,
+        to,
+        description,
+    }
+}
+
+/// Builds one `PROPERTIES` entry.
+const fn prop_info(node_type: &'static str, property: &'static str, description: &'static str) -> PropInfo {
+    PropInfo {
+        node_type,
+        property,
+        description,
+    }
+}
+
+/// Node labels usable in a query, in the order they are listed in the catalog output.
+const LABELS: &[LabelInfo] = &[
+    label_info("Document", "source files", "A source file in the workspace"),
+    label_info("Definition", "per-file occurrences", "A single occurrence of a Ruby construct in one file"),
+    label_info("Declaration", "merged entities", "The global, merged concept of a named entity"),
+    label_info(
+        "Namespace",
+        "Class | Module | SingletonClass declarations",
+        "Grouping label for namespace-like declarations",
+    ),
+    label_info("Class", "declarations of kind Class", "A class declaration"),
+    label_info("Module", "declarations of kind Module", "A module declaration"),
+    label_info("SingletonClass", "declarations of kind SingletonClass", "A singleton class declaration"),
+    label_info("Method", "declarations of kind Method", "A method declaration"),
+    label_info("Constant", "declarations of kind Constant", "A constant declaration"),
+    label_info("ConstantAlias", "declarations of kind ConstantAlias", "A constant alias declaration"),
+    label_info("GlobalVariable", "declarations of kind GlobalVariable", "A global variable declaration"),
+    label_info("InstanceVariable", "declarations of kind InstanceVariable", "An instance variable declaration"),
+    label_info("ClassVariable", "declarations of kind ClassVariable", "A class variable declaration"),
+];
+
+/// Relationship types usable in a query, with the node labels at each end.
+const RELATIONSHIPS: &[RelInfo] = &[
+    rel_info("DEFINES", "Document", "Definition", "A file defines a construct occurrence"),
+    rel_info("DECLARES", "Definition", "Declaration", "An occurrence contributes to a declaration"),
+    rel_info("CONTAINS", "Definition", "Definition", "Lexical nesting of definitions"),
+    rel_info("INHERITS", "Class", "Class", "Superclass relationship"),
+    rel_info("INCLUDES", "Declaration", "Declaration", "`include` mixin"),
+    rel_info("PREPENDS", "Declaration", "Declaration", "`prepend` mixin"),
+    rel_info("EXTENDS", "Declaration", "Declaration", "`extend` mixin"),
+    rel_info("OWNS", "Declaration", "Declaration", "A namespace owns a member declaration"),
+    rel_info("ANCESTOR", "Declaration", "Declaration", "An entry in the linearized ancestor chain"),
+    rel_info("DESCENDANT", "Declaration", "Declaration", "A declaration that descends from this one"),
+    rel_info("REFERENCES", "Document", "Declaration", "A file references a constant declaration"),
+];
+
+/// Properties readable on each node type. `(any)` marks properties available on every node.
+const PROPERTIES: &[PropInfo] = &[
+    prop_info("(any)", "label", "The node's top-level label / kind"),
+    prop_info("(any)", "kind", "Alias of `label`"),
+    prop_info("Declaration", "name", "Fully qualified name"),
+    prop_info("Declaration", "unqualified_name", "Name without its namespace prefix"),
+    prop_info("Declaration", "visibility", "public / protected / private (when applicable)"),
+    prop_info("Declaration", "definition_count", "Number of definitions that compose the declaration"),
+    prop_info("Definition", "name", "Name of the declaration this definition contributes to"),
+    prop_info("Definition", "file", "URI of the file containing the definition"),
+    prop_info("Definition", "line", "1-indexed start line of the definition"),
+    prop_info("Document", "uri", "Full document URI"),
+    prop_info("Document", "path", "File system path of the document"),
+    prop_info("Document", "name", "Base file name of the document"),
+];
+
+/// Produces the schema catalog as text in the requested output format.
+#[must_use]
+pub fn describe(format: OutputFormat) -> String {
+    match format {
+        OutputFormat::Json => render_json(),
+        OutputFormat::Table => render_table(),
+    }
+}
+
+/// Renders the catalog as three titled, aligned tables: labels, relationships, properties.
+fn render_table() -> String {
+    // Flatten each catalog into rows of cells first, then emit the sections in order.
+    let labels: Vec<[&str; 3]> = LABELS
+        .iter()
+        .map(|info| [info.label, info.matches, info.description])
+        .collect();
+    let relationships: Vec<[&str; 4]> = RELATIONSHIPS
+        .iter()
+        .map(|info| [info.name, info.from, info.to, info.description])
+        .collect();
+    let properties: Vec<[&str; 3]> = PROPERTIES
+        .iter()
+        .map(|info| [info.node_type, info.property, info.description])
+        .collect();
+
+    let mut rendered = String::from("Node labels\n");
+    push_table(&mut rendered, &["Label", "Matches", "Description"], &labels);
+
+    rendered.push_str("\nRelationship types\n");
+    push_table(&mut rendered, &["Type", "From", "To", "Description"], &relationships);
+
+    rendered.push_str("\nProperties\n");
+    push_table(&mut rendered, &["Node type", "Property", "Description"], &properties);
+
+    rendered
+}
+
+/// Renders a single aligned table section. `N` is the column count.
+fn push_table<const N: usize>(out: &mut String, headers: &[&str; N], rows: &[[&str; N]]) {
+    // Each column is as wide as its widest cell, header included. Widths are counted in chars
+    // rather than bytes so non-ASCII cells still line up.
+    let mut widths: [usize; N] = std::array::from_fn(|i| headers[i].chars().count());
+    for row in rows {
+        for (index, cell) in row.iter().enumerate() {
+            widths[index] = widths[index].max(cell.chars().count());
+        }
+    }
+
+    push_table_row(out, headers, &widths);
+
+    // Rule under the header: dashes per column, joined by `-+-` to match the ` | ` separator.
+    for (index, width) in widths.iter().enumerate() {
+        if index > 0 {
+            out.push_str("-+-");
+        }
+        out.push_str(&"-".repeat(*width));
+    }
+    out.push('\n');
+
+    for row in rows {
+        push_table_row(out, row, &widths);
+    }
+}
+
+/// Appends one table row: cells separated by ` | `, each right-padded with spaces to its column
+/// width (the last column included), followed by a newline.
+fn push_table_row<const N: usize>(out: &mut String, cells: &[&str; N], widths: &[usize; N]) {
+    for (index, width) in widths.iter().enumerate() {
+        if index > 0 {
+            out.push_str(" | ");
+        }
+        let cell = cells[index];
+        out.push_str(cell);
+        // `saturating_sub` keeps this safe even if a cell is wider than its column.
+        out.push_str(&" ".repeat(width.saturating_sub(cell.chars().count())));
+    }
+    out.push('\n');
+}
+
+/// Renders the catalog as one JSON object with the keys `node_labels`, `relationships` and
+/// `properties`, each holding an array of objects. String values go through
+/// `write_json_string`; the keys are fixed literals.
+fn render_json() -> String {
+    let mut out = String::from("{\"node_labels\":[");
+    for (index, label) in LABELS.iter().enumerate() {
+        if index > 0 {
+            out.push(',');
+        }
+        out.push_str("{\"label\":");
+        write_json_string(&mut out, label.label);
+        out.push_str(",\"matches\":");
+        write_json_string(&mut out, label.matches);
+        out.push_str(",\"description\":");
+        write_json_string(&mut out, label.description);
+        out.push('}');
+    }
+
+    out.push_str("],\"relationships\":[");
+    for (index, rel) in RELATIONSHIPS.iter().enumerate() {
+        if index > 0 {
+            out.push(',');
+        }
+        out.push_str("{\"type\":");
+        write_json_string(&mut out, rel.name);
+        out.push_str(",\"from\":");
+        write_json_string(&mut out, rel.from);
+        out.push_str(",\"to\":");
+        write_json_string(&mut out, rel.to);
+        out.push_str(",\"description\":");
+        write_json_string(&mut out, rel.description);
+        out.push('}');
+    }
+
+    out.push_str("],\"properties\":[");
+    for (index, prop) in PROPERTIES.iter().enumerate() {
+        if index > 0 {
+            out.push(',');
+        }
+        out.push_str("{\"node_type\":");
+        write_json_string(&mut out, prop.node_type);
+        out.push_str(",\"property\":");
+        write_json_string(&mut out, prop.property);
+        out.push_str(",\"description\":");
+        write_json_string(&mut out, prop.description);
+        out.push('}');
+    }
+
+    out.push_str("]}");
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn table_lists_labels_relationships_and_properties() {
+        let output = describe(OutputFormat::Table);
+        assert!(output.contains("Node labels"));
+        assert!(output.contains("Relationship types"));
+        assert!(output.contains("Properties"));
+        assert!(output.contains("Namespace"));
+        assert!(output.contains("INHERITS"));
+        assert!(output.contains("unqualified_name"));
+    }
+
+    #[test]
+    fn json_is_well_formed_object() {
+        let output = describe(OutputFormat::Json);
+        assert!(output.starts_with("{\"node_labels\":["));
+        assert!(output.contains("\"relationships\":["));
+        assert!(output.contains("\"properties\":["));
+        assert!(output.contains("\"type\":\"DEFINES\""));
+        assert!(output.ends_with("]}"));
+    }
+}
diff --git a/rust/rubydex/src/query/cypher/tests.rs b/rust/rubydex/src/query/cypher/tests.rs
new file mode 100644
index 000000000..5b0dfce9f
--- /dev/null
+++ b/rust/rubydex/src/query/cypher/tests.rs
@@ -0,0 +1,213 @@
+use super::run_query;
+use crate::model::graph::Graph;
+use crate::test_utils::GraphTest;
+use cypher_parser::{CypherValue, OutputFormat, ResultSet, execute, parse};
+
+// Parser-only tests live in the `cypher-parser` crate. These exercise the executor and the
+// end-to-end query/format path against a real graph.
+
+/// Builds a resolved graph from one in-memory Ruby file: a module, a base class with one method,
+/// and two subclasses, one of which includes the module.
+fn fixture_graph() -> Graph {
+    let mut context = GraphTest::new();
+    context.index_uri(
+        "file:///zoo.rb",
+        "
+        module Walkable
+        end
+
+        class Animal
+          def speak; end
+        end
+
+        class Dog < Animal
+          include Walkable
+        end
+
+        class Cat < Animal
+        end
+        ",
+    );
+    context.resolve();
+    context.into_graph()
+}
+
+/// Parses and executes `query` against `graph`, panicking on any parse or execution error.
+fn run(graph: &Graph, query: &str) -> ResultSet {
+    let parsed = parse(query).unwrap();
+    execute(graph, &parsed).unwrap()
+}
+
+/// Returns the display strings of one result column, sorted so assertions do not depend on the
+/// order in which rows were produced.
+fn column_strings(result: &ResultSet, column: usize) -> Vec<String> {
+    let mut values: Vec<String> = result.rows.iter().map(|row| row[column].to_display_string()).collect();
+    values.sort();
+    values
+}
+
+#[test]
+fn scans_declarations_by_label_and_property() {
+    let graph = fixture_graph();
+    let result = run(&graph, "MATCH (c:Class {name: 'Dog'}) RETURN c.name");
+    assert_eq!(result.columns, vec!["c.name".to_string()]);
+    assert_eq!(column_strings(&result, 0), vec!["Dog".to_string()]);
+}
+
+#[test]
+fn scans_label_disjunction() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (n:Class|Module) WHERE n.name = 'Animal' OR n.name = 'Walkable' RETURN n.name, n.kind",
+    );
+    let names = column_strings(&result, 0);
+    assert_eq!(names, vec!["Animal".to_string(), "Walkable".to_string()]);
+}
+
+#[test]
+fn follows_inherits_relationship() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE c.name = 'Dog' RETURN p.name",
+    );
+    assert_eq!(column_strings(&result, 0), vec!["Animal".to_string()]);
+}
+
+#[test]
+fn follows_incoming_relationship() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (p:Class)<-[:INHERITS]-(c:Class) WHERE p.name = 'Animal' RETURN c.name",
+    );
+    assert_eq!(column_strings(&result, 0), vec!["Cat".to_string(), "Dog".to_string()]);
+}
+
+#[test]
+fn follows_includes_relationship() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class)-[:INCLUDES]->(m) WHERE c.name = 'Dog' RETURN m.name",
+    );
+    assert_eq!(column_strings(&result, 0), vec!["Walkable".to_string()]);
+}
+
+#[test]
+fn follows_owns_to_method() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class)-[:OWNS]->(m:Method) WHERE c.name = 'Animal' RETURN m.unqualified_name",
+    );
+    assert!(column_strings(&result, 0).iter().any(|name| name.contains("speak")));
+}
+
+#[test]
+fn variable_length_ancestor_chain() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class)-[:ANCESTOR]->(a) WHERE c.name = 'Dog' RETURN a.name",
+    );
+    let ancestors = column_strings(&result, 0);
+    assert!(ancestors.contains(&"Animal".to_string()));
+    assert!(ancestors.contains(&"Walkable".to_string()));
+    assert!(ancestors.contains(&"Object".to_string()));
+}
+
+#[test]
+fn traverses_document_to_declaration() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (d:Document)-[:DEFINES]->(def:Definition)-[:DECLARES]->(decl) WHERE decl.name = 'Dog' RETURN decl.name",
+    );
+    assert_eq!(column_strings(&result, 0), vec!["Dog".to_string()]);
+}
+
+#[test]
+fn aggregation_counts_subclasses() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE p.name = 'Animal' RETURN p.name, count(c) AS subclasses",
+    );
+    assert_eq!(result.rows.len(), 1);
+    assert_eq!(result.rows[0][0], CypherValue::Str("Animal".into()));
+    assert_eq!(result.rows[0][1], CypherValue::Int(2));
+}
+
+#[test]
+fn distinct_and_order_and_limit() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class)-[:INHERITS]->(p:Class) RETURN DISTINCT p.name ORDER BY p.name LIMIT 1",
+    );
+    assert_eq!(result.rows.len(), 1);
+    assert_eq!(result.rows[0][0], CypherValue::Str("Animal".into()));
+}
+
+#[test]
+fn where_with_boolean_operators() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (c:Class) WHERE c.name = 'Dog' OR c.name = 'Cat' RETURN c.name",
+    );
+    assert_eq!(column_strings(&result, 0), vec!["Cat".to_string(), "Dog".to_string()]);
+}
+
+#[test]
+fn run_query_table_output() {
+    let graph = fixture_graph();
+    let output = run_query(
+        &graph,
+        "MATCH (c:Class {name: 'Dog'}) RETURN c.name",
+        OutputFormat::Table,
+    )
+    .unwrap();
+    assert!(output.contains("c.name"));
+    assert!(output.contains("Dog"));
+    assert!(output.contains("1 row"));
+}
+
+#[test]
+fn run_query_json_output() {
+    let graph = fixture_graph();
+    let output = run_query(
+        &graph,
+        "MATCH (c:Class {name: 'Dog'}) RETURN c.name",
+        OutputFormat::Json,
+    )
+    .unwrap();
+    assert_eq!(output, "[{\"c.name\":\"Dog\"}]");
+}
+
+#[test]
+fn unknown_relationship_type_errors() {
+    let graph = fixture_graph();
+    let parsed = parse("MATCH (a)-[:BOGUS]->(b) RETURN a").unwrap();
+    assert!(execute(&graph, &parsed).is_err());
+}
+
+#[test]
+fn document_uri_path_and_name_are_distinct() {
+    let graph = fixture_graph();
+    let result = run(
+        &graph,
+        "MATCH (d:Document) WHERE d.uri = 'file:///zoo.rb' RETURN d.uri, d.path, d.name",
+    );
+    assert_eq!(
+        result.columns,
+        vec!["d.uri".to_string(), "d.path".to_string(), "d.name".to_string()]
+    );
+    // `uri` is the full URI and `name` is the basename on every platform.
+    assert_eq!(column_strings(&result, 0), vec!["file:///zoo.rb".to_string()]);
+    assert_eq!(column_strings(&result, 2), vec!["zoo.rb".to_string()]);
+
+    // `path` is the decoded file-system path. A drive-less `file://` URI has no valid Windows path,
+    // so there it falls back to the raw URI; on Unix it decodes to `/zoo.rb`.
+    #[cfg(not(windows))]
+    assert_eq!(column_strings(&result, 1), vec!["/zoo.rb".to_string()]);
+    #[cfg(windows)]
+    assert_eq!(column_strings(&result, 1), vec!["file:///zoo.rb".to_string()]);
+}
diff --git a/rust/rubydex/tests/cli.rs b/rust/rubydex/tests/cli.rs
index 5c5a4cb59..7227dea35 100644
--- a/rust/rubydex/tests/cli.rs
+++ b/rust/rubydex/tests/cli.rs
@@ -89,6 +89,74 @@ fn dot_flag() {
     });
 }
 
+#[test]
+fn query_flag_table_output() {
+    with_context(|context| {
+        context.write("zoo.rb", "class Animal\nend\n\nclass Dog < Animal\nend\n");
+
+        rdx(&[
+            context.absolute_path().to_str().unwrap(),
+            "--query",
+            "MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE c.name = 'Dog' RETURN p.name",
+        ])
+        .success()
+        .stdout(predicate::str::contains("p.name"))
+        .stdout(predicate::str::contains("Animal"))
+        .stdout(predicate::str::contains("1 row"));
+    });
+}
+
+#[test]
+fn query_flag_json_output() {
+    with_context(|context| {
+        context.write("zoo.rb", "class Animal\nend\n\nclass Dog < Animal\nend\n");
+
+        rdx(&[
+            context.absolute_path().to_str().unwrap(),
+            "--query",
+            "MATCH (c:Class {name: 'Dog'}) RETURN c.name",
+            "--format",
+            "json",
+        ])
+        .success()
+        .stdout(predicate::str::contains("[{\"c.name\":\"Dog\"}]"));
+    });
+}
+
+#[test]
+fn schema_flag_describes_model() {
+    rdx(&["--schema"])
+        .success()
+        .stdout(predicate::str::contains("Node labels"))
+        .stdout(predicate::str::contains("Relationship types"))
+        .stdout(predicate::str::contains("Properties"))
+        .stdout(predicate::str::contains("INHERITS"))
+        .stdout(predicate::str::contains("unqualified_name"));
+}
+
+#[test]
+fn schema_flag_json_format() {
+    rdx(&["--schema", "--format", "json"])
+        .success()
+        .stdout(predicate::str::contains("\"node_labels\":["))
+        .stdout(predicate::str::contains("\"type\":\"DEFINES\""));
+}
+
+#[test]
+fn query_flag_reports_syntax_error() {
+    with_context(|context| {
+        context.write("zoo.rb", "class Animal\nend\n");
+
+        rdx(&[
+            context.absolute_path().to_str().unwrap(),
+            "--query",
+            "MATCH (c RETURN c",
+        ])
+        .failure()
+        .stderr(predicate::str::contains("Cypher syntax error"));
+    });
+}
+
 #[test]
 fn stop_after() {
     with_context(|context| {
diff --git a/test/graph_test.rb b/test/graph_test.rb
index 5a73a5958..b684c4a39 100644
--- a/test/graph_test.rb
+++ b/test/graph_test.rb
@@ -2,6 +2,7 @@
 
 require "test_helper"
 require "helpers/context"
+require "json"
 
 class GraphTest < Minitest::Test
   include Test::Helpers::WithContext
@@ -1440,6 +1441,126 @@ def test_document_returns_correct_document_with_multiple_documents
     assert_equal("file:///bar.rb", document.uri)
   end
 
+  def test_cypher_schema_table
+    output = Rubydex::Query.schema
+
+    assert_match(/Node labels/, output)
+    assert_match(/Relationship types/, output)
+    assert_match(/Properties/, output)
+    assert_match(/INHERITS/, output)
+    assert_match(/unqualified_name/, output)
+  end
+
+  def test_cypher_schema_json
+    output = Rubydex::Query.schema(:json)
+
+    parsed = JSON.parse(output)
+    assert_equal(["node_labels", "relationships", "properties"], parsed.keys)
+    assert(parsed["relationships"].any? { |r| r["type"] == "DEFINES" })
+  end
+
+  def test_parsed_query_runs_against_graph
+    with_context do |context|
+      context.write!("zoo.rb", "class Animal; end\nclass Dog < Animal; end\n")
+
+      query = Rubydex::Query.parse("MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE c.name = 'Dog' RETURN p.name")
+      assert_instance_of(Rubydex::Query, query)
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      assert_equal("[{\"p.name\":\"Animal\"}]", query.render(graph, :json))
+    end
+  end
+
+  def test_parse_raises_on_syntax_error
+    error = assert_raises(ArgumentError) { Rubydex::Query.parse("MATCH (c RETURN c") }
+    assert_match(/Cypher syntax error/, error.message)
+  end
+
+  def test_parsed_query_reusable_across_graphs
+    query = Rubydex::Query.parse("MATCH (c:Class {name: 'Dog'}) RETURN c.name")
+
+    with_context do |context|
+      context.write!("zoo.rb", "class Dog; end\n")
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      assert_equal("[{\"c.name\":\"Dog\"}]", query.render(graph, :json))
+    end
+  end
+
+  def test_query_returns_table_output
+    with_context do |context|
+      context.write!("zoo.rb", <<~RUBY)
+        class Animal; end
+        class Dog < Animal; end
+        class Cat < Animal; end
+      RUBY
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      query = Rubydex::Query.parse("MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE p.name = 'Animal' RETURN c.name ORDER BY c.name")
+      output = query.render(graph)
+
+      assert_match(/c\.name/, output)
+      assert_match(/Cat/, output)
+      assert_match(/Dog/, output)
+      assert_match(/2 rows/, output)
+    end
+  end
+
+  def test_query_label_disjunction
+    with_context do |context|
+      context.write!("zoo.rb", <<~RUBY)
+        class Animal; end
+        module Walkable; end
+        class Dog < Animal; end
+      RUBY
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      query = Rubydex::Query.parse(
+        "MATCH (n:Class|Module) WHERE n.name = 'Animal' OR n.name = 'Walkable' RETURN n.name ORDER BY n.name",
+      )
+
+      assert_equal("[{\"n.name\":\"Animal\"},{\"n.name\":\"Walkable\"}]", query.render(graph, :json))
+    end
+  end
+
+  def test_query_accepts_string_format
+    with_context do |context|
+      context.write!("zoo.rb", "class Dog; end\n")
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      query = Rubydex::Query.parse("MATCH (c:Class {name: 'Dog'}) RETURN c.name")
+      assert_equal("[{\"c.name\":\"Dog\"}]", query.render(graph, "json"))
+    end
+  end
+
+  def test_render_raises_on_invalid_format
+    with_context do |context|
+      context.write!("zoo.rb", "class Dog; end\n")
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      query = Rubydex::Query.parse("MATCH (c:Class) RETURN c.name")
+      error = assert_raises(ArgumentError) { query.render(graph, :yaml) }
+      assert_match(/unknown query format/, error.message)
+    end
+  end
+
   private
 
   def assert_diagnostics(expected, actual)