diff --git a/ext/rubydex/graph.c b/ext/rubydex/graph.c
index 5e4414e6..3f64b318 100644
--- a/ext/rubydex/graph.c
+++ b/ext/rubydex/graph.c
@@ -1,5 +1,6 @@
 #include "graph.h"
 #include "declaration.h"
+#include "definition.h"
 #include "diagnostic.h"
 #include "document.h"
 #include "location.h"
@@ -847,6 +848,84 @@ static VALUE rdxr_query_render(int argc, VALUE *argv, VALUE self) {
     return output;
 }
 
+// Converts one structured result cell into a Ruby value:
+//   Null -> nil, Bool -> true/false, Int -> Integer, Str -> UTF-8 String (nil when the C string is
+//   NULL), List -> Array (converted recursively), Node -> a handle built as
+//   `klass.new(graph_obj, id)` where `klass` is picked from the node's category and kind.
+// A tag this function does not know yields nil; a category it does not know is built as a Document.
+// Allocates Ruby objects and instantiates classes, so it may raise.
+static VALUE cypher_cell_to_value(VALUE graph_obj, const struct CCell *cell) {
+    switch (cell->tag) {
+    case CCellTag_Null:
+        return Qnil;
+    case CCellTag_Bool:
+        return cell->payload.bool_val ? Qtrue : Qfalse;
+    case CCellTag_Int:
+        return LL2NUM(cell->payload.int_val);
+    case CCellTag_Str:
+        return cell->payload.str_val == NULL ? Qnil : rb_utf8_str_new_cstr(cell->payload.str_val);
+    case CCellTag_List: {
+        VALUE array = rb_ary_new_capa((long)cell->payload.list.len);
+        for (size_t i = 0; i < cell->payload.list.len; i++) {
+            rb_ary_push(array, cypher_cell_to_value(graph_obj, &cell->payload.list.items[i]));
+        }
+        return array;
+    }
+    case CCellTag_Node: {
+        VALUE argv[] = {graph_obj, ULL2NUM(cell->payload.node.id)};
+        VALUE klass;
+        switch (cell->payload.node.category) {
+        case CNodeCategory_Declaration:
+            klass = rdxi_declaration_class_for_kind((CDeclarationKind)cell->payload.node.kind);
+            break;
+        case CNodeCategory_Definition:
+            klass = rdxi_definition_class_for_kind((DefinitionKind)cell->payload.node.kind);
+            break;
+        case CNodeCategory_Document:
+        default:
+            klass = cDocument;
+            break;
+        }
+        return rb_class_new_instance(2, argv, klass);
+    }
+    default:
+        return Qnil;
+    }
+}
+
+// Arguments for `query_rows_to_ruby`, passed through the single VALUE slot `rb_ensure` offers.
+struct query_rows_args {
+    VALUE graph_obj;
+    const struct CResultSet *result;
+};
+
+// `rb_ensure` body: builds the Array of row Hashes from the native result set. May raise.
+static VALUE query_rows_to_ruby(VALUE arg) {
+    const struct query_rows_args *args = (const struct query_rows_args *)arg;
+    const struct CResultSet *result = args->result;
+    VALUE rows = rb_ary_new_capa((long)result->row_count);
+
+    for (size_t r = 0; r < result->row_count; r++) {
+        const struct CResultRow *row = &result->rows[r];
+        VALUE hash = rb_hash_new();
+        // Only pair up cells that have a column name.
+        for (size_t c = 0; c < row->len && c < result->column_count; c++) {
+            // A column pointer can be NULL; never hand NULL to rb_utf8_str_new_cstr.
+            const char *name = result->columns[c];
+            VALUE key = rb_utf8_str_new_cstr(name == NULL ? "" : name);
+            rb_hash_aset(hash, key, cypher_cell_to_value(args->graph_obj, &row->cells[c]));
+        }
+        rb_ary_push(rows, hash);
+    }
+
+    return rows;
+}
+
+// `rb_ensure` cleanup: releases the native result set on normal return and on any non-local exit.
+static VALUE query_rows_release(VALUE arg) {
+    rdx_result_set_free((struct CResultSet *)arg);
+    return Qnil;
+}
+
+// Rubydex::Query#run(graph) -> Array[Hash[String, Object]]
+// Runs this parsed query against the given graph and returns the rows as Ruby objects: each row is a
+// Hash keyed by RETURN column name. Scalar cells become String/Integer/true/false/nil, lists become
+// Arrays, and node cells become Declaration / Definition / Document handles. Raises ArgumentError on
+// an execution error. The native result set is freed even when building the Ruby objects raises.
+static VALUE rdxr_query_run(VALUE self, VALUE graph_obj) {
+    void *query;
+    TypedData_Get_Struct(self, void *, &query_type, query);
+
+    void *graph;
+    TypedData_Get_Struct(graph_obj, void *, &graph_type, graph);
+
+    struct CRunRows run = rdx_query_run_rows(query, graph);
+
+    if (run.error != NULL) {
+        // Copy the message into a Ruby String first so the C string can be released before raising.
+        VALUE message = rb_utf8_str_new_cstr(run.error);
+        free_c_string(run.error);
+        rb_raise(rb_eArgError, "%s", StringValueCStr(message));
+    }
+
+    // Neither an error nor a result: refuse to dereference a NULL result set.
+    if (run.result == NULL) {
+        rb_raise(rb_eArgError, "query execution failed without an error message");
+    }
+
+    struct query_rows_args args = {.graph_obj = graph_obj, .result = run.result};
+    return rb_ensure(query_rows_to_ruby, (VALUE)&args, query_rows_release, (VALUE)run.result);
+}
+
 void rdxi_initialize_graph(VALUE moduleRubydex) {
     mRubydex = moduleRubydex;
     cGraph = rb_define_class_under(mRubydex, "Graph", rb_cObject);
@@ -886,5 +965,6 @@ void rdxi_initialize_graph(VALUE moduleRubydex) {
     rb_undef_alloc_func(cQuery);
     rb_define_singleton_method(cQuery, "parse", rdxr_query_parse, 1);
     rb_define_singleton_method(cQuery, "schema", rdxr_cypher_schema, -1);
+    rb_define_method(cQuery, "run", rdxr_query_run, 1);
     rb_define_method(cQuery, "render", rdxr_query_render, -1);
 }
diff --git a/rbi/rubydex.rbi b/rbi/rubydex.rbi
index 618d34e6..a98f2343 100644
--- a/rbi/rubydex.rbi
+++ b/rbi/rubydex.rbi
@@ -281,6 +281,9 @@ class Rubydex::Query
     def schema(format = :table); end
   end
 
+  sig { params(graph: Rubydex::Graph).returns(T::Array[T::Hash[String, T.untyped]]) }
+  def run(graph); end
+
   sig { params(graph: Rubydex::Graph, format: 
T.any(String, Symbol)).returns(String) } def render(graph, format = :table); end end diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 70a1189d..8b5778ab 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -320,9 +320,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "cypher-parser" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed0d1e561d51e651bdf70f8439da293fd0f1fe34d8431059061eadaefc7abb1" +checksum = "3720cc550f59900be9dfa59d529a1280d7d8c440db8d5057e5e376c3e71746b2" [[package]] name = "darling" diff --git a/rust/rubydex-sys/src/graph_api.rs b/rust/rubydex-sys/src/graph_api.rs index 99c6e8ec..9452ec3c 100644 --- a/rust/rubydex-sys/src/graph_api.rs +++ b/rust/rubydex-sys/src/graph_api.rs @@ -3,6 +3,7 @@ use crate::declaration_api::CDeclaration; use crate::declaration_api::DeclarationsIter; use crate::declaration_api::decl_id_from_char_ptr; +use crate::definition_api::map_definition_to_kind; use crate::document_api::DocumentsIter; use crate::reference_api::{CConstantReference, CMethodReference, ConstantReferencesIter, MethodReferencesIter}; use crate::{name_api, utils}; @@ -14,7 +15,8 @@ use rubydex::model::ids::{DeclarationId, NameId, UriId}; use rubydex::model::keywords; use rubydex::model::name::NameRef; use rubydex::model::visibility::Visibility; -use rubydex::query::cypher::{self, OutputFormat}; +use rubydex::query::cypher::schema::NodeRef; +use rubydex::query::cypher::{self, CypherValue, OutputFormat}; use rubydex::query::{CompletionCandidate, CompletionContext, CompletionReceiver}; use rubydex::resolution::Resolver; use rubydex::{indexing, integrity, listing, query}; @@ -1098,6 +1100,308 @@ pub unsafe extern "C" fn rdx_query_run( }) } +/// Tag for a structured result cell. 
+#[repr(C)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CCellTag {
+    // No payload is carried for this tag.
+    Null = 0,
+    // `payload.bool_val` is the active field.
+    Bool = 1,
+    // `payload.int_val` is the active field.
+    Int = 2,
+    // `payload.str_val` is the active field.
+    Str = 3,
+    // `payload.node` is the active field.
+    Node = 4,
+    // `payload.list` is the active field.
+    List = 5,
+}
+
+/// Which family of graph node a `Node` cell refers to (selects the Ruby handle class family).
+#[repr(C)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CNodeCategory {
+    Declaration = 0,
+    Definition = 1,
+    Document = 2,
+}
+
+/// `Node` cell payload: which handle family to build, the kind value, and the entity id.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct CNode {
+    /// Which handle family to build.
+    pub category: CNodeCategory,
+    /// The `CDeclarationKind`/`DefinitionKind` value (ignored for documents).
+    pub kind: u32,
+    /// The entity id to build the handle from.
+    pub id: u64,
+}
+
+/// `List` cell payload: a heap array of nested cells (freed by `rdx_result_set_free`).
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct CList {
+    // Pointer to the first nested cell.
+    pub items: *mut CCell,
+    // Number of cells reachable through `items`.
+    pub len: usize,
+}
+
+/// Payload of a `CCell`. The active field is selected by the cell's `tag`; reading any other field
+/// is undefined. `Null` carries no payload.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub union CCellPayload {
+    /// `Bool`.
+    pub bool_val: bool,
+    /// `Int`.
+    pub int_val: i64,
+    /// `Str`: owned C string (freed by `rdx_result_set_free`).
+    pub str_val: *const c_char,
+    /// `Node`.
+    pub node: CNode,
+    /// `List`.
+    pub list: CList,
+}
+
+/// A single structured result value: a `tag` discriminant plus a `payload` union whose active
+/// field the tag selects.
+#[repr(C)]
+pub struct CCell {
+    pub tag: CCellTag,
+    pub payload: CCellPayload,
+}
+
+impl CCell {
+    // Pairs a tag with its payload; the caller must initialise the union field that `tag` selects.
+    fn new(tag: CCellTag, payload: CCellPayload) -> Self {
+        Self { tag, payload }
+    }
+
+    // Builds a `Null` cell. A union literal needs exactly one field, so `int_val: 0` is only a
+    // placeholder; the `Null` tag carries no payload.
+    fn null() -> Self {
+        Self {
+            tag: CCellTag::Null,
+            payload: CCellPayload { int_val: 0 },
+        }
+    }
+}
+
+/// One row of structured cells.
+#[repr(C)]
+pub struct CResultRow {
+    /// Pointer to the first cell of the row; null when the row has no cells.
+    pub cells: *mut CCell,
+    /// Number of cells reachable through `cells`.
+    pub len: usize,
+}
+
+/// A structured query result: column names plus rows of typed cells.
+#[repr(C)]
+pub struct CResultSet {
+    /// Array of `column_count` owned C strings (an entry is null if the name held a NUL byte).
+    pub columns: *const *const c_char,
+    pub column_count: usize,
+    /// Array of `row_count` rows.
+    pub rows: *mut CResultRow,
+    pub row_count: usize,
+}
+
+/// The result of running a query for structured rows: either a result set or an error message.
+#[repr(C)]
+pub struct CRunRows {
+    /// Non-null on success; free with `rdx_result_set_free`.
+    pub result: *mut CResultSet,
+    /// Non-null on error; free with `free_c_string`.
+    pub error: *const c_char,
+}
+
+/// Copies `value` into a newly allocated, NUL-terminated C string and leaks it to the caller.
+/// Returns a null pointer when `value` contains an interior NUL byte, because `CString::new`
+/// rejects such input. A non-null result must eventually be reclaimed with `CString::from_raw`.
+fn cstring_raw(value: &str) -> *const c_char {
+    CString::new(value).map_or(ptr::null(), |s| s.into_raw().cast_const())
+}
+
+/// Converts a `CypherValue` into a `CCell`, resolving node identity to a handle-buildable category +
+/// kind + id. A node whose id cannot be decoded or found falls back to its display name as a string.
+fn build_cell(graph: &Graph, value: &CypherValue) -> CCell {
+    match value {
+        CypherValue::Null => CCell::null(),
+        CypherValue::Bool(b) => CCell::new(CCellTag::Bool, CCellPayload { bool_val: *b }),
+        CypherValue::Int(i) => CCell::new(CCellTag::Int, CCellPayload { int_val: *i }),
+        CypherValue::Str(s) => CCell::new(CCellTag::Str, CCellPayload { str_val: cstring_raw(s) }),
+        CypherValue::List(items) => {
+            let cells: Vec<CCell> = items.iter().map(|item| build_cell(graph, item)).collect();
+            let len = cells.len();
+            // An empty list is exported as (null, 0) so the consumer never sees a dangling pointer.
+            let items = if cells.is_empty() {
+                ptr::null_mut()
+            } else {
+                // Ownership of the boxed slice moves to the C side until `rdx_result_set_free`.
+                Box::into_raw(cells.into_boxed_slice()).cast::<CCell>()
+            };
+            CCell::new(CCellTag::List, CCellPayload { list: CList { items, len } })
+        }
+        CypherValue::Node { id, name, .. } => build_node_cell(graph, id)
+            .unwrap_or_else(|| CCell::new(CCellTag::Str, CCellPayload { str_val: cstring_raw(name) })),
+    }
+}
+
+/// Builds a `Node` cell by decoding the opaque node id and looking up its kind in the graph.
+fn build_node_cell(graph: &Graph, encoded_id: &str) -> Option<CCell> {
+    // Every arm yields `None` when the id cannot be decoded or is absent from `graph`, which lets
+    // the caller fall back to another representation.
+    match NodeRef::decode(encoded_id)? {
+        NodeRef::Declaration(id) => {
+            let kind = CDeclaration::kind_from_declaration(graph.declarations().get(&id)?);
+            Some(node_cell(CNodeCategory::Declaration, kind as u32, *id))
+        }
+        NodeRef::Definition(id) => {
+            let kind = map_definition_to_kind(graph.definitions().get(&id)?);
+            Some(node_cell(CNodeCategory::Definition, kind as u32, *id))
+        }
+        // Documents have no kind; 0 is stored and the field is ignored for this category.
+        NodeRef::Document(id) => graph
+            .documents()
+            .contains_key(&id)
+            .then(|| node_cell(CNodeCategory::Document, 0, *id)),
+    }
+}
+
+/// Wraps a category, kind and entity id into a `Node`-tagged cell.
+fn node_cell(category: CNodeCategory, kind: u32, id: u64) -> CCell {
+    CCell::new(
+        CCellTag::Node,
+        CCellPayload {
+            node: CNode { category, kind, id },
+        },
+    )
+}
+
+/// Runs a previously parsed query and returns the structured result set (column names + typed rows),
+/// so callers can build their own value/handle objects instead of a formatted string.
+///
+/// # Safety
+///
+/// - `query` must be a valid pointer returned by `rdx_cypher_parse`.
+/// - `pointer` must be a valid `GraphPointer` previously returned by this crate.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn rdx_query_run_rows(query: *const c_void, pointer: GraphPointer) -> CRunRows {
+    if query.is_null() {
+        return CRunRows {
+            result: ptr::null_mut(),
+            error: cstring_raw("query is null"),
+        };
+    }
+
+    // SAFETY: the caller promises `query` is a valid parsed-query pointer (see the safety contract).
+    let parsed = unsafe { &*query.cast::<cypher::Query>() };
+
+    with_graph(pointer, |graph| {
+        let result_set = match cypher::execute(graph, parsed) {
+            Ok(result_set) => result_set,
+            Err(error) => {
+                // `cstring_raw` returns null for text containing a NUL byte. The contract is
+                // "exactly one of result/error is non-null", so substitute a fixed message rather
+                // than hand back two null pointers.
+                let message = cstring_raw(&error.to_string());
+                return CRunRows {
+                    result: ptr::null_mut(),
+                    error: if message.is_null() {
+                        cstring_raw("query execution failed")
+                    } else {
+                        message
+                    },
+                };
+            }
+        };
+
+        // Each `Box::into_raw` below is undone by a matching `Box::from_raw` in `rdx_result_set_free`.
+        let columns: Vec<*const c_char> = result_set.columns.iter().map(|name| cstring_raw(name)).collect();
+        let column_count = columns.len();
+        let columns_ptr = Box::into_raw(columns.into_boxed_slice()).cast::<*const c_char>();
+
+        let rows: Vec<CResultRow> = result_set
+            .rows
+            .iter()
+            .map(|row| {
+                let cells: Vec<CCell> = row.iter().map(|cell| build_cell(graph, cell)).collect();
+                let len = cells.len();
+                // An empty row is exported as (null, 0).
+                let cells_ptr = if cells.is_empty() {
+                    ptr::null_mut()
+                } else {
+                    Box::into_raw(cells.into_boxed_slice()).cast::<CCell>()
+                };
+                CResultRow { cells: cells_ptr, len }
+            })
+            .collect();
+        let row_count = rows.len();
+        let rows_ptr = Box::into_raw(rows.into_boxed_slice()).cast::<CResultRow>();
+
+        CRunRows {
+            result: Box::into_raw(Box::new(CResultSet {
+                columns: columns_ptr.cast_const(),
+                column_count,
+                rows: rows_ptr,
+                row_count,
+            })),
+            error: ptr::null(),
+        }
+    })
+}
+
+/// Recursively frees a `CCell`'s owned allocations (its string, or its nested list cells).
+unsafe fn free_cell(cell: &CCell) {
+    match cell.tag {
+        // SAFETY: the tag selects the active union field.
+        CCellTag::Str => {
+            let str_val = unsafe { cell.payload.str_val };
+            if !str_val.is_null() {
+                // Re-owning the string drops it at the end of this statement.
+                let _ = unsafe { CString::from_raw(str_val.cast_mut()) };
+            }
+        }
+        // SAFETY: the tag selects the active union field.
+        CCellTag::List => {
+            let list = unsafe { cell.payload.list };
+            if !list.items.is_null() && list.len > 0 {
+                // Re-own the array so it is released when `slice` goes out of scope, after its
+                // elements have been freed.
+                let slice = unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(list.items, list.len)) };
+                for nested in slice.iter() {
+                    unsafe { free_cell(nested) };
+                }
+            }
+        }
+        // Null, Bool, Int and Node cells own no heap memory.
+        _ => {}
+    }
+}
+
+/// Frees a `CResultSet` previously returned by `rdx_query_run_rows`, including all nested
+/// allocations.
+///
+/// # Safety
+///
+/// - `ptr` must be a pointer returned by `rdx_query_run_rows`, or null. It must not be used after.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn rdx_result_set_free(ptr: *mut CResultSet) {
+    if ptr.is_null() {
+        return;
+    }
+
+    // Note: the parameter `ptr` is a value, while `ptr::...` below still names the `ptr` module.
+    let result_set = unsafe { Box::from_raw(ptr) };
+
+    if !result_set.columns.is_null() && result_set.column_count > 0 {
+        let columns = unsafe {
+            Box::from_raw(ptr::slice_from_raw_parts_mut(
+                result_set.columns.cast_mut(),
+                result_set.column_count,
+            ))
+        };
+        for column in columns.iter() {
+            // A column entry is null when its name could not be converted to a C string.
+            if !column.is_null() {
+                let _ = unsafe { CString::from_raw((*column).cast_mut()) };
+            }
+        }
+    }
+
+    if !result_set.rows.is_null() && result_set.row_count > 0 {
+        let rows = unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(result_set.rows, result_set.row_count)) };
+        for row in rows.iter() {
+            if !row.cells.is_null() && row.len > 0 {
+                let cells = unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(row.cells, row.len)) };
+                for cell in cells.iter() {
+                    unsafe { free_cell(cell) };
+                }
+            }
+        }
+    }
+}
+
 /// Returns a description of the queryable Cypher schema (node labels, relationship types, and
 /// properties) in the given format (`"table"` or `"json"`). The schema is static and requires no
 /// graph. Caller must free the returned pointer with `free_c_string`.
diff --git a/rust/rubydex/Cargo.toml b/rust/rubydex/Cargo.toml
index f1bb044b..d61faad0 100644
--- a/rust/rubydex/Cargo.toml
+++ b/rust/rubydex/Cargo.toml
@@ -22,7 +22,7 @@ crate-type = ["rlib"]
 test_utils = ["dep:tempfile"]
 
 [dependencies]
-cypher-parser = "0.2"
+cypher-parser = "0.4"
 ruby-prism = "1.9.0"
 ruby-rbs = "0.3"
 url = "2.5.4"
diff --git a/rust/rubydex/src/query/cypher/mod.rs b/rust/rubydex/src/query/cypher/mod.rs
index 63a7c040..2fbadcc7 100644
--- a/rust/rubydex/src/query/cypher/mod.rs
+++ b/rust/rubydex/src/query/cypher/mod.rs
@@ -19,7 +19,7 @@
 //
 // `Query` is the opaque parsed-query object: callers can `parse` a query string once (failing fast
 // on syntax errors), then `run_parsed` it against a graph that was built afterwards.
-pub use cypher_parser::{CypherError, OutputFormat, Query, parse};
+pub use cypher_parser::{CypherError, CypherValue, OutputFormat, Query, ResultSet, execute, parse};
 
 pub mod schema;
 pub mod schema_info;
diff --git a/rust/rubydex/src/query/cypher/schema.rs b/rust/rubydex/src/query/cypher/schema.rs
index ee323736..e90b43f8 100644
--- a/rust/rubydex/src/query/cypher/schema.rs
+++ b/rust/rubydex/src/query/cypher/schema.rs
@@ -36,6 +36,22 @@ pub enum NodeRef {
     Document(UriId),
 }
 
+impl NodeRef {
+    /// Decodes the opaque id produced by [`GraphProvider::node_id`] back into a `NodeRef`.
+    ///
+    /// The expected form is `tag:id`, where `tag` is `decl`, `def` or `doc` and `id` parses as a
+    /// `u64`. Returns `None` when the separator is missing, the id does not parse, or the tag is
+    /// not one of those three. Decoding does not check that the id exists in any graph.
+    #[must_use]
+    pub fn decode(encoded: &str) -> Option<Self> {
+        let (tag, rest) = encoded.split_once(':')?;
+        let value: u64 = rest.parse().ok()?;
+        match tag {
+            "decl" => Some(NodeRef::Declaration(DeclarationId::new(value))),
+            "def" => Some(NodeRef::Definition(DefinitionId::new(value))),
+            "doc" => Some(NodeRef::Document(UriId::new(value))),
+            _ => None,
+        }
+    }
+}
+
 /// A relationship type.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RelType {
@@ -145,6 +161,17 @@ impl GraphProvider for Graph {
     fn name(&self, node: NodeRef) -> String {
         node_name(self, node)
     }
+
+    fn node_id(&self, node: NodeRef) -> String {
+        // Encode the node category plus the underlying hashed id so a consumer can decode it back to
+        // the right handle. The category tag is required because labels alone are ambiguous
+        // (a class declaration and a class definition both have the label "Class").
+        // The `tag:id` strings produced here are the forms `NodeRef::decode` accepts.
+        match node {
+            NodeRef::Declaration(id) => format!("decl:{}", *id),
+            NodeRef::Definition(id) => format!("def:{}", *id),
+            NodeRef::Document(id) => format!("doc:{}", *id),
+        }
+    }
 }
 
 /// Returns all nodes matching the given labels. An empty slice matches every node; otherwise a node
diff --git a/test/graph_test.rb b/test/graph_test.rb
index b684c4a3..cae4f534 100644
--- a/test/graph_test.rb
+++ b/test/graph_test.rb
@@ -1459,7 +1459,7 @@ def test_cypher_schema_json
       assert(parsed["relationships"].any? 
{ |r| r["type"] == "DEFINES" })
   end
 
-  def test_parsed_query_runs_against_graph
+  def test_run_returns_rows_of_hashes
     with_context do |context|
       context.write!("zoo.rb", "class Animal; end\nclass Dog < Animal; end\n")
 
@@ -1470,7 +1470,42 @@ def test_parsed_query_runs_against_graph
       graph.index_all(context.glob("**/*.rb"))
       graph.resolve
 
-      assert_equal("[{\"p.name\":\"Animal\"}]", query.render(graph, :json))
+      rows = query.run(graph)
+      assert_equal([{ "p.name" => "Animal" }], rows)
+    end
+  end
+
+  # Returning a bare node variable (`RETURN c`) must yield a graph handle object rather than a
+  # String: the cell is expected to be a Rubydex::Declaration that answers `name`.
+  def test_run_returns_declaration_handles_for_node_cells
+    with_context do |context|
+      context.write!("zoo.rb", "class Animal; end\nclass Dog < Animal; end\n")
+
+      query = Rubydex::Query.parse("MATCH (c:Class {name: 'Dog'}) RETURN c")
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      rows = query.run(graph)
+      assert_equal(1, rows.length)
+      node = rows.first["c"]
+      assert_kind_of(Rubydex::Declaration, node)
+      assert_equal("Dog", node.name)
+    end
+  end
+
+  # A row mixing a property column with an aggregate: `count(c)` must come back as the Integer 2
+  # (Dog and Cat both inherit from Animal) under its `AS` alias.
+  def test_run_maps_scalars_and_aggregates
+    with_context do |context|
+      context.write!("zoo.rb", "class Animal; end\nclass Dog < Animal; end\nclass Cat < Animal; end\n")
+
+      query = Rubydex::Query.parse(
+        "MATCH (c:Class)-[:INHERITS]->(p:Class) WHERE p.name = 'Animal' RETURN p.name, count(c) AS subclasses",
+      )
+
+      graph = Rubydex::Graph.new
+      graph.index_all(context.glob("**/*.rb"))
+      graph.resolve
+
+      assert_equal([{ "p.name" => "Animal", "subclasses" => 2 }], query.run(graph))
     end
   end
 
@@ -1488,11 +1523,11 @@ def test_parsed_query_reusable_across_graphs
       graph.index_all(context.glob("**/*.rb"))
       graph.resolve
 
-      assert_equal("[{\"c.name\":\"Dog\"}]", query.render(graph, :json))
+      assert_equal([{ "c.name" => "Dog" }], query.run(graph))
     end
   end
 
-  def test_query_returns_table_output
+  def test_render_returns_table_output
     with_context do |context|
       context.write!("zoo.rb", <<~RUBY)
         class Animal; end
@@ -1514,27 +1549,7 @@ class Cat < Animal; end
     end
   end
 
-  def test_query_label_disjunction
-    with_context do |context|
-      context.write!("zoo.rb", <<~RUBY)
-        class Animal; end
-        module Walkable; end
-        class Dog < Animal; end
-      RUBY
-
-      graph = Rubydex::Graph.new
-      graph.index_all(context.glob("**/*.rb"))
-      graph.resolve
-
-      query = Rubydex::Query.parse(
-        "MATCH (n:Class|Module) WHERE n.name = 'Animal' OR n.name = 'Walkable' RETURN n.name ORDER BY n.name",
-      )
-
-      assert_equal("[{\"n.name\":\"Animal\"},{\"n.name\":\"Walkable\"}]", query.render(graph, :json))
-    end
-  end
-
-  def test_query_accepts_string_format
+  def test_render_json_and_string_format
     with_context do |context|
       context.write!("zoo.rb", "class Dog; end\n")
 
@@ -1543,6 +1558,7 @@ def test_query_accepts_string_format
       graph.resolve
 
       query = Rubydex::Query.parse("MATCH (c:Class {name: 'Dog'}) RETURN c.name")
+      assert_equal("[{\"c.name\":\"Dog\"}]", query.render(graph, :json))
       assert_equal("[{\"c.name\":\"Dog\"}]", query.render(graph, "json"))
     end
   end