diff --git a/.gitignore b/.gitignore index 613c299..3e65e28 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /.cpcache/ /.nrepl-port +/target/ diff --git a/LICENSE.txt b/LICENSE.txt index e07f994..a3e71d9 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,7 +1,202 @@ -Copyright 2021 Darrick Wiebe -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 Darrick Wiebe + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 6190636..1277aa0 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,14 @@ In my examples below I will try to follow the pattern of decomposing functions into small atomic units. This decomposition leads to surprising flexibility without any performance overhead at all. +### Transducers + +The most recent work on Fermor is in replicating all of the graph query +functionality in transducer variants. This is currently an experimental branch, +but seems very promising for both cleaner library design and higher performance. + +I think this is an exciting development! + ## Force directed graph layout This project also includes a force directed layout engine that was originally @@ -58,7 +66,69 @@ reasonably dense in a minimal rectangular area. You can try the graph layout out in the `fermor.ui` namespace, either in the repl or by running `clj -M -m fermor.ui`. -### Introduction +## Using Fermor + +Fermor has several internal namespaces, but designed that anything typically +used by consumers of the library is exposed via the `fermor.core` namespace. All +fermor functionality below would be imported from that namespace. + +### Building a Graph + +To create a graph, simply call `(graph)`. + +``` clojure +(def g (graph)) +``` + +> It is possible to specify options allowing edges with a given label to have +> different characteristics (undirected, acyclic, etc.), but typically, the +> default settings are best. + +Once you've built a graph, there are both bulk and individual methods to create a graph. + +Fermor is backed by Bifurcan, and adopts its idea of `linear` and `forked` modes. +Linear mode is a mutable graph. It's called linear because the work editing a mutable +graph should be done in a linear single process. Unlike Bifurcan, Fermor's forked mode is +read-only. You must switch back to linear mode to edit the graph. + +The graph is created in linear mode. + +Vertices must have an ID value when they are created. The ID may be any value. +It's common to use integer IDs, but symbols, keywords or any other immutable Clojure +value may be used. + +Nodes may be either created standalone or created implicitly when +adding an edge between two node IDs using the `add-edges` function. This is +similar to how many much simpler graph tools create graphs. It allows +graphs to be defined flexibly and easily. Both approaches may be intermixed. + +For example, the following graph has 8 edges labelled `:to`, which I use in my +graph algos test. + + + +``` clojure +(def simple-graph + (-> (g/graph) + (g/add-edges :to '[[A T] [A B] [A C] [T B] [C B] [B D] [C E] [E D]]) + g/forked)) +``` + +I can add more edges using the `:to` label or add edges using any other label +with additional calls to `add-edges`. + +Once a graph is created, a specific vertex can be selected with either +`get-vertex` or `get-vertex!`. The latter verifies that the vertex actually +exists in the graph. A nonexistent vertex will simply have no edges and no document. + +``` clojure +(g/get-vertex simple-graph 'A) +;; => (v A) + +(def A (g/get-vertex simple-graph 'A)) +``` + +### Traversal To traverse from one sequence of vertices to a related sequence of vertices means following an edge in the graph. You can do that using the `in` and `out` @@ -69,61 +139,146 @@ other way, and we can do that with `in`, which follows edges that point to the vertices in the sequence and returns the vertices that those edges originate from. -It's good practice to include both element types in an edge name as I do here. +Using A from the example above, we can traverse out from A to the vertices +related on the :to edges +``` clojure +(->> A (g/out :to)) +;; => ((v B) (v C) (v T)) +``` -The following example is admittedly terrible. Until I get a better example set -up, please see the [gremlin examples](/test/fermor/gremlin_examples_test.clj) and -[cypher examples](/test/fermor/cypher_examples.clj), where I've taken the most -sophisticated examples I've been able to find in those projects' documentation -and translated them to use fermor. More interesting examples are very welcome so -please do send them my way if you know of good ones. +Or traverse out from a collection of vertices to all related on the :to edges +``` clojure +(->> [A A] + (g/out :to)) +;; => ((v B) (v C) (v T) (v B) (v C) (v T)) +``` + +Or traverse along any type of edge out from A, then in from all of those edges. +This will bring us back to A multiple times, but also to other vertices that +can traverse into those edges. +``` clojure +(->> A g/out g/in) +;; => ((v A) (v C) (v T) (v A) (v A)) +``` + +We can also traverse to the edges that outward from A +``` clojure +(->> A (g/out-e :to)) +;; => ((e-> A :to B) (e-> A :to C) (e-> A :to T)) +``` + + +#### Building up toward a domain-specific query language + +By defining simple functions like the following, you can rapidly create a very +expressive and high performance graph query system. ```clojure -(defn cities [states] - (->> states +(defn cities [state-nodes] + (->> state-nodes (out :state->city))) -(defn states [cities] - (->> cities +(defn states [city-nodes] + (->> city-nodes (in :state->city))) +``` + +#### Filtering +In Clojure, we `filter` data all the time. We don't need to change that. First +we'll create a predicate function, then a traversal that uses it. + +```clojure (defn large-city? [city] (< 1000000 (population city))) -``` - -In Clojure, we `filter` data all the time. We don't need to change that: -```clojure -(defn large-cities [states] - (->> states +(defn large-cities [state-nodes] + (->> state-nodes cities (filter large-city?))) ``` -Lookaheads come in a few varieties. They are filters that match based on whether -there is or isn't the expected data connected to the element at hand. - +Lookaheads are useful for filtering based on data relationships. They come in a +few varieties. They are filters that match based on whether there is or isn't +the expected data connected to the element at hand. ```clojure -(defn states-with-a-large-city [states] - (->> states +(defn states-with-a-large-city [state-nodes] + (->> state-nodes (lookahead large-cities))) ``` We can also do lookaheads with specific min and max arguments. ```clojure -(defn states-with-2to5-large-cities [states] - (->> states +(defn states-with-2to5-large-cities [state-nodes] + (->> state-nodes (lookahead {:min 2 :max 5} large-cities))) ``` Or do a negative lookahead to say what we don't want (like the core `remove` function). ```clojure -(defn states-without-a-large-city [states] - (->> states +(defn states-without-a-large-city [state-nodes] + (->> state-nodes (neg-lookahead large-cities))) ``` +#### Recursive traversal + +If the cities have rail connections defined, we can use those to find all available train destinations. + +``` clojure +(defn outbound-trains [city-nodes] + (out :train/city->city city-nodes)) + +(defn inbound-trains [city-nodes] + (in :train/city->city city-nodes)) +``` + +By using `all`, we can recursively query each city the train arrives at to find +the available destinations. Cycles are detected and eliminated, but all paths to +each destination will be found, meaning that if the train network is well +connected, some destinations may be included many times in the results. To +counter that a simple strategy is to add `distinct`. + +``` clojure +(defn train-destinations [city-nodes] + (->> city-nodes (all outbound-trains) distinct) +``` + +There are many variations on `all`, including `deepest`, `all-cycles`, `all-paths`, `search`, and several others. + +All of those functions are built using the `descend` graph traversal power tool which may be used to easily implement +sophisticated custom graph traversals. + +#### More complex traversal and filtering examples + +For more complex examples, please see the [gremlin examples](/test/fermor/gremlin_examples_test.clj) and +[cypher examples](/test/fermor/cypher_examples.clj), where I've taken the most +sophisticated examples present in the documentation of those projects +and translated them to use Fermor. + +## Graph algos + +I have used this project extensively for the back end of a Sea of Nodes-style +optimizing compiler. For that work I've needed several graph algorithms, which +I've been accumulating in the `fermor.graph.algo` namespace more-or-less +as-needed. I've also exposed the algos in Bifurcan wherever I could find a good way to do so. + +They include `strongly-connected-components`, `shortest-path`, `strongly-connected-subgraphs`, `cycles` +`connected-components`, `biconnected-components`, `articulation-points`, `postwalk`, `reverse-postwalk`, +`postwalk-reduce`, `prewalk-reduce` `reverse-postwalk-reduce`, `reverse-post-order-numbering`, +`post-order-numbering`, `immediate-dominators`, `dominator-depth`, `dominance-frontiers`, `intervals`, +`loop-tree`, `breadth-first-nodes`, `breadth-first-reduce`, `non-loop-vertices-between`, `loop-info`. + +Most of those algos are implemented in a flexible way allowing you to define the +`predecessor` and `successor` traversals (as needed), enabling the algos to be run even in +complex labeled graphs. + +## Pattern matching + +My [Pattern](https://github.com/pangloss/pattern) library can be extended to create powerful graph matching. I have used that extensively +in my internal projects, but highly customized to the projects' specific domain requirements. I would be interested in working +with someone to develop a set of matchers to include in this project. diff --git a/build.clj b/build.clj new file mode 100644 index 0000000..bfa17e6 --- /dev/null +++ b/build.clj @@ -0,0 +1,62 @@ +(ns build + (:require [clojure.tools.build.api :as b])) + +(def lib 'com.github.pangloss/fermor) +(def version (format "1.0.%s" (b/git-count-revs nil))) +(def class-dir "target/classes") +(def jar-file (format "target/%s-%s.jar" (name lib) version)) + +;; delay to defer side effects (artifact downloads) +(def basis (delay (b/create-basis {:project "deps.edn"}))) + +(defn clean [_] + (b/delete {:path "target"})) + +(defn jar [_] + (b/write-pom {:class-dir class-dir + :lib lib + :version version + :basis @basis + :license "Apache-2.0" + :src-dirs ["src"] + :pom-data + [[:licenses + [:license + [:name "Apache-2.0"] + [:url "https://www.apache.org/licenses/LICENSE-2.0.txt"];] + [:distribution "repo"]]]]}) + (b/copy-dir {:src-dirs ["src" "resources"] + :target-dir class-dir}) + (b/jar {:class-dir class-dir + :jar-file jar-file}) + {:jar-file jar-file :version version}) + +(defn install + "Clean, generate a jar and install the jar into the local Maven repository." + [opts] + (clean opts) + (let [{:keys [version jar-file]} (jar opts)] + (b/install {:class-dir class-dir + :lib lib + :version version + :basis @basis + :jar-file jar-file}) + (println (str "Installed " jar-file " to local Maven repository.")) + opts)) + +(defn publish + "Generates a jar with all project sources and resources and publishes it to + Clojars." + [opts] + (clean opts) + (let [{:keys [jar-file]} (jar opts)] + (println (str "Publishing " jar-file " to Clojars!")) + ((requiring-resolve 'deps-deploy.deps-deploy/deploy) + (merge {:installer :remote + :sign-releases? false + :artifact jar-file + :pom-file (b/pom-path {:lib lib :class-dir class-dir})} + opts)) + ;; TODO put a catch here if the version already exists? + (println "Published.") + opts)) diff --git a/deps.edn b/deps.edn index 9d98aed..33a57cd 100644 --- a/deps.edn +++ b/deps.edn @@ -1,28 +1,31 @@ {:paths ["src"] - :deps {org.clojure/clojure {:mvn/version "1.11.1"} - potemkin/potemkin {:mvn/version "0.4.6"} - generateme/fastmath {:mvn/version "2.2.1" + :deps {potemkin/potemkin {:mvn/version "0.4.7"} + generateme/fastmath {:mvn/version "2.4.0" :exclusions [com.github.haifengl/smile-mkl org.slf4j/slf4j-api]} - - io.lacuna/bifurcan {:mvn/version "0.2.0-alpha6"} - org.flatland/ordered {:mvn/version "1.15.11"} - com.github.jordanlewis/data.union-find {:git/url "https://github.com/jordanlewis/data.union-find" - :git/tag "1.0.0" - :git/sha "0e8a06f"} - pangloss/pure-conditioning {:git/url "https://github.com/pangloss/pure-conditioning" - :git/sha "61fa43215e0fce0fe83808b9e30c2bb4c170ffbd"} - - com.phronemophobic/membrane {:mvn/version "0.9.31.8-beta"} - com.phronemophobic.membrane/skialib-macosx-aarch64 {:mvn/version "0.9.31.0-beta"} - com.phronemophobic.membrane/skialib-macosx-x86-64 {:mvn/version "0.9.31.0-beta"} - com.phronemophobic.membrane/skialib-linux-x86-64 {:mvn/version "0.9.31.0-beta"}} + io.lacuna/bifurcan {:mvn/version "0.2.0-alpha7"} + org.flatland/ordered {:mvn/version "1.15.12"} + com.github.pangloss/data.union-find {:mvn/version "1.0.66"} + com.github.pangloss/pure-conditioning {:mvn/version "1.0.35"} + com.github.pangloss/transducers {:mvn/version "1.0.31"}} :aliases {:test {:extra-paths ["test"] :jvm-opts ["-XX:-OmitStackTraceInFastThrow"] :extra-deps {aysylu/loom {:mvn/version "1.0.2"}}} :dev {:extra-paths ["dev"]} :dev/dw {:extra-deps {fipp/fipp {:mvn/version "0.6.26"} - zprint/zprint {:mvn/version "1.2.7"} - com.github.jpmonettas/flow-storm-dbg {:mvn/version "3.6.9"} - com.github.jpmonettas/flow-storm-inst {:mvn/version "3.6.9"}}}}} + zprint/zprint {:mvn/version "1.2.9"} + com.github.jpmonettas/flow-storm-debugger + {:git/url "https://github.com/jpmonettas/flow-storm-debugger" + :git/sha "7a6caa0ab5662ed230bf4c7195f3e0cea99141c7"} + pangloss/transducers {:local/root "../../dev/transducers"}}} + :membrane {:extra-deps + {com.phronemophobic/membrane {:mvn/version "0.9.31.8-beta"} + com.phronemophobic.membrane/skialib-macosx-aarch64 {:mvn/version "0.9.31.0-beta"} + com.phronemophobic.membrane/skialib-macosx-x86-64 {:mvn/version "0.9.31.0-beta"} + com.phronemophobic.membrane/skialib-linux-x86-64 {:mvn/version "0.9.31.0-beta"}}} + :build + ;; build.clj tool. + {:deps {io.github.clojure/tools.build {:git/tag "v0.10.3" :git/sha "15ead66"} + slipset/deps-deploy {:mvn/version "0.2.0"}} + :ns-default build}}} diff --git a/src/fermor/core.clj b/src/fermor/core.clj index fcd2552..f3857cf 100644 --- a/src/fermor/core.clj +++ b/src/fermor/core.clj @@ -433,7 +433,7 @@ (if (keyword? labels) (if-let [get-edges (->traversal (get-graph (first r)) labels)] (map (comp f get-edges) r) - (map (constantly []) r)) + (map (constantly (f [])) r)) (let [labels (ensure-seq labels) get-edges* (into [] (keep #(->traversal (get-graph (first r)) %)) @@ -763,7 +763,7 @@ (lazy-seq (cons (->> v get-siblings - (filter #(not= v %))) + (filter #(not (= v %)))) (sibling-seq vs))))] (sibling-seq r))) ([to-parent from-parent r] @@ -1269,7 +1269,7 @@ "Filter for items in the route not equal to v." {:see-also ["is"]} [v r] - (filter #(not= v %) (ensure-seq r))) + (filter #(not (= v %)) (ensure-seq r))) (defn one-of "Filter for items in the route equal to one of the items in vs." @@ -1302,7 +1302,7 @@ "Remove items matching the KindId or id predicate." [id-pred r] (if (or (id? id-pred) (keyword? id-pred)) - (filter #(not= id-pred (element-id %)) r) + (filter #(not (= id-pred (element-id %))) r) (remove (comp id-pred element-id) r))) (defn with-set @@ -1556,6 +1556,11 @@ xs seen)))] (step coll #{})))) +(defn detect + "Find the first item that matches the given predicate." + [f coll] + (some (fn [x] (when (f x) x)) coll)) + (defn subgraph "Build a graph of only the edges in the paths of the route. You must call with-path on elements that are fed into the part of the route that you want to diff --git a/src/fermor/descend.clj b/src/fermor/descend.clj index ed0d697..27a86d4 100644 --- a/src/fermor/descend.clj +++ b/src/fermor/descend.clj @@ -60,23 +60,23 @@ #(Cons. e nil) [false true true] ; :loop - (let [more #(Concat. (*descend (when path (conj path e)) control f results nri (inc recur-depth)) - (*descend path control f more nri (inc recur-depth)))] - (if (= nri (mod recur-depth (inc nri))) + (let [more #(Concat. (*descend (when path (conj path e)) control f results nri (unchecked-inc recur-depth)) + (*descend path control f more nri (unchecked-inc recur-depth)))] + (if (= nri (mod recur-depth (unchecked-inc nri))) #(Cons. (NoResult. path recur-depth) more) more)) [false true false] ; :chain - (if (= nri (mod recur-depth (inc nri))) + (if (= nri (mod recur-depth (unchecked-inc nri))) #(Cons. (NoResult. path recur-depth) - (*descend (when path (conj path e)) control f results nri (inc recur-depth))) - (recur (when path (conj path e)) control f results nri (inc recur-depth))) + (*descend (when path (conj path e)) control f results nri (unchecked-inc recur-depth))) + (recur (when path (conj path e)) control f results nri (unchecked-inc recur-depth))) [false false true] ; (:ignore false nil) - (if (= nri (mod recur-depth (inc nri))) + (if (= nri (mod recur-depth (unchecked-inc nri))) #(Cons. (NoResult. path recur-depth) - (*descend path control f more nri (inc recur-depth))) - (recur path control f more nri (inc recur-depth))) + (*descend path control f more nri (unchecked-inc recur-depth))) + (recur path control f more nri (unchecked-inc recur-depth))) [false false false]; :cut nil))))) @@ -113,23 +113,23 @@ #(Cons. e-path nil) [false true true] ; :loop - (let [more #(Concat. (*descents e-path control f results nri (inc recur-depth)) - (*descents path control f more nri (inc recur-depth)))] - (if (= nri (mod recur-depth (inc nri))) + (let [more #(Concat. (*descents e-path control f results nri (unchecked-inc recur-depth)) + (*descents path control f more nri (unchecked-inc recur-depth)))] + (if (= nri (mod recur-depth (unchecked-inc nri))) #(Cons. (NoResult. path recur-depth) more) more)) [false true false] ; :chain - (if (= nri (mod recur-depth (inc nri))) + (if (= nri (mod recur-depth (unchecked-inc nri))) #(Cons. (NoResult. path recur-depth) - (*descents e-path control f results nri (inc recur-depth))) - (recur e-path control f results nri (inc recur-depth))) + (*descents e-path control f results nri (unchecked-inc recur-depth))) + (recur e-path control f results nri (unchecked-inc recur-depth))) [false false true] ; (:ignore false nil) - (if (= nri (mod recur-depth (inc nri))) + (if (= nri (mod recur-depth (unchecked-inc nri))) #(Cons. (NoResult. path recur-depth) - (*descents path control f more nri (inc recur-depth))) - (recur path control f more nri (inc recur-depth))) + (*descents path control f more nri (unchecked-inc recur-depth))) + (recur path control f more nri (unchecked-inc recur-depth))) [false false false] ; :cut nil))))) diff --git a/src/fermor/graph.clj b/src/fermor/graph.clj index c16b3a8..84ac284 100644 --- a/src/fermor/graph.clj +++ b/src/fermor/graph.clj @@ -3,7 +3,8 @@ (:require [fermor.protocols :refer :all] [pure-conditioning :refer [condition error default optional]] [clojure.pprint :refer [simple-dispatch]]) - (:import (io.lacuna.bifurcan DirectedGraph DirectedAcyclicGraph IGraph IMap Map IEdge Set) + (:import (io.lacuna.bifurcan DirectedGraph DirectedAcyclicGraph IGraph IMap Map IEdge Set + LinearList Lists) (java.util.function BiFunction) (java.util Optional ArrayList Iterator) (clojure.lang IMeta))) @@ -217,15 +218,9 @@ GetEdge (-get-edge [g label from-id to-id] (when-let [edge (._getEdgeGraph g label)] - (try - (do - ;; The only way to test for edge existence seems to be to call .edge and see if it raises. - ;; FIXME: try to find a better way to do this. - (.edge ^IGraph edge from-id to-id) - ;; Don't cache the document in a mutable graph. - (->E label (->V g from-id nil nil) (->V g to-id nil nil) nil true nil)) - (catch IllegalArgumentException e - nil)))) + (let [e (.edge ^IGraph edge from-id to-id ::not-found)] + (when-not (= ::not-found e) + (->E label (->V g from-id nil nil) (->V g to-id nil nil) nil true nil))))) AllVertices (all-vertices [g] @@ -425,15 +420,11 @@ GetEdge (-get-edge [g label from-id to-id] (when-let [edge (._getEdgeGraph g label)] - (try - (->E label (->V g from-id nil nil) (->V g to-id nil nil) - ;; NOTE: .edge is to fetch the edge document, but if there is no edge - ;; document it will also raise an exception. If it's possible, it - ;; would be better if I could actually check for the edge existence. - (Optional/ofNullable (.edge ^IGraph edge from-id to-id)) - true nil) - (catch IllegalArgumentException e - nil)))) + (let [e (.edge ^IGraph edge from-id to-id ::not-found)] + (when-not (= ::not-found e) + (->E label (->V g from-id nil nil) (->V g to-id nil nil) + (Optional/ofNullable e) + true nil))))) GraphEdgesPrepared (-out-edges-prepared2 [g label] @@ -652,14 +643,10 @@ (let [g (get-graph (.out_v e)) edges (.get (-edges g) (.label e))] (when (.isPresent edges) - (try - (let [edges ^IGraph (.get edges) - edge (.edge edges - (element-id (.out_v e)) - (element-id (.in_v e)))] - edge) - (catch IllegalArgumentException e - ;; thrown if the edge no longer exists in the graph. + (let [edges ^IGraph (.get edges)] + (.edge edges + (element-id (.out_v e)) + (element-id (.in_v e)) nil)))))) (defn labels @@ -719,9 +706,9 @@ (-out-edges [v labels] (--out-edges v labels)) - (-in-edges [^V v] + (-in-edges [v] (--in-edges v (labels (.graph v)))) - (-in-edges [^V v labels] + (-in-edges [v labels] (--in-edges v labels)) VertexEdgeCount @@ -731,15 +718,15 @@ (-out-edge-count [v labels] (--out-edge-count v labels)) - (-in-edge-count [^V v] + (-in-edge-count [v] (--in-edge-count v (labels (.graph v)))) - (-in-edge-count [^V v labels] + (-in-edge-count [v labels] (--in-edge-count v labels)) VertexEdgesPrepared (-out-edges-prepared [v labels] (--out-edges v labels)) - (-in-edges-prepared [^V v labels] + (-in-edges-prepared [v labels] (--in-edges v labels)) Element @@ -764,10 +751,10 @@ (defn edges-with-label? "Returns true if the given vertex has any edges with the given label." - ([^V v label] - (edges-with-label? v label (edge-graph (.graph v) label))) - ([^V v label ^IGraph edge] - (.isPresent (.indexOf edge (.id v))))) + ([v label] + (edges-with-label? v label (edge-graph (get-graph v) label))) + ([v label ^IGraph edge] + (.isPresent (.indexOf edge (element-id v))))) (defn vertices-with-edge "Return all vertices that have an edge with the given label." @@ -777,59 +764,61 @@ (map #(->V graph % nil nil)) (.vertices edge)))) -(defn- --out-edges [^V v labels] +(defn- --out-edges [v labels] (mapcat (fn [label] - (when-let [edge (edge-graph (.graph v) label)] + (when-let [edge (edge-graph (get-graph v) label)] (when (edges-with-label? v label edge) - (let [edges (.out edge (.id ^V v)) + (let [edges (.out edge (element-id v)) result (object-array (.size ^Set edges))] (loop [iter (.iterator ^Set edges) i 0] (if (.hasNext iter) (let [e (.next iter)] - (aset result i (->E label v (->V (.graph v) e nil nil) nil true nil)) + (aset result i (->E label v (->V (get-graph v) e nil nil) nil true nil)) (recur iter (unchecked-inc-int i))) result)))))) labels)) -(defn- --in-edges [^V v labels] +(defn- --in-edges [v labels] (mapcat (fn [label] - (when-let [edge (edge-graph (.graph v) label)] + (when-let [edge (edge-graph (get-graph v) label)] (when (edges-with-label? v label edge) - (let [edges (.in edge (.id ^V v)) + (let [edges (.in edge (element-id v)) result (object-array (.size ^Set edges))] (loop [iter (.iterator ^Set edges) i 0] (if (.hasNext iter) (let [e (.next iter)] - (aset result i (->E label (->V (.graph v) e nil nil) v nil false nil)) + (aset result i (->E label (->V (get-graph v) e nil nil) v nil false nil)) (recur iter (unchecked-inc-int i))) result)))))) labels)) -(defn- --out-edge-count [^V v labels] +(defn- --out-edge-count [v labels] (reduce (fn [n label] - (if-let [edge (edge-graph (.graph v) label)] + (if-let [edge (edge-graph (get-graph v) label)] (if (edges-with-label? v label edge) - (+ n (.size ^Set (.out edge (.id v)))) + (+ n (.size ^Set (.out edge (element-id v)))) n) n)) 0 labels)) -(defn- --in-edge-count [^V v labels] +(defn- --in-edge-count [v labels] (reduce (fn [n label] - (if-let [edge (edge-graph (.graph v) label)] + (if-let [edge (edge-graph (get-graph v) label)] (if (edges-with-label? v label edge) - (+ n (.size ^Set (.in edge (.id v)))) + (+ n (.size ^Set (.in edge (element-id v)))) n) n)) 0 labels)) (defn- --out-edges-prepared2 [g label] + ;; FIXME: this does not wrap edges with the expected wrapper, causing vertices + ;; from KindGraph or LoomGraph to produce unwrapped edge elements. (when-let [edge (edge-graph g label)] (fn [v] (when (edges-with-label? v label edge) - (let [edges (.out edge (.id ^V v)) + (let [edges (.out edge (element-id v)) result (object-array (.size ^Set edges))] (loop [iter (.iterator ^Set edges) i 0] @@ -843,7 +832,7 @@ (when-let [edge (edge-graph g label)] (fn [v] (when (edges-with-label? v label edge) - (let [edges (.in edge (.id ^V v)) + (let [edges (.in edge (element-id v)) result (object-array (.size ^Set edges))] (loop [iter (.iterator ^Set edges) i 0] @@ -853,6 +842,28 @@ (recur iter (unchecked-inc-int i))) result))))))) +(defn out-edges-prepared3 [g label ^IGraph edge v] + (when (edges-with-label? v label edge) + (let [edges (.out edge (element-id v)) + result (LinearList.)] + (loop [iter (.iterator ^Set edges)] + (if (.hasNext iter) + (let [e (.next iter)] + (.addLast result (->E label v (->V g e nil nil) nil true nil)) + (recur iter)) + result))))) + +(defn in-edges-prepared3 [g label ^IGraph edge v] + (when (edges-with-label? v label edge) + (let [edges (.in edge (element-id v)) + result (LinearList.)] + (loop [iter (.iterator ^Set edges)] + (if (.hasNext iter) + (let [e (.next iter)] + (.addLast result (->E label (->V g e nil nil) v nil false nil)) + (recur iter)) + result))))) + (defn print-edge* [^String as-out ^String as-in ^E e ^java.io.Writer w] (if *compact-edge-printing* (if (traversed-forward e) @@ -895,7 +906,7 @@ (if (linear? e) (.write w "(-v ") (.write w "(v ")) - (print-method (.id ^V e) w) + (print-method (element-id e) w) (when-not *compact-vertex-printing* (when-let [p (get-document e)] (.write w " ") diff --git a/src/fermor/graph/algo.clj b/src/fermor/graph/algo.clj index dd7cfd2..0a1b538 100644 --- a/src/fermor/graph/algo.clj +++ b/src/fermor/graph/algo.clj @@ -59,7 +59,7 @@ C (conj C z) stack (pop stack)] (if (= z x) - (let [set-of-components (if (or include-singletons? (not= 1 (count C))) + (let [set-of-components (if (or include-singletons? (not (= 1 (count C)))) (conj set-of-components C) set-of-components)] [stack set-of-components i L pre]) @@ -85,8 +85,9 @@ (if x (if (pre x) (recur stack set-of-components i L pre vertices) + ;; FIXME: instead of a pile of args and returns here, use a state object with mutable primitive members (let [[stack set-of-components i L pre] (scc get-successors include-singletons? stack set-of-components i L pre x)] - (recur stack set-of-components i L pre vertices))) + (recur stack set-of-components (long i) L pre vertices))) set-of-components))) (defn shortest-path diff --git a/src/fermor/kind_graph.clj b/src/fermor/kind_graph.clj index 5a5272f..3bb8aef 100644 --- a/src/fermor/kind_graph.clj +++ b/src/fermor/kind_graph.clj @@ -18,6 +18,8 @@ (equals [a b] (and (instance? KVertex b) (= element (.element ^KVertex b)))) (hashCode [e] (.hashCode element)) + Vertex + Element (element-id ^KindId [v] (element-id element)) (get-graph [v] (get-graph element)) diff --git a/src/fermor/transducers.clj b/src/fermor/transducers.clj new file mode 100644 index 0000000..9d90527 --- /dev/null +++ b/src/fermor/transducers.clj @@ -0,0 +1,606 @@ +(ns fermor.transducers + (:require [pure-conditioning :refer [condition manage lazy-conditions error default]] + [potemkin :refer [import-vars import-def]] + [flatland.ordered.set :refer [ordered-set]] + [fermor.protocols :as proto :refer [wrappable? Wrappable -out-edges -in-edges + -out-edge-count -in-edge-count + to-forked to-linear + traversed-forward -label -unwrap + -out-edges-prepared -in-edges-prepared + -transpose -has-vertex? -get-edge]] + [fermor.descend :refer [*descend *descents extrude *no-result-interval*]] + [fermor.graph :as g + :refer [out-edges-prepared3 in-edges-prepared3]] + [fermor.kind-graph :refer [->KGraph]] + fermor.path + [pangloss.transducers :as tx] + [fermor.core :refer [ensure-seq]]) + (:import clojure.lang.IMeta + (io.lacuna.bifurcan LinearList Lists) + (fermor.protocols TraversalDirection KindId))) + + +(import-vars (fermor.protocols set-config! + ;; Graph + get-vertex all-vertices all-edges + ;; Predicates + graph? vertex? edge? element? linear? forked? path? + ;; MutableGraph + add-vertices add-edges set-documents + remove-vertices remove-edges remove-documents + ;; Element + element-id get-document get-graph exists? + ;; Edge + out-vertex in-vertex + ;; Path + reverse-path + ;; KindId + id k kind lookup) + ;; Bifurcan Graph + (fermor.graph dag-edge digraph-edge + undirected-edge build-graph vertices-with-edge + ;; read printed graph elements + v e-> e->in + -v -e-> -e->in) + ;; Path + (fermor.path with-path path subpath no-path no-path! cyclic-path? + path-vertices path-edges) + ;; Kind Graph + (fermor.kind-graph V E-> E->in) + (fermor.core + linear forked graph add-edge add-edges-from + add-edges-to get-edge add-edge! add-vertex add-vertex! remove-vertex + remove-vertex! add-vertices! add-edges! has-vertex? get-vertex! reload + set-document update-document update-document! + vertices edges unwrap + label out-edges in-edges out-edge-count in-edge-count both-edge-count + followed-forward? followed-reverse? go-back go-on + transpose subseq-route rsubseq-route + emit-and-continue emit emit-and-chain emit-and-cut continue chain ignore cut) + (pangloss.transducers + counted merged + cond-branch distinct-by lasts-by append + lookahead neg-lookahead branch grouped-by group-count + sorted-group-count group-by-count sorted-group-by-count + distinct-by sorted sorted-by section-map map*)) + +(defn fast-trav + "Steps to traverse a node's edges: + - once per route: + - get the node's graph + - get the matching set of edge-graphs for the graph + - look up the node in each edge-graph to find its edges + - catenate the results" + [get-edges labels] + (fn [rf] + (let [edge-graphs (volatile! []) + labels (if (keyword? labels) [labels] (ensure-seq labels)) + w (volatile! nil) + w-init (fn [vertex] + (let [g (get-graph vertex) + edge-graphs (into [] (map #(g/edge-graph g %)) labels) + ;; create a fully initialized worker + w' (fn [vertex] + (loop [ls labels egs edge-graphs + r (LinearList.)] + (if (seq ls) + (if-let [edges (get-edges g (first ls) (first egs) vertex)] + (Lists/concat r edges) + r) + (.forked r))))] + ;; replace this fn with the initialized worker + (vreset! w w') + ;; actually do the work + (w' vertex))) + work (fn [v] (@w v))] + (vreset! w w-init) + (fn + ;; init + ([] (rf)) + ;; complete + ([result] (rf result)) + ;; reduce item + ([result input] + (rf result (work input))))))) + +(defn with-paths [] (map with-path)) +(defn paths [] (map path)) + +(defn in-e* + ([] (map -in-edges)) + ;; TODO: in-edges-prepared3 should be a protocol with all extensibility features tied in. + ([labels] (fast-trav in-edges-prepared3 labels))) + +(defn in-e + ([] (comp (in-e*) cat)) + ([labels] (comp (in-e* labels) cat))) + +(defn out-e* + ([] (map -out-edges)) + ([labels] (fast-trav out-edges-prepared3 labels))) + +(defn out-e + ([] (comp (out-e*) cat)) + ([labels] (comp (out-e* labels) cat))) + +(defn both-e* + ([] (map (fn [v] (concat (-in-edges v) (-out-edges v))))) + ([labels] (branch (in-e* labels) (out-e* labels)))) + +(defn both-e + ([] (comp (both-e*) cat)) + ([labels] (comp (both-e* labels) cat))) + +(defn out-v [] (map out-vertex)) +(defn in-v [] (map in-vertex)) + +(defn other-v [] (map go-on)) +(defn same-v [] (map go-back)) + +(defn both-v [] + "Returns a lazy seq of vertices out of a collection of edges." + (mapcat #(vector (in-vertex %) (out-vertex %)))) + +(defn mapmap [f] + (map #(mapv f %))) + +(defn in* + ([] (comp (in-e*) (mapmap out-vertex))) + ([labels] (comp (in-e* labels) (mapmap out-vertex)))) + +(defn in + ([] (comp (in-e) (out-v))) + ([labels] (comp (in-e labels) (out-v)))) + +(defn out* + ([] (comp (out-e*) (mapmap in-vertex))) + ([labels] (comp (out-e* labels) (mapmap in-vertex)))) + +(defn out + ([] (comp (out-e) (in-v))) + ([labels] (comp (out-e labels) (in-v)))) + +(defn both* + ([] (comp (both-e*) (mapmap go-on))) + ([labels] (comp (both-e* labels) (mapmap go-on)))) + +(defn both + ([] (comp (both-e) (other-v))) + ([labels] (comp (both-e labels) (other-v)))) + +(defn in-sorted [labels sort-by-f] + (comp (in* labels) (map #(sort-by sort-by-f %)) cat)) + +(defn out-sorted [labels sort-by-f] + (comp (out* labels) (map #(sort-by sort-by-f %)) cat)) + +(defn documents [] + (map get-document)) + +(defn element-ids [] + (map element-id)) + +(defn has-property [k v] + (filter (fn [e] (= v (get (get-document e) k))))) + +(defn make-pairs + ([f] (map (fn [v] [v (f v)]))) + ([f0 f1] (map (fn [v] [(f0 v) (f1 v)])))) + +(defn section + ([xform] + (tx/section xform)) + ([f xform] + ;; NOTE: the original did mapcat on the result of f, but it's much more + ;; flexible to use map and allow the user to add cat if needed. + (comp + (tx/section xform) + (map f)))) + +(defn context [f xform] + (comp + (tx/section (branch + (map identity) + (tx/section xform))) + (map (fn [[v section]] (f v section))))) + +(defn sorted-section + ([xform] + (tx/section (comp xform sorted))) + ([sort-by-f xform] + (tx/section (comp xform (sorted-by sort-by-f))))) + +#_ +(into [] + (sorted-section (mapcat (constantly [10 3 2 1 9]))) + (range 3)) + +;; (into [] (context vector (mapcat range)) (range 10)) + +(defn with + "Filters the route for elements where the result of calling the function children + (fn [e]) are equal to v. If v is a set, then check that the result of + calling children is in the set." + [children v] + (if (set? v) + (filter (fn [e] (v (children e)))) + (filter (fn [e] (= v (children e)))))) + +(defn is + "Filter for items in the route equal to v." + {:see-also ["isn't"]} + [v] + (filter #(= v %))) + +(defn isn't + "Filter for items in the route not equal to v." + {:see-also ["is"]} + [v] + (filter #(not (= v %)))) + +(defn one-of + "Filter for items in the route equal to one of the items in vs." + [vs] + (filter (if (set? vs) vs (set vs)))) + +(defn none-of + "Filter for items in the route equal to one of the items in vs." + [vs] + (remove (if (set? vs) vs (set vs)))) + + +(comment + (into [] (neg-lookahead (is 'x)) [1 2 'x 3]) + (into [] (lookahead (is 'x)) [1 2 'x 3]) + + (into [] + (lookahead {:min 10 :max 20} + (comp + (mapcat range) + (filter even?))) + (range 50)) + (into [] + (neg-lookahead {:min 10 :max 20} + (comp + (mapcat range) + (filter even?))) + (range 50))) + +(defn degree + ([v] + (tx/counted (both-e) [v])) + ([v labels] + (tx/counted (both-e labels) [v]))) + +(defn in-degree + ([v] + (count (-in-edges v))) + ([v labels] + (count (-in-edges v labels)))) + +(defn out-degree + ([v] + (count (-out-edges v))) + ([v labels] + (count (-out-edges v labels)))) + + +(let [partition (partition-all 32)] + (defn chunked [xform coll] + (letfn [(cont [[block & more]] + (when block + (let [n (int (count block)) + b (chunk-buffer n)] + (dotimes [i n] + (chunk-append b (nth block i))) + (chunk-cons (chunk b) (lazy-seq (cont more))))))] + (cont (eduction (comp xform partition) coll))))) + + +;; The graph traversal code below can not be in the form of transducers because laziness is +;; critical for this type of work. However individual steps here are transducers. Instead +;; of a function to get children, xchildren is a transducer which will be instantiated with +;; a single element collection. +;; +;; This seems to have a roughly 3x performance penalty compared with the lazy +;; seq version in rough initial testing: 50M descents in 12s vs 4s. This may +;; change/invert as the work to get children gets heavier + +(defn unwrapping-path [xform] + ;; TODO: not quite sure what this should do... + nil) + + +(defn descend + "A power-tool for recursively traversing the graph. See also: descents, all, deepest + + The arity 3 version omits the control function. It is like the arity 4 version + where the control function always returns :loop-and-emit. + + Arguments: + + `path`: The starting path that will be appended to as the function descends deeper into the graph. + Should be either nil or a vector. If nil, path will not be tracked. + `control`: A function that guides the descent. Should be a `(fn [path current])`. See below for valid return values. + `children`: A function that produces child elements for the current element: Should be a `(fn [path current])`. + `coll`: The starting collection. Elements in the starting collection will be passed to the control + function and may be emitted. + + Table of Valid Control Return Values: + + ;; [emit children siblings reset-path] + (def emit-and-continue [true true true false]) + (def emit [true false true false]) + (def emit-and-chain [true true false false]) + (def emit-and-cut [true false false false]) + (def continue [false true true false]) + (def ignore [false false true false]) + (def chain [false true false false]) + (def cut [false false false false]) + + The control signal is a vector of 4 booleans: + 0. emit: control whether the current element or path is emitted into the result setBit + 1. children: control whether to descend to the current element's children + 2. siblings: control whether to continue to traverse to the current element's siblings + 3. reset-path: if true, the path vector will be reset to [], meaning that any future emitted or control path will not have previous history in it. + + Hidden cycle protection: + + This section describes a failsafe to prevent descend from being caught + permanently in a graph cycle that is producing no results. If you expect + cycles, you are probably better off looking at the path that is passed to the + control and children functions to detect a repeating pattern based on your + traversal logic. This function will by default prevent traversing more than + *cut-no-results* (10,000,000) levels deep while returning no matching results. + Every *no-results-interval* (10,000) child levels, it will call the + *no-results* (fn [chk-buffer no-result down right]) function to allow it to + produce a resolution or to continue the search. Some standard resolution + functions are included: descend/cut-no-results, descend/continue-no-results, and + descend/value-for-no-results. Return their return value. You can modify the behavior + of this system by binding the following dynamic vars: + + descend/*cut-no-results* + descend/*no-results-interval* + descend/*no-results* + + Handling cycles: + + Cycles that are included in the results can be handled outside descend + because the results produced are lazy. See prevent-cycles or no-cycles! + below." + {:see-also ["descents" "all" "deepest" "all-paths" "deepest-paths"]} + ([path xchildren coll] + (lazy-seq (extrude (*descend path (fn [path item] (chunked xchildren [[path item]])) coll)))) + ([path control xchildren coll] + (lazy-seq (extrude (*descend path control (fn [path item] (chunked xchildren [[path item]])) coll *no-result-interval* 0))))) + + +(defn descents + "Descents is a variant of descend which returns the entire descent + path as a vector rather than just the resulting element. + + Note that the descent path is not the same as using with-path to produce + proper paths. The descent path only includes the actual elements that are + passed into the children function in the course of operation. + + Please see `descend` for details. In descents, the initial path is not optional." + {:see-also ["descend" "all" "deepest" "all-paths" "deepest-paths"]} + ([path xchildren coll] + (lazy-seq (extrude (*descents path (fn [path item] (chunked xchildren [[path item]])) coll)))) + ([path control xchildren coll] + (lazy-seq (extrude (*descents path control (fn [path item] (chunked xchildren [[path item]])) coll *no-result-interval* 0))))) + +(defn- ev-pred [f1 f2] + (fn + ([a] + (and (f1 a) (f2 a))) + ([a b] + (and (f1 a b) (f2 a b))))) + +(defn drop-path [xform] + (fn [rf] + (let [xform (xform rf)] + (fn + ([] (xform)) + ([result] + (xform result)) + ([result [path input]] + (xform result input)))))) + +(defn when-path [pred xform] + (fn [rf] + (let [xform (xform rf)] + (fn + ([] (xform)) + ([result] + (xform result)) + ([result [path input]] + (if (pred path input) + (xform result [path input]) + result)))))) + +(defn when-path! [pred xform] + (when-path pred (drop-path xform))) + +(defn- build-all + "Does everything just as its name implies." + [desc* control cut-cycles? pred path-pred element-pred xchildren r] + (let [paths (when (or cut-cycles? path-pred pred (identical? descents desc*)) + (if cut-cycles? + (ordered-set) + [])) + depth-pred (when-let [n (cond (nat-int? path-pred) path-pred (nat-int? pred) pred)] + (fn dpred [p] (< (count p) n))) + path-pred (if (nat-int? path-pred) nil path-pred) + path-pred (if (and path-pred depth-pred) + (ev-pred path-pred depth-pred) + (or path-pred depth-pred)) + ppe (cond (and (fn? path-pred) element-pred) (fn eppred [path e] (and (path-pred path) (element-pred e))) + (fn? path-pred) (fn ppred [path e] (path-pred path)) + element-pred (fn epred [path e] (element-pred e))) + pred (if cut-cycles? + (if (fn? pred) + (fn cppred [p e] (and (not (p e)) (pred p e))) + (fn cpred [p e] (not (p e))))) + pred (cond (and (fn? pred) ppe) (ev-pred pred ppe) + (fn? pred) pred + ppe ppe) + desc** (if control + (partial desc* paths control) + (partial desc* paths)) + xchildren (unwrapping-path xchildren)] + (if pred + (desc** (when-path pred xchildren) (ensure-seq r)) + (desc** xchildren (ensure-seq r))))) + +(defn all + "Produces a lazy sequence of every element in the route and all of their + children. Cuts cycles. + + `pred` is a `(fn [path element])` that returns true to continue iterating. + + `pred` or `path-pred` may be a natural integer, meaning the maximum path + length allowed before iterating. Note that the internal path is only the + elements seen by the iteration and is not the same as the more complete path + produced by `with-path`." + ([xchildren r] + (build-all descend nil true nil nil nil xchildren r)) + ([pred xchildren r] + (build-all descend nil true pred nil nil xchildren r)) + ([path-pred element-pred xchildren r] + (build-all descend nil true nil path-pred element-pred xchildren r))) + +(defn all-with-cycles + "Produces a lazy sequence of every element in the route and all of their + children. Does not cut cycles. + + See `all` for details on arities." + ([children r] + (build-all descend nil false nil nil nil children (ensure-seq r))) + ([children pred r] + (build-all descend nil false pred nil nil children (ensure-seq r))) + ([children path-pred el-pred r] + (build-all descend nil false nil path-pred el-pred children (ensure-seq r)))) + +(defn- deepest-control [xchildren] + (fn [p e] (if (seq (chunked xchildren [e])) continue emit))) + +(defn deepest + "Produces a lazy sequence of every leaf node reachable by traversing all of + the children of every element in the route. Cuts cycles. + + See `all` for details on arities." + ([children r] + (build-all descend (deepest-control children) true nil nil nil children r)) + ([pred children r] + (build-all descend (deepest-control children) true pred nil nil children r)) + ([path-pred element-pred children r] + (build-all descend (deepest-control children) true nil path-pred element-pred children r))) + + +(defn all-paths + "Produces a lazy sequence of paths to every element in the route and all of + their children. Cuts cycles. + + See `all` for details on arities." + ([xchildren r] + (build-all descents nil true nil nil nil xchildren r)) + ([pred xchildren r] + (build-all descents nil true pred nil nil xchildren r)) + ([path-pred element-pred xchildren r] + (build-all descents nil true nil path-pred element-pred xchildren r))) + +(defn all-paths-to + "Produce a lazy sequence of all paths to every element where pred returns true. + + Once a path is returned, that path will be cut and no further searching will happen. + + If there are multiple paths to the same element where pred returns true, all + of those paths will be returned. + + Cuts cycles" + [pred children r] + (descents (ordered-set) + (fn control [path e] (if (pred path e) emit continue)) + (when-path! (fn [path e] (not (path e))) children) + r)) + +(defn search + "Produce a lazy sequence of all elements where pred returns true. + + Once an element is returned, its children will not be seached. + + If there are multiple paths to the same result, the result will be returned + multiple times. + + Cuts cycles" + [pred children r] + (descend #{} + (fn control [path e] (if (pred path e) emit continue)) + (fn [path e] (when-not (path e) (children e))) + r)) + +(defn all-paths-with-cycles + "Produces a lazy sequence of paths to every element in the route and all of + their children. Does not cut cycles. + + See `all` for details on arities." + ([children r] + (build-all descents nil false nil nil nil children r)) + ([pred children r] + (build-all descents nil false pred nil nil children r)) + ([path-pred element-pred children r] + (build-all descents nil false nil path-pred element-pred children r))) + +(defn deepest-paths + "Produces a lazy sequence of paths to every leaf node reachable by traversing + all of the children of every element in the route. Cuts cycles. + + See `all` for details on arities." + ([children r] + (build-all descents (deepest-control children) true nil nil nil children r)) + ([pred children r] + (build-all descents (deepest-control children) true pred nil nil children r)) + ([path-pred element-pred children r] + (build-all descents (deepest-control children) true nil path-pred element-pred children r))) + +(defn- all-cycles-control [path e] + (if (= e (first path)) + emit-and-cut + continue)) + +(defn all-cycles + "Produces a lazy sequence of elements that have a cyclic path. + + See `all` for details on arities." + ;; force a path pred to turn on ordered-sets in build-all. + ([children r] + (build-all descend all-cycles-control true nil (constantly true) nil children r)) + ([pred children r] + (build-all descend all-cycles-control true pred (constantly true) nil children r)) + ([path-pred element-pred children r] + (build-all descend all-cycles-control true nil + (or path-pred (constantly true)) element-pred children r))) + +(defn all-cycle-paths + "Produces a lazy sequence of cyclic paths. + + See `all` for details on arities." + ;; force a path pred to turn on ordered-sets in build-all. + ([children r] + (build-all descents all-cycles-control true nil (constantly true) nil children r)) + ([pred children r] + (build-all descents all-cycles-control true pred (constantly true) nil children r)) + ([path-pred element-pred children r] + (build-all descents all-cycles-control true nil + (or path-pred (constantly true)) element-pred children r))) + +(defn is-cycle + "Matches only if the current element is a member of the results from f." + [children r] + (lookahead #(all-cycles 1 children %) r)) + +(defn no-cycle + "Matches only if the current element is not a member of the results from f." + [children r] + (neg-lookahead #(all-cycles 1 children %) r)) diff --git a/test/fermor/gremlin_examples_test.clj b/test/fermor/gremlin_examples_test.clj index 224d38e..2980e35 100644 --- a/test/fermor/gremlin_examples_test.clj +++ b/test/fermor/gremlin_examples_test.clj @@ -245,12 +245,12 @@ {4 #{(V :product 8) (V :product 10)}, 5 #{(V :product 9) (V :product 2)}, 6 #{(V :product 1)}}] - [(V :person :bob) - {2 #{(V :product 6) (V :product 8) (V :product 10)}, - 3 #{(V :product 7) (V :product 9)}}] [(V :person :jill) {2 #{(V :product 3) (V :product 1) (V :product 5)}, 3 #{(V :product 7) (V :product 9)}}] + [(V :person :bob) + {2 #{(V :product 6) (V :product 8) (V :product 10)}, + 3 #{(V :product 7) (V :product 9)}}] [(V :person :jack) {2 #{(V :product 6) (V :product 8) (V :product 10)}, 3 #{(V :product 2) (V :product 4)}}]] diff --git a/test/fermor/transducers_test.clj b/test/fermor/transducers_test.clj new file mode 100644 index 0000000..f51a5a3 --- /dev/null +++ b/test/fermor/transducers_test.clj @@ -0,0 +1,302 @@ +(ns fermor.transducers-test + (:require [clojure.test :as t :refer [deftest testing is]] + [fermor.transducers :as g :refer :all :exclude [is]] + [fermor.descend :refer [*no-result-interval* *cut-no-results* *no-results* cut-no-results value-for-no-results + continue-no-results]])) + +(deftest edges-are-eq + (let [g (-> (graph) + (add-edges :loom [[:a :b 4]]) + (add-edges :xyz [[:c :d]]) + (add-edges :nope [[:d :b]]) + (add-vertices [[:a {:info "ok!"}]]) + forked)] + (is (= [(e->in :a :loom [4] :b)] [(e-> :a :loom [4] :b)])) + (is (= [(e-> :a :loom [4] :b)] [(e->in :a :loom [4] :b)])))) + + + +(deftest edge-flavours + (let [g (-> (build-graph {:edge-builder {:w (fermor.graph/add-unique-weighted-edge 0.0) + :p fermor.graph/add-parallel-edge}}) + (add-edges :normal [[:a :b {:k 9}] + [:a :b {:x 1}] + [:b :c {:bc :ski}] + [:b :c]]) + (add-edges :p [[:a :b {:x 10}] + [:b :c {:x 11}] + [:b :c {:x 11}] + [:a :b {:x 2}] + [:a :b] + [:x :y] + [:a :b {:x 3}] + [:a :b {:x 4}] + [:a :b {:k 2}] + [:a :b {:k 9}]]) + (add-edges :w [[:a :b 10] + [:b :c 11] + [:a :b 2] + [:a :b 2] + [:a :b 2] + [:a :b 2] + [:a :b 5]]) + forked)] + (is (= #{:a :b :c :x :y} (into #{} (map element-id) (vertices g)))) + (is (= #{nil {:x 1}} + (into #{} (comp (out-e :normal) (documents)) (vertices g)))) + (is (= #{{:parallel/count 7 + 0 {:x 10} + 1 {:x 2} + 2 nil + 3 {:x 3} + 4 {:x 4} + 5 {:k 2} + 6 {:k 9}} + {:parallel/count 1 0 nil} + {:parallel/count 2 + 0 {:x 11} + 1 {:x 11}}} + (into #{} (comp (out-e :p) (documents)) (vertices g)))) + (is (= #{11.0 23.0} + (into #{} (comp (out-e :w) (documents)) (vertices g)))) + (is (= #{:b :c} + (into #{} (comp (out :w) (element-ids)) (vertices g)))) + (is (= #{:a :b :c :x :y} + (into #{} (comp (both) (element-ids)) (vertices g)))) + + (is (= #{#{:y} #{:b} #{:x} #{:c :a}} + (into #{} (comp (section #(apply set %) + (comp + (both*) + (map* (element-ids))))) + (vertices g)))) + (is (= #{#{:y} #{:b} #{:x} #{:c :a}} + (into #{} (comp (section set (comp (both) (element-ids)))) (vertices g)))) + (is (= #{#{:y} #{:b} #{:x} #{:c :a}} + (into #{} (comp (section set (comp (both) (element-ids)))) (vertices g)))))) + + + +(def g (-> (build-graph {:edge-builder {:w (fermor.graph/add-unique-weighted-edge 0.0) + :p fermor.graph/add-parallel-edge}}) + (add-edges :normal [[:a :b {:k 9}] + [:a :b {:x 1}] + [:b :c {:bc :ski}] + [:b :c]]) + (add-edges :p [[:a :b {:x 10}] + [:b :c {:x 11}] + [:b :c {:x 11}] + [:a :b {:x 2}] + [:a :b] + [:x :y] + [:a :b {:x 3}] + [:a :b {:x 4}] + [:a :b {:k 2}] + [:a :b {:k 9}]]) + (add-edges :w [[:a :b 10] + [:b :c 11] + [:a :b 2] + [:a :b 2] + [:a :b 2] + [:a :b 2] + [:a :b 5]]) + forked)) + +(deftest extrude-basic-descend-calls + (testing "finite" + (is (= [2] + (descend [] (mapcat (constantly [])) [2]))) + (is (= [0 1 2 3 -1 0 1 2 3 -2] + (descend [] (drop-path (mapcat (fn [v] (when (zero? v) [1 2 3])))) [0 -1 0 -2])))) + (testing "stack overflow caused by concat in heavy left" + (is (= [2 1 1 1 1] + (take 5 (descend [] (drop-path (mapcat (constantly [1]))) [2])))) + (is (= [1 1 1] + (take 3 (drop 500000 (descend [] (map (constantly 1)) [2])))))) + (testing "stack overflow caused by concat in heavy right" + (is (= [0 1 2 3 4] + (take 5 (descend [] (mapcat (constantly [])) (range))))) + (is (= [500000 500001 500002] + (take 3 (drop 500000 (descend [] (mapcat (constantly [])) (range))))))) + (testing "mixed tree" + (is (= [0 1 0 2 0 1 0 3 0 1 0 2 0 1 0 4 0 1 0 2] + (take 20 (descend [] (drop-path (mapcat range)) (range))))) + (is (= 18 + (apply max (take 500000 (descend [] (drop-path (mapcat range)) (range)))))) + (is (= [0 1 "1:0" :3 2 "2:0" :3 "2:1" :3 3 "3:0" :3 "3:1" :3 "3:2" :3 4 "4:0" :3 "4:1"] + (take 20 (descend [] + (drop-path + (cond-branch + string? (map (fn [v] (keyword (str (count v))))) + number? (mapcat (fn [v] (map (fn [n] (str v ":" n)) + (range v)))))) + (range))))))) + +(defn alot [x] + ;; NOTE: unlike the purely lazy version, this does not handle infinite sequences because + ;; the built-in cat transducer is greedy. Fortunately I think infinite pathways in + ;; a graph are not infinite in a single step so I hope this will not have an impact + ;; in practice. + (range 100000)) + +;; NOTE: this idea of replacing all traversals with transducers unfortunately has faltered +;; at this point. Transducers are eager and I was not able to see any way to make +;; them compatible with the various infinitely recursive loops that graphs get into +;; when you start doing these types of descents through them. +#_ +(deftest descend-with-control + (testing "loop" + (is (= [] (descend [] (fn [path v] continue) (mapcat (fn [v] (when (= 0 v) [1 2 3]))) [0 -1 0 -2]))) + + (is (= [1 2 3 -1 1 2 3 -2] + (descend [] + (fn [path v] + (case (int v) 0 continue emit)) + (unwrapping-path (mapcat (fn [v] (when (= 0 v) [1 2 3])))) + [0 -1 0 -2]))) + + (is (= [1 2 3 -1 1 2 3 -2] + (descend [] + (fn [path v] + (case (int v) 0 continue emit-and-continue)) + (unwrapping-path (mapcat (fn [v] (when (= 0 v) [1 2 3])))) + [0 -1 0 -2]))) + + (is (= [0 -1 0 -2] + (descend [] + (fn [path v] emit) + (unwrapping-path (mapcat (fn [v] (when (= 0 v) [1 2 3])))) + [0 -1 0 -2]))) + + (is (= [0 1 -1 0 1 -2] + (descend [] + (fn [path v] + (case (int v) 2 cut emit-and-continue)) + (unwrapping-path (mapcat (fn [v] (when (= 0 v) [1 2 3])))) + [0 -1 0 -2]))) + + (is (= [0 1 2 -1 0 1 2 -2] + (descend [] + (fn [path v] + (case (int v) 2 emit-and-cut emit-and-continue)) + (unwrapping-path (mapcat (fn [v] (when (= 0 v) [1 2 3])))) + [0 -1 0 -2]))) + + (is (= [0 1 0 1] + (descend [] + (fn [path v] + (case (int v) 0 emit-and-continue 1 emit-and-continue 2 cut ignore)) + (unwrapping-path (mapcat (fn [v] (when (= 0 v) [1 2 3])))) + [0 -1 0 -2]))) + + (binding [*no-result-interval* 100 + *cut-no-results* 1000] + (is (= (range 10) + (take 200 (descend [] + (fn [path v] (if (> 10 v) emit-and-continue ignore)) + (mapcat (constantly [])) + (range)))))) + + + + + (binding [*no-results* (fn [chk-buf depth down right] + (cut-no-results))] + (is (= (range 10) + (take 20 (descend [] + (fn [path v] (if (> 10 v) emit-and-continue ignore)) + (mapcat (constantly [])) + (range)))))) + + (binding [*no-results* (fn [chk-buf depth down right] + (value-for-no-results chk-buf :nothing down right))] + (is (= [0 1 2 3 4 5 6 7 8 9 :nothing :nothing :nothing :nothing :nothing :nothing :nothing :nothing :nothing :nothing] + (take 20 (descend [] + (fn [path v] (if (> 10 v) emit-and-continue ignore)) + (mapcat (constantly [])) + (range)))))) + + (binding [*no-results* (fn [chk-buf depth down right] + (value-for-no-results chk-buf :nothing nil nil))] + (is (= [0 1 2 3 4 5 6 7 8 9 :nothing] + (take 20 (descend [] + (fn [path v] (if (> 10 v) emit-and-continue ignore)) + (mapcat (constantly [])) + (range)))))) + + (binding [*no-result-interval* 100 + *cut-no-results* 100] + (is (= [100 100 100] + (take 3 (descend [] + (fn [path v] (if (= 100 v) emit-and-continue ignore)) + (mapcat alot) + (range)))))) + + (binding [*no-result-interval* 99 + *cut-no-results* 99] + (is (= [] + (take 3 (descend [] + (fn [path v] (if (= 100 v) emit-and-continue ignore)) + (mapcat alot) + (range)))))) + + (let [lazy-coll (binding [*no-result-interval* 100 + *cut-no-results* 100] + (seq (descend [] + (fn [path v] (if (= 100 v) emit-and-continue ignore)) + (mapcat alot) + (range))))] + (binding [*no-result-interval* 99 + *cut-no-results* 99] + ;; The inner binding should be ignored when the seq is executed because + ;; it's already all baked into the seq when it was first resolved. + (is (= [100 100 100] (take 3 lazy-coll))))) + + (binding [*no-result-interval* 10 + *no-results* (fn [b v down right] + (continue-no-results nil right))] + (is (= 1110 + (count (descend [] + ;; this control block is intentionally terrible to force worst case behaviours + (fn [path v] + (if (and (>= 2 (count path)) + (every? #(> 10 %) path)) + (if (> 10 v) emit-and-continue continue) + ignore)) + ;; infinite space in every direction + (mapcat alot) + (range)))))) + + (is (= (take 10 (repeat 0)) + (take 10 (descend nil + (constantly emit-and-chain) + (mapcat alot) + (range))))))) + + +(comment + (take 200 (chunked + (mapcat (fn [x] (range 1000))) + (range 5))) + + (chunk-buffer 10) + + + (take 11 (all (mapcat (fn [x] [[x 1] [x 2] [x 3]])) (range 3))) + + (deepest 3 (mapcat (fn [x] + (if (= 4 x) + [] + (range x)))) + (range 8)) + + all + + nil + + (time + (count + (take 1000 (descend [] + (when-path! (fn [p _] (> 2 (count p))) + (mapcat (fn [_](range 10)))) + [2])))))