Repository: johnmn3/injest Branch: main Commit: e9e2d965932b Files: 33 Total size: 147.9 KB Directory structure: gitextract_f4rxdrb9/ ├── .claude/ │ ├── bin/ │ │ └── clj-paren-repair-claude-hook │ ├── hooks/ │ │ └── session-start.sh │ └── settings.json ├── .clj-kondo/ │ └── config.edn ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── build.clj ├── deps.edn ├── docs/ │ └── shootout.md ├── resources/ │ └── clj-kondo.exports/ │ └── net.clojars.john/ │ └── injest/ │ ├── config.edn │ └── injest/ │ └── path.clj ├── scripts/ │ └── ccweb-setup.sh ├── src/ │ └── injest/ │ ├── classical.cljc │ ├── data.cljc │ ├── impl.cljc │ ├── path.cljc │ ├── report/ │ │ └── path.cljc │ ├── report.cljc │ ├── state.cljc │ ├── test.clj │ ├── test.cljs │ └── util.cljc └── test/ └── injest/ ├── classical_test.cljc ├── clj_kondo_test.sh ├── edge_cases_test.cljc ├── equivalence_test.cljc ├── macro_expansion_test.clj ├── parallelism_test.cljc ├── path_test.cljc ├── path_unit_test.cljc └── transducer_detection_test.cljc ================================================ FILE CONTENTS ================================================ ================================================ FILE: .claude/bin/clj-paren-repair-claude-hook ================================================ #!/usr/bin/env bash # clj-paren-repair-claude-hook # # Wrapper that invokes bhauman/clojure-mcp-light's hook via babashka. # Reads Claude hook JSON from stdin, repairs unbalanced delimiters and # optionally formats Clojure files with cljfmt. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" BUNDLED_DIR="$SCRIPT_DIR/../bundled/clojure-mcp-light" # Check bb is available if ! 
command -v bb &>/dev/null; then # Pass through — no repair available cat exit 0 fi # Forward stdin to the real hook implementation exec bb --config "$BUNDLED_DIR/bb.edn" -m clojure-mcp-light.hook -- "$@" ================================================ FILE: .claude/hooks/session-start.sh ================================================ #!/usr/bin/env bash set -euo pipefail # Only run in ccweb (remote) environments if [ "${CLAUDE_CODE_REMOTE:-}" != "true" ]; then exit 0 fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" # Run the main setup script "$PROJECT_DIR/scripts/ccweb-setup.sh" # Export PATH and convenience functions into CLAUDE_ENV_FILE # so every subsequent Bash call has them if [ -n "${CLAUDE_ENV_FILE:-}" ]; then cat >> "$CLAUDE_ENV_FILE" <<'ENVEOF' export NODE_USE_ENV_PROXY=1 export PATH="$HOME/.local/bin:$PATH" # shadow-compile: bypass Pomegranate by computing classpath via clojure -Spath shadow-compile() { local build="$1" local alias="${2:-:app}" local cp cp="$(clojure -M"$alias" -Spath)" java -cp "$cp" clojure.main -m shadow.cljs.devtools.cli compile "$build" } ENVEOF fi ================================================ FILE: .claude/settings.json ================================================ { "hooks": { "SessionStart": [{ "hooks": [{ "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/session-start.sh" }] }], "PreToolUse": [{ "matcher": "Write|Edit", "hooks": [{ "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/bin/clj-paren-repair-claude-hook --cljfmt" }] }], "PostToolUse": [{ "matcher": "Write|Edit", "hooks": [{ "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/bin/clj-paren-repair-claude-hook --cljfmt" }] }] } } ================================================ FILE: .clj-kondo/config.edn ================================================ {:config-paths ["../resources/clj-kondo.exports/net.clojars.john/injest"] :output {:exclude-files ["src/injest/test.clj" 
"src/injest/test.cljs"]}} ================================================ FILE: .gitignore ================================================ /target /classes /checkouts *.jar *.class /.cljs_node_repl /.calva/output-window/ /.cpcache /.lein-* /.lsp/sqlite*.db /.nrepl-history /.nrepl-port /.rebel_readline_history /.socket-repl-port .hgignore .hg/ linux-install-* .clj-kondo/.cache .clj-kondo/net.clojars.john .lsp out cljs-test-runner-out ================================================ FILE: CHANGELOG.md ================================================ # Change Log All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). ## [0.1.0-beta.9] - 2026-03-31 - fix clj-kondo macroexpand hook StackOverflowError: `defn` -> `defmacro` for `+>` and `+>>` in exported hook (fixes clj-kondo/clj-kondo#2798) - move clj-kondo exports from `src/clj-kondo/` to `resources/` per standard classpath packaging convention - add missing macroexpand hooks for `|>` and `|>>` - add `"resources"` to `:paths` in deps.edn - add comprehensive test suite: 103 tests, 420 assertions covering macro expansion, path navigation, transducer detection, classical namespace, equivalence proofs, edge cases, and parallelism ## [0.1.0-beta.7] - 2022-02-12 - went to beta - added monitoring ## [0.1.0-alpha.24] - 2021-10-23 - improve linter - update docs ## [0.1.0-alpha.23] - 2021-10-22 - enable `cat` - fix cljs - update docs ## [0.1.0-alpha.22] - 2021-10-09 - add lint defs for clj-kondo - remove .devcontainer ## [0.1.0-alpha.15] - 2021-10-02 - fix bug in `=>>` with small sequences - more docks and docs/shootout.md - discussion on comparing `|>>` and `=>>` happened on clojureverse: [Fight Night](https://clojureverse.org/t/parallel-transducing-context-fight-night-pipeline-vs-fold/8208) ## [0.1.0-alpha.13] - 2021-09-25 - reverted names back to `=>>` and `|>>` - fixed bug with `=>>` fold impl, limiting smallest 
partition to parallelism count ## [0.1.0-alpha.12] - 2021-09-22 - Added tests - Adopted Sean Corfield's deps-new lib template ## [0.1.0-alpha.9] - 2021-09-20 ### Changed - Major code cleanup [0.1.0-alpha.9]: https://github.com/johnmn3/injest/compare/v0.1-alpha.8...v0.1-alpha.9 [0.1.0-alpha.13]: https://github.com/johnmn3/injest/compare/v0.1-alpha.9...0.1.0-alpha.13 ================================================ FILE: LICENSE ================================================ Copyright (C) 2021-2026 John Newman Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ================================================ FILE: README.md ================================================ # `injest`: `+>` `+>>` `x>>` `=>>` [![Clojars Project](https://img.shields.io/clojars/v/net.clojars.john/injest.svg)](https://clojars.org/net.clojars.john/injest) [![Cljdoc](https://cljdoc.org/badge/net.clojars.john/injest)](https://cljdoc.org/d/net.clojars.john/injest) [![project chat](https://img.shields.io/badge/zulip-join_chat-brightgreen.svg)](https://clojurians.zulipchat.com/#streams/302003/injest) Clojure's [threading macros](https://clojure.org/guides/threading_macros) (the `->` and `->>` [thrushes](http://blog.fogus.me/2010/09/28/thrush-in-clojure-redux/)) are great for navigating into data and transforming sequences.
`injest`'s [_path thread_](#path-threads) macros `+>` and `+>>` are just like `->` and `->>` but with expanded path navigating abilities similar to `get-in`. [Transducers](https://clojure.org/reference/transducers) are great for performing sequence transformations efficiently. `x>>` combines the efficiency of transducers with the better ergonomics of `+>>`. Thread performance can be further extended by automatically parallelizing work with `=>>`. `injest` macros achieve this by scanning forms for transducers and `comp`ing them together into a function that either `sequence`s or parallel `fold`s the values flowing in the thread through the transducers. ## Getting Started ### deps.edn Place the following in the `:deps` map of your `deps.edn` file: ```clojure ... net.clojars.john/injest {:mvn/version "0.1.0-beta.9"} ... ``` ### Quickstart To try it in a repl right now with `criterium` and `net.cgrand.xforms`, drop this in your shell: ```clojure clj -Sdeps \ '{:deps {net.clojars.john/injest {:mvn/version "0.1.0-beta.9"} criterium/criterium {:mvn/version "0.4.6"} net.cgrand/xforms {:mvn/version "0.19.2"}}}' ``` ### Requiring Then require the `injest` macros in your project: ```clojure (ns ... (:require [injest.path :refer [+> +>> x>> =>>]] ... ``` To just use `x>>` or `=>>` with the classical thread behavior, without the additional [_path thread_](#path-threads) semantics, you can require in the `injest.classical` namespace instead of the `injest.path` namespace: ```clojure (ns ... (:require [injest.classical :refer [x>> =>>]] ... ``` Having these two `:require` options allows individuals and organizations to adopt a la carte these orthogonal value propositions of _improved performance_ and _improved navigation_. # Path Threads `injest.path` allows for more intuitive path navigation, like you're used to with the `(-> m :a :b :c)` idiom. We refer to these as _path threads_. Ergonomically, path threads provide a semantic superset of the behaviors found in `->` and `->>`. 
In other words, there is generally nothing you can do with `->` that you can't do with `+>`. All the thread macros in `injest.path` have these path thread semantics. ## As a replacement for `get-in`, `get` and `nth` In path threads, naked integers and strings become lookups on the value being passed in, making those tokens useful again in threads. You can index into sequences with integers, like you would with `nth`, and replace `get`/`get-in` for most cases involving access in heterogeneous map nestings: ```clojure (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :res}]}])}] (+> m 1 2 "b" 2 :c name)) ;=> "res" ``` Here, we're looking up `1` in the map, then getting the third element of the sequence returned, then looking up `"b"` in the returned map, then getting the third element of the returned vector, then looking up `:c` in the returned map, and then finally calling name on the returned keyword value. In the above form, you could replace `+>` with either `+>>`, `x>>` or `=>>`, and you will still get the same result. `+>>` is simply the thread-last version of `+>` and `x>>` and `=>>` are transducing and parallel versions of `+>>`. ## Lambda wrapping Path threads allow you to thread values through anonymous functions, like `#(- 10 % 1)` or `(fn [x] (- 10 x 1))`, without having to wrap them in an extra enclosing set of parentheses: ```clojure (x>> 10 range rest 2 #(- 10 % 1)) ;=> 6 ``` Or, extending our prior example: ```clojure (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :bob}]}])}] (x>> m 1 2 "b" 2 :c name #(println "hi " % "!"))) ;=> "hi bob!" ``` This has the added benefit of conveying to the reader that the author intends for the anonymous function to only take one parameter. In the classical thread syntax, the reader would have to scan all the way to the end of `(#(... ` in order to know if an extra parameter is being passed in.
This also prevents people from creating unmaintainable abstractions involving the threading of values into a literal lambda definition - a [common](https://stackoverflow.com/questions/7838326/function-call-in-threading-macro) [source](https://stackoverflow.com/questions/25317235/thread-first-array-map-literal-to-anonymous-function-in-clojure) [of](https://stackoverflow.com/questions/29897115/clojure-threading-first-macro-with-math-pow-or-any-other-multiple-args-functi) [errors](https://stackoverflow.com/questions/60027298/clojure-custom-function-for-threading-macro). ## Backwards compatibility `+>` and `+>>` have the same laziness semantics as `->` and `->>`. So, if you find yourself wanting to migrate a _path thread_ away from a transducer/parallel context, back to the more lazy semantics, but you want to keep the path navigation semantics, you can simply replace the `x>>` or `=>>` macros with the `+>>` macro we required in above. Path navigating will continue to work: ```clojure (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :bob}]}])}] (+>> m 1 2 "b" 2 :c name #(println "hi " % "!"))) ;=> "hi bob!" ``` You can also just use `+>` and `+>>` on their own, without the transducifying macros, if you only want the more convenient ergonomics. As stated above, you can also require `x>>` and `=>>` in from `injest.classical` and, in the event you want to revert back to `->>`, you will be able to do that knowing that no one has added any _path thread_ semantics to the thread that would also need to be converted to the classical syntax. # `x>>` Auto Transducification Why? Well, for one, speed. Observe: ```clojure (->> (range 10000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) ;; (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?)
(apply +) time) ``` Returns: ```clojure "Elapsed time: 8275.319295 msecs" 5000054999994 ``` Whereas: ```clojure (x>> (range 10000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) ;; (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time) ``` Returns: ```clojure "Elapsed time: 2913.851103 msecs" 5000054999994 ``` Two to three times the speed with basically the same code. The more transducers you can get lined up contiguously, the less boxing you’ll have in your thread. > Note: These times reflect the execution environment provided by Github's browser-based vscode runtime. My local box performs much better and yours likely will too. Let’s uncomment the `(mapv dec)` that is currently commented out in both the threads above. Because `mapv` is not a transducer, items get boxed halfway through our thread. As a result our performance degrades slightly for `x>>`. First, let's see it with `->>`: ```clojure (->> (range 10000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time) "Elapsed time: 6947.00928 msecs" 44999977000016 ``` Hmm, `->>` actually goes faster now, perhaps due to `mapv` removing some laziness. The more lazy semantics are less predictable in that way. But now, for `x>>`: ```clojure (x>> (range 10000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time) "Elapsed time: 3706.701192 msecs" 44999977000016 ``` So we lost some speed due to the boxing, but we’re still doing a worthy bit better than the default thread macro. So keep in mind, if you want to maximize performance, try to align your transducers contiguously. 
> Note: In addition to improved speed, transducers also provide improved memory efficiency over finite sequences. So `x>>` may lower your memory usage as well. ### Available Transducers | These are the core functions that are available to use as transducers in a `x>>` thread-last: | | --- | | `take-nth`, `disj!`, `dissoc!`, `distinct`, `keep-indexed`, `random-sample`, `map-indexed`, `map`, `replace`, `drop`, `remove`, `cat`, `partition-all`, `interpose`, `mapcat`, `dedupe`, `drop-while`, `partition-by`, `take-while`, `take`, `keep`, `filter`, `halt-when` | ## `=>>` Auto Parallelization `injest` provides a parallel version of `x>>` as well. `=>>` leverages Clojure's parallel [`fold`](https://clojuredocs.org/clojure.core.reducers/fold) [reducer](https://clojure.org/reference/reducers#_using_reducers) in order to execute stateless transducers over a [Fork/Join pool](http://gee.cs.oswego.edu/dl/papers/fj.pdf). Remaining stateful transducers are `comp`ed and threaded just like `x>>`. It doesn't work well for small workloads though, so for demonstration purposes let's augment our above threads: ```clojure (require '[clojure.edn :as edn]) (defn work-1000 [work-fn] (range (last (repeatedly 1000 work-fn)))) (defn ->>work [input] (work-1000 (fn [] (->> input (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) str (take 3) (apply str) edn/read-string)))) (defn x>>work [input] (work-1000 (fn [] (x>> input (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) str (take 3) (apply str) edn/read-string)))) ``` Same deal as before but we're just doing a little extra work in our thread, repeating it a thousand times and then preparing the results for handoff to the next stage of execution. 
Now let's run the classical `->>` macro: ```clojure (->> (range 100) (repeat 10) (map ->>work) (map ->>work) (map ->>work) (map ->>work) (map ->>work) (map ->>work) last count time) ; "Elapsed time: 18309.397391 msecs" ;=> 234 ``` Just over 18 seconds. Now let's try the `x>>` version: ```clojure (x>> (range 100) (repeat 10) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) last count time) ; "Elapsed time: 6252.224178 msecs" ;=> 234 ``` Just over 6 seconds. Much better. Now let's try the parallel `=>>` version: ```clojure (=>> (range 100) (repeat 10) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) last count time) ; "Elapsed time: 3142.75057 msecs" ;=> 234 ``` Just over 3 seconds. Much, much better! Again, in local dev your times may look a bit different. On my Macbook Pro here, those times are `11812.604504`, `5096.267348` and `933.940569` msecs. So, in other words, `=>>` can sometimes be 5 times faster than `x>>` and 10 times faster than `->>`, depending on the shape of your workloads and the number of cores you have available. > There is also a parallel thread macro (`|>>`) that uses `core.async/pipeline` for parallelization. It's still available for folks interested in improving it, but is not recommended for general use. `=>>` performs better in most cases. A soon-to-be-updated analysis ([shootout.md](https://github.com/johnmn3/injest/blob/main/docs/shootout.md)) compares the differences between `|>>` and `=>>`.
### Available Parallel Transducers | These are the core functions that are available to use as parallel transducers in a `=>>` thread-last: | | --- | | `dedupe`, `disj!`, `dissoc!`, `filter`, `keep`, `map`, `random-sample`, `remove`, `replace`, `take-while`, `halt-when`, `mapcat`, `cat` | ## Clojurescript ~In Clojurescript we don't yet have parallel thread macro implementations but for `x>>`~ > Update: The parallel (`=>>`) thread macro has been implemented in [`cljs-thread`](https://github.com/johnmn3/cljs-thread?tab=readme-ov-file). We'll get into the Clojurescript version of `=>>` below, but first let's look at the single threaded `x>>`. The performance gains for `x>>` are even more pronounced than in Clojure. On my macbook pro, with an initial value of `(range 1000000)` in the above thread from our first example, the default threading macro `->>` produces: ```clojure (->> (range 1000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time) "Elapsed time: 3523.186678 msecs" 50005499994 ``` While the `x>>` version produces: ```clojure (x>> (range 1000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time) "Elapsed time: 574.145888 msecs" 50005499994 ``` That's a _six times_ speedup! Perhaps that speedup would not be so large if we tested both versions in _advanced_ compile mode. Then the difference in speed might come closer to the Clojure version. In any case, this is some very low-hanging performance fruit. 
### `=>>` in Clojurescript So, suppose you have some non-trivial work: ```clojure (defn flip [n] (apply comp (take n (cycle [inc dec])))) ``` On a single thread, in Chrome, this takes between 16 and 20 seconds (on this computer): ```clojure (->> (range) (map (flip 100)) (map (flip 100)) (map (flip 100)) (take 1000000) (apply +) time) ``` On Safari and Firefox, that will take between 60 and 70 seconds. Let's try it with `=>>`: ```clojure (=>> (range) (map (flip 100)) (map (flip 100)) (map (flip 100)) (take 1000000) (apply +) time) ``` On Chrome, that'll take only about 8 to 10 seconds. On Safari it takes about 30 seconds and in Firefox it takes around 20 seconds. So in Chrome and Safari, you can roughly double your speed and in Firefox you can go three or more times faster. By changing only one character, we can double or triple our performance, all while leaving the main thread free to render at 60 frames per second. Notice also how it's lazy :) See the [`cljs-thread`](https://github.com/johnmn3/cljs-thread) repo to learn more about how to set things up with the web workers. > Note: On the main/screen thread, `=>>` returns a promise. `=>>` defaults to a chunk size of 512. ## Extending `injest` The `injest.state` namespace provides the `reg-xf!` and `reg-pxf!` macros that can take one or more transducers. Only stateless transducers (or, more precisely, transducers that can be used safely within a parallel `fold` or `pipeline` context) should be registered with `reg-pxf!`. `injest`'s thread macros will then include those functions when deciding which forms should be treated as transducers. You should only need to call `reg-xf!` in one of your initially loaded namespaces. ```clojure (require '[injest.state :as i.s]) (require '[net.cgrand.xforms :as x]) (i.s/reg-xf! x/reduce) (x>> (range 10000000) (map inc) (filter odd?)
(mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (x/reduce +) first time) ``` Even better! ```clojure "Elapsed time: 2889.771067 msecs" 5000054999994 ``` In Clojurescript, you can add another Clojure (`*.clj`) namespace to your project and register there with the `regxf!` function and explicitly namespaced symbols. ```clojure (i.s/regxf! 'my.cljs.xforms.library/sliding-window) ``` Or, if a transducer library like `net.cgrand.xforms` exports the same namespaces and names for both Clojure and Clojurescript, you can just `(i.s/reg-xf! x/reduce)` in a Clojure namespace in your project and then it will be available to the `x>>`/`=>>` threads in both your Clojure and Clojurescript namespaces. ## Reporting Instrumentation You can optionally instrument the `x>>` and `=>>` macros for profiling code in a deployed runtime environment like so: ```clojure (ns ... (:require [injest.report :as r] [injest.report.path :as injest :refer [+> +>> x>> =>>]])) ``` Then in some core namespace, just register a report handler and then turn it on: ```clojure (r/add-report-tap! println 60) ;; <- or tap>, log/info, etc (r/report! true) ``` If you don't pass `add-report-tap!` a second argument (the reporting interval in seconds), it defaults to 10 seconds. The above expressions will handle reporting events with the `println` function, called once every 60 seconds. Then, in any namespace, be sure to require the macros from the `injest.report.path` namespace: ```clojure (ns ...
(:require [injest.report.path :as injest :refer [+> +>> x>> =>>]])) ``` Then you can use `x>>` and `=>>` like you normally would, but you will see a report on all instances in the repl: ```clojure {:namespace "injest.test" :line 15 :column 5 :x>> "x>> is 1.08 times faster than =>>" :=>> "=>> is 2.67 times faster than +>>"} {:namespace "injest.test" :line 38 :column 3 :+>> "+>> is 2.5 times faster than x>>"} {:namespace "injest.test" :line 44 :column 5 :=>> "=>> is 1.9 times faster than x>>" :x>> "x>> is 1.4 times faster than +>>"} ``` As you can see, the first line of a given report result is the namespace, along with `?line=` and the line number and `&col=` and the column number. For the `x>>` variant, only `x>>` and `+>>` are compared. When `=>>` is used, all three of `=>>`, `x>>` and `+>>` are compared. This allows you to leverage the instrumented versions of the macros in order to assess which one is most appropriate for the runtime load in your actually running application. # Caveats It should be noted as well: * Because transducers have different laziness semantics, you can't be as liberal with your consumption, so test on live data before using this as a drop-in replacement for the default thread macros. If you have any problems, feature requests or ideas, feel free to drop a note in the issues or discuss it in the clojureverse [thread](https://clojureverse.org/t/x-x-auto-transducifying-thread-macros/8122/9). # References Some other performance-related investigations you may be interested in: * [cgrand/xforms](https://github.com/cgrand/xforms) - More transducers and reducing functions for Clojure(script)!
* [clj-fast](https://github.com/bsless/clj-fast) - optimized core functions * [structural](https://github.com/joinr/structural) - efficient destructuring Inspiration for the lambda wrapping came from this ask.clojure request: [should-the-threading-macros-handle-lambdas](https://ask.clojure.org/index.php/9023/should-the-threading-macros-handle-lambdas) Inspiration for the `fold` implementation of `=>>` came from [reborg/parallel](https://github.com/reborg/parallel#ptransduce)'s `p/transduce` # Get Involved Want to implement the `somex>>` macro? Just copy how I did it and feel free to submit a PR. If you see a deficiency, file an issue here or swing by and join the discussion on the [zulip channel](https://clojurians.zulipchat.com/#streams/302003/injest). # License Distributed under the [Zero-Clause BSD License (0BSD)](https://opensource.org/licenses/0BSD). ================================================ FILE: build.clj ================================================ (ns build (:refer-clojure :exclude [test]) (:require [org.corfield.build :as bb])) (def lib 'net.clojars.john/injest) (def version "0.1.0-beta.9") ;; clojure -T:build ci ;; clojure -T:build deploy (def url "https://github.com/johnmn3/injest") (def scm {:url url :connection "scm:git:git://github.com/johnmn3/injest.git" :developerConnection "scm:git:ssh://git@github.com/johnmn3/injest.git" :tag version}) (def pom-data [[:licenses [:license [:name "Zero-Clause BSD"] [:url "https://opensource.org/licenses/0BSD"]]]]) (defn test "Run the tests." [opts] (bb/run-tests opts)) (defn ci "Run the CI pipeline of tests (and build the JAR)." [opts] (-> opts (assoc :lib lib :version version :scm scm :pom-data pom-data) (bb/run-tests) (bb/clean) (bb/jar))) (defn deploy "Deploy the JAR to Clojars."
[opts] (-> opts (assoc :lib lib :version version) (bb/deploy))) ================================================ FILE: deps.edn ================================================ {:paths ["src" "resources"] :deps {org.clojure/clojure {:mvn/version "1.12.4"} org.clojure/clojurescript {:mvn/version "1.12.134"} org.clojure/core.async {:mvn/version "1.8.741"}} :aliases {:test {:extra-paths ["test"] :extra-deps {org.clojure/test.check {:mvn/version "1.1.1"} io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" :git/sha "5e91ee0"}}} :cljs-test {:extra-paths ["test"] :extra-deps {olical/cljs-test-runner {:mvn/version "3.8.1"}} :main-opts ["-m" "cljs-test-runner.main" "-x" "node"]} :build {:deps {io.github.seancorfield/build-clj {:git/tag "v0.9.2" :git/sha "9c9f078"} io.github.clojure/tools.build {:git/tag "v0.10.13" :git/sha "ae52edfe"} org.clojure/tools.analyzer {:mvn/version "1.2.2"}} :ns-default build}}} ================================================ FILE: docs/shootout.md ================================================ ## Parallel Transducing Context Shootout: `|>>` vs `=>>` Welcome to the parallel transducer context shootout! Here you'll find comparative benchmarks between `|>>` _('pipeline-thread-last')_ and `=>>` _('fold-thread-last')_. You can learn more about these `injest` macros in the [readme](https://github.com/johnmn3/injest/blob/main/README.md). In this comparative analysis, we explore a few different scenarios on both a 4 core machine and a 16 core machine. First, let's define some testing functions: ```clojure (require '[clojure.edn :as edn]) (defn work-1000 [work-fn] (range (last (repeatedly 1000 work-fn)))) (defn x>>work [input] (work-1000 (fn [] (x>> input (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) 
(apply +) str (take 3) (apply str) edn/read-string)))) ;; and one extra macro for returning a value for the number of seconds passed: (defmacro time-val [& body] `(x>> (time ~@body) with-out-str (drop 15) reverse (drop 8) reverse (apply str) edn/read-string (* 0.001))) ``` You may recognize those test functions from the [readme](https://github.com/johnmn3/injest/blob/main/README.md). Now let's exercise them: ```clojure (dotimes [i 50] (println (=>> (range 100) (repeat i) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) time-val))) ;; and (dotimes [i 50] (println (|>> (range 100) (repeat i) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) time-val))) ``` With 4 cores: Screen Shot 1 With 16 cores: Screen Shot 2 In the above example, all we're doing is increasing sequence size while keeping the workload the same, so `|>>` and `=>>` are tracking pretty closely to one another. If we want to measure different workloads, we'll need to get a little fancier with our testing functions. ```clojure (defn work [n] (time-val (->> (range n) (mapv (fn [_] (x>> (range n) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) 
(apply +))))))) (defn run-|>> [l w] (|>> (range l) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))))) (defn run-=>> [l w] (=>> (range l) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))))) ``` We start with a `work` function that becomes increasingly more expensive as `n` rises. We then define run functions `run-|>>` and `run-=>>` that take a sequence length `l` and a work width `w`. Each run function exercises the work function 16 times. This way, we can get a sense of how sequence size vs workload size affects performance characteristics. Let's look at a "medium" sized work load: ```clojure (dotimes [n 10] (println (time-val (last (run-|>> 100 (* n 100)))))) ;; and (dotimes [n 10] (println (time-val (last (run-=>> 100 (* n 100)))))) ``` Here, we're saying `100 (* n 100)` is a sequence 100 elements long, where `n` increases by 100 on each step. Let's see how they compare. On 4 cores: Screen Shot 3 On 16 cores: Screen Shot 4 In this example, the `|>>` pipeline thread does a little better in the high core count scenario. In the low core count scenario, they're almost identical. 
Now let's try a small, constant size workload with an increasingly large sequence: ```clojure (dotimes [n 10] (println (time-val (last (run-|>> (* n 1000) 10))))) ;; and (dotimes [n 10] (println (time-val (last (run-=>> (* n 1000) 10))))) ``` On 4 cores: Screen Shot 5 On 16 cores: Screen Shot 6 Much to my surprise, `|>>` won out with this particular workload on both 4 and 16 cores. How far can we take that? Let's try it with a really big sequence and a really small workload: ```clojure (dotimes [n 10] (println (time-val (last (run-|>> (* n 10000) 1))))) ;; and (dotimes [n 10] (println (time-val (last (run-=>> (* n 10000) 1))))) ``` On 4 cores: Screen Shot 7 On 16 cores: Screen Shot 8 On both core counts, `=>>` wins out slightly. Here, we can see that `|>>` starts to fall behind when threads are not optimized for heavy workloads. What about the opposite scenario? Let's try a small, constant size sequence with an increasingly, and eventually extremely, large workload per item: ```clojure (dotimes [n 4] (println (time-val (last (run-|>> 10 (* n 1000)))))) ;; and (dotimes [n 4] (println (time-val (last (run-=>> 10 (* n 1000)))))) ``` We're only doing 4 runs here because the results take a while. On 4 cores: Screen Shot 9 On 16 cores: Screen Shot 10 As you can see, this is where `|>>` really shines: With super heavy work and a very high core count, `pipeline` starts to show significant efficiencies. Given these characteristics, one might ask, _"Why not always use `|>>` then?"_ Unfortunately, `|>>` falls over with extremely large sequences with small, heterogeneous workloads. `injest` is designed to allow users to mix and match threads with transformation functions that are fully lazy, transducable and/or parallelizable. Under the hood, this sometimes involves passing some results to a `sequence` operation, then to a `pipeline` operation, then to a lazy `(apply foo)` operation, etc.
I believe that in these heterogeneous workload scenarios, the thread communication for `|>>` is causing a traffic jam. Still under investigation though. For example, let's look at this test scenario: ```clojure (dotimes [n 10] (|>> (range (* n 100000)) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time-val println)) ;; and (dotimes [n 10] (=>> (range (* n 100000)) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time-val println)) ``` On 4 cores: > todo On 16 cores: Screen Shot 12 And that issue only compounds as the sequence size rises. So, let's be honest: at least half of the sequence transformation threads that we usually build with `->>` in Clojure are _not_ homogeneous, heavily loaded threads. So, if a given thread is only _just starting_ to seem like it could benefit from parallelization, then there's a good chance that `|>>` will be a footgun for you, while `=>>` may pay dividends - so in general I recommend reaching for `=>>` first. However, once your threads' workloads start to become _embarrassingly parallel_, then it makes sense to try out `|>>`, to see if it can get you even farther - especially with more available cores. I know, you're wondering, what do these tests look like against the single threaded transducing `x>>` and classical, lazy `->>` macros? Let's add a test case for that: ```clojure (defn lazy-work [n] (time-val (->> (range n) (mapv (fn [_] (->> (range n) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?)
(apply +))))))) (defn run-x>> [l w] (x>> (range l) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))) (map (fn [_] (work w))))) (defn run-->> [l w] (->> (range l) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))) (map (fn [_] (lazy-work w))))) ``` Now, looking at our "medium" sized work load above: ```clojure (dotimes [n 10] (println (time-val (last (run-x>> 100 (* n 100)))))) ;; and (dotimes [n 10] (println (time-val (last (run-->> 100 (* n 100)))))) ``` And adding those to our times, we get: On 4 cores: Screen Shot 13 On 16 cores: Screen Shot 14 As you can see, it would have taken a _very_ long time for the lazy version to ever finish all ten iterations. Let's see it with the small sequence, large work version: ```clojure (dotimes [n 4] (println (time-val (last (run-x>> 10 (* n 1000)))))) ;; and (dotimes [n 4] (println (time-val (last (run-->> 10 (* n 1000)))))) ``` On 4 cores: Screen Shot 15 On 16 cores: Screen Shot 16 Aha! We've discovered that `=>>` is breaking for _very_ small sequences (here 10). But only when the sequence is shorter than the number of cores - as in the 16 core run, where the number of cores is greater than the number of items available in the sequence. We'll see if we can optimize this in our parallelism strategy.
Let's see how these comparisons fare in the very large sequence cases: > todo Now let's see the case of an extremely large sequence with heterogeneous data and work: On 4 cores: Screen Shot 17 On 16 cores: Screen Shot 18 Here we can see that, with this kind of workload, the best we can do is try to keep up with the single threaded transducer version - which the `=>>` version does a pretty good job of. ================================================ FILE: resources/clj-kondo.exports/net.clojars.john/injest/config.edn ================================================ {:lint-as {injest.core/x> clojure.core/-> injest.core/x>> clojure.core/->> injest.core/=> clojure.core/-> injest.core/=>> clojure.core/->> injest.core/|> clojure.core/-> injest.core/|>> clojure.core/->> injest.path/+> clojure.core/-> injest.path/+>> clojure.core/->> injest.path/x> clojure.core/-> injest.path/x>> clojure.core/->> injest.path/=> clojure.core/-> injest.path/=>> clojure.core/->> injest.path/|> clojure.core/-> injest.path/|>> clojure.core/->> injest.classical/x> clojure.core/-> injest.classical/x>> clojure.core/->> injest.classical/=> clojure.core/-> injest.classical/=>> clojure.core/->> injest.classical/|> clojure.core/-> injest.classical/|>> clojure.core/->> injest.report.path/+> clojure.core/-> injest.report.path/+>> clojure.core/->> injest.report.path/x>> clojure.core/->> injest.report.path/=>> clojure.core/->>} :hooks {:macroexpand {injest.path/+> injest.path/+> injest.path/+>> injest.path/+>> injest.path/x> injest.path/+> injest.path/x>> injest.path/+>> injest.path/=> injest.path/+> injest.path/=>> injest.path/+>> injest.path/|> injest.path/+> injest.path/|>> injest.path/+>>}} :linters {:unused-binding {:level :off} :type-mismatch {:level :off}}} ================================================ FILE: resources/clj-kondo.exports/net.clojars.john/injest/injest/path.clj ================================================ (ns injest.path) (def protected-fns #{`fn 'fn 'fn* 'partial}) (defn
path-> [form x] (cond (and (seq? form) (not (protected-fns (first form)))) (with-meta `(~(first form) ~x ~@(next form)) (meta form)) (or (string? form) (nil? form) (boolean? form)) (list 'clojure.core/get x form) (int? form) (list 'clojure.core/get x form) :else (list form x))) (defn path->> [form x] (cond (and (seq? form) (not (protected-fns (first form)))) (with-meta `(~(first form) ~@(next form) ~x) (meta form)) (or (string? form) (nil? form) (boolean? form)) (list 'clojure.core/get x form) (int? form) (list 'clojure.core/get x form) :else (list form x))) (defmacro +> [x & forms] (loop [x x, forms forms] (if forms (recur (path-> (first forms) x) (next forms)) x))) (defmacro +>> [x & forms] (loop [x x, forms forms] (if forms (recur (path->> (first forms) x) (next forms)) x))) ================================================ FILE: scripts/ccweb-setup.sh ================================================ #!/usr/bin/env bash set -euo pipefail # ccweb-setup.sh — Heavy lifter for ccweb + Clojure proxy support # # Parses $HTTPS_PROXY to generate ~/.m2/settings.xml, installs tools, # downloads deps, and prepares the environment for Clojure development # behind the ccweb egress proxy. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" ############################################################################### # 1. 
Generate ~/.m2/settings.xml from $HTTPS_PROXY ############################################################################### generate_maven_settings() { if [ -z "${HTTPS_PROXY:-}" ]; then echo "[ccweb-setup] No HTTPS_PROXY set, skipping Maven settings generation" return 0 fi # Parse proxy URL: http://user:pass@host:port local proxy_url="$HTTPS_PROXY" # Strip protocol local no_proto="${proxy_url#http://}" no_proto="${no_proto#https://}" local userinfo="" local hostport="" if [[ "$no_proto" == *"@"* ]]; then userinfo="${no_proto%%@*}" hostport="${no_proto##*@}" else hostport="$no_proto" fi local host="${hostport%%:*}" local port="${hostport##*:}" # Strip trailing slash from port port="${port%%/*}" local username="" local password="" if [ -n "$userinfo" ]; then username="${userinfo%%:*}" password="${userinfo#*:}" fi mkdir -p ~/.m2 cat > ~/.m2/settings.xml < httpsProxy true https ${host} ${port} ${username} ${password} httpProxy true http ${host} ${port} ${username} ${password} XMLEOF echo "[ccweb-setup] Generated ~/.m2/settings.xml (proxy: ${host}:${port})" } ############################################################################### # 2. Install Node.js (if not present) ############################################################################### install_node() { if command -v node &>/dev/null; then echo "[ccweb-setup] Node.js already installed: $(node --version)" return 0 fi echo "[ccweb-setup] Installing Node.js 24..." curl -fsSL https://deb.nodesource.com/setup_24.x | bash - apt-get install -y nodejs export NODE_USE_ENV_PROXY=1 echo "[ccweb-setup] Node.js installed: $(node --version)" } ############################################################################### # 3. 
Install Babashka (for paren repair hook) ############################################################################### install_babashka() { if command -v bb &>/dev/null; then echo "[ccweb-setup] Babashka already installed: $(bb --version)" return 0 fi echo "[ccweb-setup] Installing Babashka..." curl -fsSL https://raw.githubusercontent.com/babashka/babashka/master/install | bash echo "[ccweb-setup] Babashka installed: $(bb --version)" } ############################################################################### # 4. Install Clojure CLI (if not present) ############################################################################### install_clojure() { if command -v clojure &>/dev/null; then echo "[ccweb-setup] Clojure CLI already installed: $(clojure --version 2>&1 | head -1)" return 0 fi echo "[ccweb-setup] Installing Clojure CLI..." curl -fsSL https://download.clojure.org/install/linux-install-1.12.0.1530.sh | bash echo "[ccweb-setup] Clojure CLI installed" } ############################################################################### # 5. Download deps for each alias ############################################################################### download_deps() { echo "[ccweb-setup] Downloading project dependencies..." cd "$PROJECT_DIR" # Main deps clojure -P 2>/dev/null || true # Test deps clojure -P -M:test 2>/dev/null || true # CLJS test deps clojure -P -M:cljs-test 2>/dev/null || true # Build deps clojure -P -M:build 2>/dev/null || true echo "[ccweb-setup] Dependencies downloaded" } ############################################################################### # 6. npm install (if package.json exists) ############################################################################### npm_install() { if [ -f "$PROJECT_DIR/package.json" ]; then echo "[ccweb-setup] Running npm install..." 
cd "$PROJECT_DIR" export NODE_USE_ENV_PROXY=1 npm install echo "[ccweb-setup] npm install complete" fi } ############################################################################### # 7. Generate classpath.edn for shadow-cljs (if shadow-cljs.edn exists) ############################################################################### generate_classpath() { if [ -f "$PROJECT_DIR/shadow-cljs.edn" ]; then echo "[ccweb-setup] Generating classpath.edn for shadow-cljs..." cd "$PROJECT_DIR" local cp cp="$(clojure -Spath 2>/dev/null)" || true if [ -n "$cp" ]; then echo "{:classpath \"$cp\"}" > "$PROJECT_DIR/classpath.edn" echo "[ccweb-setup] classpath.edn generated" fi fi } ############################################################################### # Main ############################################################################### echo "[ccweb-setup] Starting ccweb + Clojure setup..." generate_maven_settings install_node install_babashka install_clojure download_deps npm_install generate_classpath echo "[ccweb-setup] Setup complete!" ================================================ FILE: src/injest/classical.cljc ================================================ (ns injest.classical (:require [injest.impl :as i]) #?(:cljs (:require-macros [injest.classical]))) (defmacro x> "Just like -> but first composes transducers into a function that sequences the thread values through the transducers." [x & thread] `(-> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?)))) (defmacro x>> "Just like ->> but first composes transducers into a function that sequences the thread values through the transducers." [x & thread] `(->> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?)))) #?(:cljs (defmacro |> "Just like x>, for now" [& args] `(x> ~@args)) :clj (defmacro |> "Just like x> but first composes stateless transducers into a function that pipelines in parallel the values flowing through the thread. 
Remaining stateful transducers are composed just like x>." [x & thread] `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?))))) #?(:cljs (defmacro |>> "Just like x>>, for now" [& args] `(x>> ~@args)) :clj (defmacro |>> "Just like x>> but first composes stateless transducers into a function that pipelines in parallel the values flowing through the thread. Remaining stateful transducers are composed just like x>>." [x & thread] `(x>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?))))) #?(:cljs (defmacro => "Just like x>, for now" [& args] `(x> ~@args)) :clj (defmacro => "Just like x> but first composes stateless transducers into a function that `r/fold`s in parallel the values flowing through the thread. Remaining stateful transducers are composed just like x>." [x & thread] `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?))))) #?(:cljs (defmacro =>> "Just like x>>, for now" [& args] `(x>> ~@args)) :clj (defmacro =>> "Just like x>> but first composes stateless transducers into a function that `r/fold`s in parallel the values flowing through the thread. Remaining stateful transducers are composed just like x>>." [x & thread] `(x>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?))))) ================================================ FILE: src/injest/data.cljc ================================================ (ns injest.data) (def par-regs #{'cljs.core/dedupe 'cljs.core/disj! 'cljs.core/dissoc! 'cljs.core/filter 'cljs.core/keep 'cljs.core/map 'cljs.core/random-sample 'cljs.core/remove 'cljs.core/replace 'cljs.core/take-while 'cljs.core/halt-when 'cljs.core/mapcat 'cljs.core/cat 'clojure.core/dedupe 'clojure.core/disj! 'clojure.core/dissoc! 
'clojure.core/filter 'clojure.core/keep 'clojure.core/map 'clojure.core/random-sample 'clojure.core/remove 'clojure.core/replace 'clojure.core/take-while 'clojure.core/halt-when 'clojure.core/mapcat 'clojure.core/cat}) (def def-regs #{'cljs.core/mapcat 'cljs.core/disj! 'cljs.core/dissoc! 'cljs.core/keep 'cljs.core/filter 'cljs.core/take-while 'cljs.core/drop-while 'cljs.core/keep-indexed 'cljs.core/take 'cljs.core/partition-all 'cljs.core/distinct 'cljs.core/dedupe 'cljs.core/take-nth 'cljs.core/map 'cljs.core/partition-by 'cljs.core/remove 'cljs.core/cat 'cljs.core/replace 'cljs.core/random-sample 'cljs.core/interpose 'cljs.core/map-indexed 'cljs.core/drop 'cljs.core/halt-when 'clojure.core/take-nth 'clojure.core/disj! 'clojure.core/dissoc! 'clojure.core/distinct 'clojure.core/keep-indexed 'clojure.core/random-sample 'clojure.core/map-indexed 'clojure.core/map 'clojure.core/replace 'clojure.core/drop 'clojure.core/remove 'clojure.core/cat 'clojure.core/partition-all 'clojure.core/interpose 'clojure.core/mapcat 'clojure.core/dedupe 'clojure.core/drop-while 'clojure.core/partition-by 'clojure.core/take-while 'clojure.core/take 'clojure.core/keep 'clojure.core/filter 'clojure.core/halt-when}) ================================================ FILE: src/injest/impl.cljc ================================================ (ns injest.impl (:require #?(:clj [clojure.core.async :as a :refer [chan to-chan! pipeline > form first (contains? @s/transducables))))) (defn par-transducable? [form] (or (= form cat) (when (sequential? form) (->> form first (contains? @s/par-transducables))))) (defn compose-transducer-group [xfs] (->> xfs (map #(if-not (coll? 
%) % (if (= 1 (count %)) (first %) (apply (first %) (rest %))))) (apply comp))) (defn xfn [xf-group] (let [ts (compose-transducer-group xf-group)] (fn [args] (sequence ts args)))) #?(:cljs (def fold-xfn xfn) :clj (defn fold-xfn [xf-group] (let [ts (compose-transducer-group xf-group)] (fn [args] (r/fold 512 (r/monoid into conj) (ts conj) (vec args)))))) #?(:cljs (def pipeline-xfn xfn) :clj (defn pipeline-xfn [xf-group] (let [p (+ 2 (.. Runtime getRuntime availableProcessors)) ts (compose-transducer-group xf-group)] (fn [args] (let [results (chan)] (pipeline p results ts (to-chan! args)) (> thread (u/qualify-thread env) (partition-by #(t-pred %)) (mapv #(if-not (and (t-pred (first %)) (not (< (count %) minimum-group-size))) % (list (list `(~t-fn ~(mapv vec %)))))) (apply concat))) (defn get-or-nth [m-or-v aval] (if (associative? m-or-v) (get m-or-v aval) (nth m-or-v aval))) (comment (get-or-nth {0 :a 2 :b} 2) ;=> :b (get-or-nth [:a :b :c] 2) ;=> :c (get-or-nth `(x y z) 2) ;=> injest.path/z (get-or-nth {0 :a nil 2} nil) ;=> 2 (get-or-nth {0 :a false 2} false) ;=> 2 :end) (def protected-fns #{`fn 'fn 'fn* 'partial}) (defn path-> [form x] (cond (and (seq? form) (not (protected-fns (first form)))) (with-meta `(~(first form) ~x ~@(next form)) (meta form)) (or (string? form) (nil? form) (boolean? form)) (list x form) (int? form) (list `get-or-nth x form) :else (list form x))) (defn path->> [form x] (cond (and (seq? form) (not (protected-fns (first form)))) (with-meta `(~(first form) ~@(next form) ~x) (meta form)) (or (string? form) (nil? form) (boolean? form)) (list x form) (int? 
form) (list `get-or-nth x form) :else (list form x))) ================================================ FILE: src/injest/path.cljc ================================================ (ns injest.path (:require [injest.impl :as i]) #?(:cljs (:require-macros [injest.path]))) ;; non-transducer versions, with path navigation, for untransducifying a transducified path thread (defmacro +> "Just like -> but for ints will index into vectors and sequences and `get` into maps, whereas for strings, booleans and nils, will be passed to the thread-value as a lookup param. Also wraps lambdas. As in: (let [m {1 {\"b\" [0 1 {:c :res}]}}] (+> m 1 \"b\" 2 :c name #(str \"hi\" % \"!\"))) ;=> \"hi res!\"" [x & forms] (loop [x x, forms forms] (if forms (recur (i/path-> (first forms) x) (next forms)) x))) (defmacro +>> "Just like ->> but for ints will index into vectors and sequences and `get` into maps, whereas for strings, booleans and nils, will be passed to the thread-value as a lookup param. Also wraps lambdas. As in: (let [m {1 {\"b\" [0 1 {:c :res}]}}] (+>> m 1 \"b\" 2 :c name #(str \"hi\" % \"!\"))) ;=> \"hi res!\"" [x & forms] (loop [x x, forms forms] (if forms (recur (i/path->> (first forms) x) (next forms)) x))) ;; transducer version (defmacro x> "Just like +> but first composes transducers into a function that sequences the thread values through the transducers." [x & thread] `(+> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?)))) (defmacro x>> "Just like +>> but first composes transducers into a function that sequences the thread values through the transducers." [x & thread] `(+>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?)))) ;; parallel transducer version #?(:cljs (defmacro |> "Just like x>, for now" [& args] `(x> ~@args)) :clj (defmacro |> "Just like x> but first composes stateless transducers functions into a function that pipelines in parallel the thread values flowing through the thread. 
Remaining stateful transducers are composed just like x>." [x & thread] `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?))))) #?(:cljs (defmacro |>> "Just like x>>, for now" [& args] `(x>> ~@args)) :clj (defmacro |>> "Just like x>> but first composes stateless transducers functions into a function that pipelines in parallel the thread values flowing through the thread. Remaining stateful transducers are composed just like x>>." [x & thread] `(x>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?))))) #?(:cljs (defmacro => "Just like x>, for now" [& args] `(x> ~@args)) :clj (defmacro => "Just like x> but first composes stateless transducers into a function that `r/fold`s in parallel the values flowing through the thread. Remaining stateful transducers are composed just like x>." [x & thread] `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?))))) #?(:cljs (defmacro =>> "Just like x>>, for now" [& args] `(x>> ~@args)) :clj (defmacro =>> "Just like x>> but first composes stateless transducers into a function that `r/fold`s in parallel the values flowing through the thread. Remaining stateful transducers are composed just like x>>." [x & thread] `(x>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?))))) ================================================ FILE: src/injest/report/path.cljc ================================================ (ns injest.report.path (:require [injest.path :as p] [injest.report :as r]) #?(:cljs (:require-macros [injest.report.path]))) ;; non-transducer versions, with path navigation, for untransducifying a transducified path thread (defmacro +> "Just like -> but for ints will index into vectors and sequences and `get` into maps, whereas for strings, booleans and nils, will be passed to the thread-value as a lookup param. Also wraps lambdas.
As in: (let [m {1 {\"b\" [0 1 {:c :res}]}}] (+> m 1 \"b\" 2 :c name #(str \"hi\" % \"!\"))) ;=> \"hi res!\"" [x & forms] `(p/+> ~x ~@forms)) (defmacro +>> "Just like ->> but for ints will index into vectors and sequences and `get` into maps, whereas for strings, booleans and nils, will be passed to the thread-value as a lookup param. Also wraps lambdas. As in: (let [m {1 {\"b\" [0 1 {:c :res}]}}] (+>> m 1 \"b\" 2 :c name #(str \"hi\" % \"!\"))) ;=> \"hi res!\"" [x & forms] `(p/+>> ~x ~@forms)) (defmacro get-namespace [] (str *ns*)) ;; transducer version (defmacro x>> "Just like +>> but first composes transducers into a function that sequences the thread values through the transducers." [x & thread] `(if-not @r/report-live? (injest.path/x>> ~x ~@thread) (let [a?# (= 0 (rand-int 2)) ans# (get-namespace) k# (r/flc ans# ~(meta &form))] (if a?# (r/monitor k# injest.path/x>> ~(concat [x] thread)) (r/monitor k# injest.path/+>> ~(concat [x] thread)))))) ;; parallel transducer version #?(:cljs (defmacro =>> "Just like x>>, for now" [& args] `(x>> ~@args)) :clj (defmacro =>> "Just like x>> but first composes stateless transducers into a function that `r/fold`s in parallel the values flowing through the thread. Remaining stateful transducers are composed just like x>>." [x & thread] `(if-not @r/report-live? (injest.path/x>> ~x ~@thread) (let [n# (rand-int 3) ans# (get-namespace) k# (r/flc ans# ~(meta &form))] (case n# 0 (r/monitor k# injest.path/=>> ~(concat [x] thread)) 1 (r/monitor k# injest.path/x>> ~(concat [x] thread)) 2 (r/monitor k# injest.path/+>> ~(concat [x] thread))))))) ================================================ FILE: src/injest/report.cljc ================================================ (ns injest.report #?(:cljs (:require-macros [injest.report]))) (def mon (atom {})) (def report-live? 
(atom false)) (def report-taps (atom {})) (defn flc [f form-meta] (let [{:as m :keys [line column]} form-meta] (str "{:namespace \"" f "\"\n :line " line "\n :column " column))) (defn now [] #?(:clj (.toEpochMilli (java.time.Instant/now)) :cljs (.now js/Date))) (defmacro tv [& body] `(let [t1# (now) res# ~@body t2# (now)] {:res res# :time (- t2# t1#)})) (defn add-time [times new-time] (let [the-times (take 99 (or times '()))] (vec (conj the-times new-time)))) (defmacro monitor [k applicator body] `(let [res# (tv (~applicator ~@body)) t# (:time res#) result# (:res res#)] (swap! mon update-in [~k ~(str applicator)] #(do {:times (add-time (:times %) (:time res#)) :time (int (* 1.0 (/ (apply + (:times %)) (inc (count (:times %))))))})) result#)) ;; render report (defn round [n] (float (/ (int (* 100 n)) 100))) (defn unzero [n] (if (or (nil? n) (= 0 n) (= 0.0 n)) 1 n)) (defn render-v [v] (let [t1 (some-> v (get "injest.path/+>>") :time) t2 (some-> v (get "injest.path/x>>") :time) t3 (some-> v (get "injest.path/=>>") :time) s-ts (->> [{:t (unzero t3) :s "=>>"} {:t (unzero t2) :s "x>>"} {:t (unzero t1) :s "+>>"}] (sort-by :t) reverse) max-ts (last s-ts) min-ts (first s-ts) mid-ts (second s-ts) diff1 (round (* 1.0 (/ (:t min-ts) (:t mid-ts)))) diff2 (round (* 1.0 (/ (:t mid-ts) (:t max-ts))))] (if-not t3 (if (= diff1 1.0) (str " :" (:s min-ts) " \"" (:s min-ts) " and " (:s mid-ts) " are basically the same speed\"") (str " :" (:s mid-ts) " \"" (:s mid-ts) " is " diff1 " times faster than " (:s min-ts) "\"")) (str (if (= diff2 1.0) (str " :" (:s max-ts) " \"" (:s max-ts) " and " (:s mid-ts) " are basically the same speed\"") (str " :" (:s max-ts) " \"" (:s max-ts) " is " diff2 " times faster than " (:s mid-ts) "\"")) "\n" ;"\n and \n" (if (= diff1 1.0) (str " :" (:s mid-ts) " \"" (:s mid-ts) " and " (:s min-ts) " are basically the same speed\"") (str " :" (:s mid-ts) " \"" (:s mid-ts) " is " diff1 " times faster than " (:s min-ts) "\"}")))))) (defn report [] (->> @mon (mapv 
(fn [[k v]] (str k "\n" (render-v v)))) sort (interpose "\n\n") (apply str))) (defn set-report-interval [callback ms] #?(:clj (future (while true (do (Thread/sleep ms) (when @report-live? (callback))))) :cljs identity #_(js/setInterval #(when @report-live? (callback)) ms))) (defn report! [bool] (when (false? bool) (->> @report-taps vals (mapv #?(:clj future-cancel :cljs #(js/clearInterval %)))) (reset! report-taps {})) (reset! report-live? (boolean bool))) (defn add-report-tap! "Registers handler as a report tap. Through set-report-interval (on the JVM) handler is called with the rendered (report) every `seconds` seconds, or every 10 seconds when `seconds` is omitted, while reporting is live. The value returned by set-report-interval is stored in report-taps under handler so that report! can cancel it." [handler & [seconds]] (let [interval-ms (if seconds (* 1000 seconds) 10000) f (set-report-interval #(handler (report)) interval-ms)] (swap! report-taps assoc handler f))) ================================================ FILE: src/injest/state.cljc ================================================ (ns injest.state (:require [injest.util :as u] [injest.data :as d]) #?(:cljs (:require-macros [injest.state]))) (def transducables (atom #{})) (def par-transducables (atom #{})) (defmacro reg-xf! [& xfs] `(swap! transducables into ~(->> xfs (mapv #(u/qualify-sym % &env))))) (defn regxf! [& xfs] (swap! transducables into xfs)) (defmacro reg-pxf! [& xfs] `(swap! par-transducables into ~(->> xfs (mapv #(u/qualify-sym % &env))))) (defn regpxf! [& xfs] (swap! par-transducables into xfs)) (apply regxf! d/def-regs) (apply regpxf! d/par-regs) ; (regxf! 'clojure.core/map) ; or (reg-xf! map) ; Must be called from Clojure ================================================ FILE: src/injest/test.clj ================================================ (ns injest.test (:require [injest.state :as i.s] [injest.report :as r] [injest.report.path :as injest :refer [+> +>> x>> =>>]])) (comment (r/add-report-tap! println 20) (r/report! true) (r/report! false) (dotimes [_ 10] (=>> (range 1000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) ;; (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?)
;; (x/reduce +) ;; first (apply +) time)) (macroexpand '(=>> (range 1000000) (map inc) (apply +) time)) (x>> (range 1000000) (map inc) (apply +) time) (dotimes [_ 10] (=>> (range 1000000) (map inc) (filter odd?) (map (partial + 10)) (filter even?) (apply +) time)) (->> (range 1000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) ;; (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) ;; (x/reduce +) ;; first (apply +) time) :end ) ================================================ FILE: src/injest/test.cljs ================================================ (ns injest.test (:require [injest.state :as i.s] [injest.report :as r] [injest.report.path :as injest :refer [+> +>> x>> =>>]])) (comment ;; reporting not yet working in cljs (r/add-report-tap! println) (r/report! true) (r/report! false) ;; these aren't workign in cljs (i.s/regxf! 'cljs.core/map) (i.s/reg-xf! map) (require '[clojure.edn :as edn]) ;; (require '[net.cgrand.xforms :as x]) ;; (reg-xf `x/reduce) ;; copied from test.clj, recorded times need to be updated for cljs (->> (range 1000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) ;; (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) time) (x>> (range 1000000) (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) ;; (mapv dec) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) ;; (x/reduce +) ;; first (apply +) time) ;; work utilities (defn work-1000 [work-fn] (range (last (repeatedly 1000 work-fn)))) (defn ->>work [input] (work-1000 (fn [] (->> input (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) 
(apply +) str (take 3) (apply str) edn/read-string)))) (defn x>>work [input] (work-1000 (fn [] (x>> input (map inc) (filter odd?) (mapcat #(do [% (dec %)])) (partition-by #(= 0 (mod % 5))) (map (partial apply +)) (map (partial + 10)) (map #(do {:temp-value %})) (map :temp-value) (filter even?) (apply +) str (take 3) (apply str) edn/read-string)))) (->> (range 100) (repeat 10) (map ->>work) (map ->>work) (map ->>work) (map ->>work) (map ->>work) (map ->>work) last count time) ; "Elapsed time: 18309.397391 msecs" ; 234 (x>> (range 100) (repeat 10) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) last count time) ; "Elapsed time: 6252.224178 msecs" ; 234 (=>> (range 100) (repeat 10) (map ->>work) (map ->>work) (map ->>work) (map ->>work) (map ->>work) (map ->>work) last count time) ; "Elapsed time: 8976.963402 msecs" ; 234 (=>> (range 100) (repeat 10) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) (map x>>work) last count time) ; "Elapsed time: 2862.172838 msecs" ; 234 :end) ;; path thread tests (comment (x>> [1 2 3] (map #(do [% %])) cat) (x>> [1 2 3] (map #(do [% %])) (cat)) (let [m {1 {"b" [0 1 {:c :res}]}}] (x> m 1 "b" 2 :c)) (x> {0 :a 2 :b} 2) ;=> :b (x> [0 2 5] 2 #(- 10 % 1)) ;=> 4 (x> [0 1 2 3 4] rest 2 #(- 10 % 1)) ;=> 6 (x> 10 range rest 2 #(- 10 % 1)) ;=> 6 (x> [:a :b :c] 2) ;=> :c (x> `(x y z) 2) ;=> injest.path/z (x> {0 :a nil 2} nil) ;=> 2 (x> {0 :a false 2} false) ;=> 2 (x>> {0 :a 2 :b} 2) ;=> :b (x>> [:a :b :c] 2) ;=> :c (x>> `(x y z) 2) ;=> injest.path/z (x>> {0 :a nil 2} nil) ;=> 2 (x>> {0 :a false 2} false) ;=> 2 ; non-transducer, with path navigation, for untransducifying a transducified path thread (+> {0 :a 2 :b} 2) ;=> :b (+> [:a :b :c] 2) ;=> :c (+> `(x y z) 2) ;=> injest.path/z (+> {0 :a nil 2} nil) ;=> 2 (+> {0 :a false 2} false) ;=> 2 (+>> {0 :a 2 :b} 2) ;=> :b (+>> [:a :b :c] 2) ;=> :c (+>> `(x y z) 2) ;=> injest.path/z (+>> {0 :a nil 2} nil) ;=> 2 (+>> {0 :a false 2} false) ;=> 2 
(let [m {1 {"b" [0 1 {:c :res}]}}]
    (x> m 1 "b" 2 :c name)) ;=> "res"
  (let [m {1 {"b" [0 1 {:c :res}]}}]
    (x>> m 1 "b" 2 :c name)) ;=> "res"
  (let [m {1 {"b" [0 1 {:c :res}]}}]
    (+> m 1 "b" 2 :c name)) ;=> "res"
  (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :res}]}])}]
    (+>> m 1 2 "b" 2 :c name)) ;=> "res"

  (x>> (range 1000000)
       (map inc)
       (filter odd?)
       (mapcat #(do [% (dec %)]))
       (partition-by #(= 0 (mod % 5)))
       (map (partial apply +))
       (map (partial + 10))
       (map #(do {:temp-value %}))
       (map :temp-value)
       (filter even?)
       (apply +)
       time)
  ;; "Elapsed time: 6735.604664 msecs"
  ;; 5000054999994

  :end
  )


================================================
FILE: src/injest/util.cljc
================================================
(ns injest.util
  (:require
   [cljs.analyzer.api :as api]))

(def safe-resolve
  "Symbol resolver used while expanding macros: `resolve` on :clj,
  `identity` on :cljs."
  #?(:clj resolve :cljs identity))

(defn qualify-sym
  "Returns a form that evaluates to the fully-qualified symbol named by `x`.

  `env` is the calling macro's `&env`. When it has no `:ns` entry the symbol is
  resolved with `safe-resolve`; otherwise it is resolved through
  `cljs.analyzer.api/resolve`.

  The branch tests `(:ns env)` rather than `env` itself, matching
  `qualify-form`: on Clojure `&env` is a non-nil map of local bindings whenever
  the macro is expanded inside a `let`/`fn` body, and such an env must not be
  handed to the ClojureScript analyzer."
  [x env]
  (if-not (:ns env)
    `(quote ~(symbol (safe-resolve x)))
    `(symbol (quote ~(some-> x
                             ((partial cljs.analyzer.api/resolve env))
                             :name
                             symbol)))))

(defn qualify-form
  "Returns a one-element list holding the fully-qualified symbol for `x`,
  resolved with `safe-resolve` when `env` has no `:ns` entry and with
  `cljs.analyzer.api/resolve` otherwise."
  [x env]
  (if-not (:ns env)
    (list (symbol (safe-resolve x)))
    (list (some-> x
                  ((partial cljs.analyzer.api/resolve env))
                  :name
                  str
                  symbol))))

(defn qualify-thread
  "Returns `thread` as a vector in which the bare symbol `cat` and the head
  symbol of every list form are replaced by their qualified versions.
  Lists headed by `fn` or `fn*`, and all other forms, pass through unchanged."
  [env thread]
  (mapv (fn [x]
          (cond
            ;; bare `cat` becomes a one-element list: (<qualified cat>)
            (= x 'cat)
            (qualify-form x env)

            ;; (f a b) -> (<qualified f> a b), except for fn / fn* forms
            (and (list? x)
                 (symbol? (first x))
                 (not (#{'fn 'fn*} (first x))))
            (-> x first (qualify-form env) (concat (rest x)))

            :else
            x))
        thread))


================================================
FILE: test/injest/classical_test.cljc
================================================
(ns injest.classical-test
  "Functional tests for injest.classical — standard (non-path) threading with transducer composition, pipeline parallelism, and fold parallelism."
(:require #?(:clj  [clojure.test :refer [deftest testing is]]
               :cljs [cljs.test :refer-macros [deftest testing is]])
            [injest.classical :as c]))

;; ===================================================================
;; x> — thread-first with transducer composition
;; ===================================================================

;; Plain forms only: the expected values are what -> would produce.
(deftest classical-x>-basic
  (testing "x> with no transducers acts like ->"
    (is (= 3 (c/x> 1 (+ 2))))
    (is (= 4 (c/x> 1 (+ 2) (+ 1)))))
  (testing "x> threads as first arg"
    (is (= [1 2 3] (c/x> [1 2] (conj 3))))
    (is (= {:a 1 :b 2} (c/x> {:a 1} (assoc :b 2))))))

;; Results are wrapped in vec (or end with vec) so they compare against vectors.
(deftest classical-x>-with-transducers
  (testing "x> composes map transducer"
    (is (= [2 3 4] (vec (c/x> [1 2 3] (map inc))))))
  (testing "x> composes map and filter"
    (is (= [2 4] (vec (c/x> [1 2 3 4] (map inc) (filter even?))))))
  (testing "x> with transducer and vec"
    (is (= [2 4] (c/x> [1 2 3 4] (map inc) (filter even?) vec)))))

;; ===================================================================
;; x>> — thread-last with transducer composition
;; ===================================================================

;; Plain forms only: the expected values are what ->> would produce.
(deftest classical-x>>-basic
  (testing "x>> with no transducers acts like ->>"
    (is (= 3 (c/x>> 1 (+ 2))))
    (is (= [1 2 3 99] (c/x>> 99 (conj [1 2 3])))))
  (testing "x>> threads as last arg"
    (is (= '(0 1 2) (c/x>> 3 (range))))))

;; The "full pipeline" case is repeated verbatim for |>> and =>> below,
;; with the same expected sum (1044).
(deftest classical-x>>-with-transducers
  (testing "x>> composes map transducer"
    (is (= [2 3 4] (vec (c/x>> [1 2 3] (map inc))))))
  (testing "x>> composes map and filter"
    (is (= [2 4] (vec (c/x>> [1 2 3 4] (map inc) (filter even?))))))
  (testing "x>> full pipeline"
    (is (= 1044
           (c/x>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +)))))
  (testing "x>> with transducers then apply"
    (is (= 9 (c/x>> [1 2 3] (map inc) (apply +)))))
  (testing "x>> with non-transducer then transducers"
    (is (= [1 2 3 4 5] (vec (c/x>> 5 (range) (map inc)))))))

;; ===================================================================
;; |> — pipeline parallel, thread-first
;; ===================================================================

(deftest classical-|>-basic
  (testing "|> with stateless transducers produces correct results"
    (is (= [2 3 4] (vec (c/|> [1 2 3] (map inc))))))
  (testing "|> with map and filter"
    (is (= [2 4] (vec (c/|> [1 2 3 4] (map inc) (filter even?))))))
  (testing "|> with no transducers acts like ->"
    (is (= 3 (c/|> 1 (+ 2))))))

;; ===================================================================
;; |>> — pipeline parallel, thread-last
;; ===================================================================

(deftest classical-|>>-basic
  (testing "|>> with stateless transducers produces correct results"
    (is (= [2 3 4] (vec (c/|>> [1 2 3] (map inc))))))
  (testing "|>> full pipeline"
    (is (= 1044
           (c/|>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +)))))
  (testing "|>> with apply"
    (is (= 9 (c/|>> [1 2 3] (map inc) (apply +))))))

;; ===================================================================
;; => — fold parallel, thread-first
;; ===================================================================

(deftest classical-=>-basic
  (testing "=> with stateless transducers produces correct results"
    (is (= [2 3 4] (vec (c/=> [1 2 3] (map inc))))))
  (testing "=> with map and filter"
    (is (= [2 4] (vec (c/=> [1 2 3 4] (map inc) (filter even?))))))
  (testing "=> with no transducers"
    (is (= 3 (c/=> 1 (+ 2))))))

;; ===================================================================
;; =>> — fold parallel, thread-last
;; ===================================================================

(deftest classical-=>>-basic
  (testing "=>> with stateless transducers produces correct results"
    (is (= [2 3 4] (vec (c/=>> [1 2 3] (map inc))))))
  (testing "=>> full pipeline"
    (is (= 1044
           (c/=>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
(apply +)))))
  (testing "=>> with apply"
    (is (= 9 (c/=>> [1 2 3] (map inc) (apply +))))))

;; ===================================================================
;; Thread-first vs thread-last semantics
;; ===================================================================

(deftest classical-threading-direction
  (testing "x> threads as first arg for multi-arg functions"
    (is (= [10 1 2 3] (c/x> 10 (vector 1 2 3)))))
  (testing "x>> threads as last arg for multi-arg functions"
    (is (= [1 2 3 10] (c/x>> 10 (vector 1 2 3)))))
  (testing "direction doesn't matter for single-arg functions"
    (is (= (c/x> 5 (inc) (dec) (str))
           (c/x>> 5 (inc) (dec) (str))))))


================================================
FILE: test/injest/clj_kondo_test.sh
================================================
#!/usr/bin/env bash
#
# Test that reproduces clj-kondo/clj-kondo#2798:
# The macroexpand hook for x>> maps to +>>, but if +>> is defined as defn
# instead of defmacro, clj-kondo's macroexpand convention passes &form and &env
# as the first two args (which defmacro absorbs implicitly but defn does not).
# This causes the original (x>> ...) call to leak through into the expanded
# output, leading to infinite re-expansion -> StackOverflowError.
#
# This test:
# 1. Verifies the bug: old config with defn +>/+>> causes clj-kondo to error
# 2. Verifies the fix: new config with defmacro +>/+>> lints cleanly
#
# Usage: bash test/injest/clj_kondo_test.sh [/path/to/clj-kondo]

set -euo pipefail

CLJ_KONDO="${1:-clj-kondo}"

# Fail fast when the linter cannot be executed. Test 1 below treats ANY
# non-zero exit status as "the bug reproduced", so without this guard a missing
# binary (exit 127) would be reported as a passing test.
if ! command -v "$CLJ_KONDO" >/dev/null 2>&1; then
  echo "ERROR: clj-kondo executable not found: $CLJ_KONDO" >&2
  exit 2
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
TMPDIR_ROOT="$(mktemp -d)"
# Remove the scratch directory on every exit path.
trap 'rm -rf "$TMPDIR_ROOT"' EXIT

PASS=0
FAIL=0

# Report one result line and bump the matching counter.
pass() { echo " PASS: $1"; PASS=$((PASS + 1)); }
fail() { echo " FAIL: $1"; FAIL=$((FAIL + 1)); }

###############################################################################
# Create the sample source file that triggers the bug (from the issue report)
###############################################################################

SAMPLE_SRC="$TMPDIR_ROOT/sample_src/clj_kondo_debug/core.clj"
mkdir -p "$(dirname "$SAMPLE_SRC")"
cat > "$SAMPLE_SRC" << 'CLOJURE'
(ns clj-kondo-debug.core
  (:require [injest.path :as inj]))

(defn foo []
  (inj/x>> [1 2 3]
    (map inc)))

(defn bar []
  (inj/x> {:a 1 :b 2}
    :a))

(defn baz []
  (inj/+>> [1 2 3]
    (map inc)))

(defn quux []
  (inj/+> {:a 1 :b 2}
    :a))
CLOJURE

###############################################################################
# Helper: set up a .clj-kondo dir with given config and hook, then lint
# Returns clj-kondo's exit code (0 = clean, nonzero = errors)
###############################################################################

# Arguments: $1 = label (scratch sub-directory name), $2 = hook file to install
# as injest/path.clj, $3 = config.edn to install. Echoes clj-kondo's combined
# stdout/stderr.
run_lint() {
  local label="$1"
  local hook_file="$2"
  local config_edn="$3"
  local workdir="$TMPDIR_ROOT/$label"

  mkdir -p "$workdir/.clj-kondo/imports/net.clojars.john/injest/injest"

  # Write the hook implementation
  cp "$hook_file" "$workdir/.clj-kondo/imports/net.clojars.john/injest/injest/path.clj"

  # Write the config
  cp "$config_edn" "$workdir/.clj-kondo/imports/net.clojars.john/injest/config.edn"

  # Point .clj-kondo to the imports
  cat > "$workdir/.clj-kondo/config.edn" << 'EOF'
{:config-paths ["imports/net.clojars.john/injest"]}
EOF

  # Run clj-kondo and capture output + exit code
  local output
  local rc=0
  output=$("$CLJ_KONDO" --lint "$SAMPLE_SRC" --config-dir "$workdir/.clj-kondo" 2>&1) || rc=$?
  echo "$output"
  return $rc
}

###############################################################################
# Create the BROKEN hook (defn instead of defmacro) - the old code
###############################################################################

BROKEN_HOOK="$TMPDIR_ROOT/broken_hook.clj"
cat > "$BROKEN_HOOK" << 'CLOJURE'
(ns injest.path)

(def protected-fns #{`fn 'fn 'fn* 'partial})

(defn get-or-nth [m-or-v aval]
  (if (associative? m-or-v)
    (get m-or-v aval)
    (nth m-or-v aval)))

(defn path-> [form x]
  (cond (and (seq? form) (not (protected-fns (first form))))
        (with-meta `(~(first form) ~x ~@(next form)) (meta form))
        (or (string? form) (nil? form) (boolean? form))
        (list x form)
        (int? form)
        (list 'injest.path/get-or-nth x form)
        :else
        (list form x)))

(defn path->> [form x]
  (cond (and (seq? form) (not (protected-fns (first form))))
        (with-meta `(~(first form) ~@(next form) ~x) (meta form))
        (or (string? form) (nil? form) (boolean? form))
        (list x form)
        (int? form)
        (list 'injest.path/get-or-nth x form)
        :else
        (list form x)))

(defn +>
  [x & forms]
  (loop [x x, forms forms]
    (if forms
      (recur (path-> (first forms) x) (next forms))
      x)))

(defn +>>
  [x & forms]
  (loop [x x, forms forms]
    (if forms
      (recur (path->> (first forms) x) (next forms))
      x)))
CLOJURE

###############################################################################
# Create the FIXED hook (defmacro instead of defn) - the new code
###############################################################################

FIXED_HOOK="$TMPDIR_ROOT/fixed_hook.clj"
cat > "$FIXED_HOOK" << 'CLOJURE'
(ns injest.path)

(def protected-fns #{`fn 'fn 'fn* 'partial})

(defn get-or-nth [m-or-v aval]
  (if (associative? m-or-v)
    (get m-or-v aval)
    (nth m-or-v aval)))

(defn path-> [form x]
  (cond (and (seq? form) (not (protected-fns (first form))))
        (with-meta `(~(first form) ~x ~@(next form)) (meta form))
        (or (string? form) (nil? form) (boolean? form))
        (list x form)
        (int? form)
        (list 'injest.path/get-or-nth x form)
        :else
        (list form x)))

(defn path->> [form x]
  (cond (and (seq? form) (not (protected-fns (first form))))
        (with-meta `(~(first form) ~@(next form) ~x) (meta form))
        (or (string? form) (nil? form) (boolean? form))
        (list x form)
        (int? form)
        (list 'injest.path/get-or-nth x form)
        :else
        (list form x)))

(defmacro +>
  [x & forms]
  (loop [x x, forms forms]
    (if forms
      (recur (path-> (first forms) x) (next forms))
      x)))

(defmacro +>>
  [x & forms]
  (loop [x x, forms forms]
    (if forms
      (recur (path->> (first forms) x) (next forms))
      x)))
CLOJURE

###############################################################################
# Config EDN (same for both, matches the project's config.edn)
###############################################################################

CONFIG_EDN="$TMPDIR_ROOT/config.edn"
cat > "$CONFIG_EDN" << 'CLOJURE'
{:lint-as {injest.core/x> clojure.core/->
           injest.core/x>> clojure.core/->>
           injest.core/=> clojure.core/->
           injest.core/=>> clojure.core/->>
           injest.core/|> clojure.core/->
           injest.core/|>> clojure.core/->>
           injest.path/+> clojure.core/->
           injest.path/+>> clojure.core/->>
           injest.path/x> clojure.core/->
           injest.path/x>> clojure.core/->>
           injest.path/=> clojure.core/->
           injest.path/=>> clojure.core/->>
           injest.path/|> clojure.core/->
           injest.path/|>> clojure.core/->>}
 :hooks {:macroexpand {injest.path/+> injest.path/+>
                       injest.path/+>> injest.path/+>>
                       injest.path/x> injest.path/+>
                       injest.path/x>> injest.path/+>>
                       injest.path/=> injest.path/+>
                       injest.path/=>> injest.path/+>>
                       injest.path/|> injest.path/+>
                       injest.path/|>> injest.path/+>>}}
 :linters {:injest.path/+> {:level :error}
           :injest.path/+>> {:level :error}
           :unused-binding {:level :off}}}
CLOJURE

###############################################################################
# TEST 1: Broken hook (defn) should cause clj-kondo to error
###############################################################################

echo ""
echo "=== Test 1: Broken hook (defn +>/+>>) should cause clj-kondo error ==="
echo ""

# The `&& rc=0 || rc=$?` form records the status without tripping `set -e`.
BROKEN_OUTPUT=$(run_lint "broken" "$BROKEN_HOOK" "$CONFIG_EDN" 2>&1) && BROKEN_RC=0 || BROKEN_RC=$?
echo "$BROKEN_OUTPUT"
echo ""
echo " Exit code: $BROKEN_RC"

if [ "$BROKEN_RC" -ne 0 ]; then
  pass "Broken hook (defn) causes clj-kondo to report errors (confirms the bug)"
else
  fail "Broken hook (defn) should have caused errors but didn't"
fi

###############################################################################
# TEST 2: Fixed hook (defmacro) should lint cleanly
###############################################################################

echo ""
echo "=== Test 2: Fixed hook (defmacro +>/+>>) should lint cleanly ==="
echo ""

FIXED_OUTPUT=$(run_lint "fixed" "$FIXED_HOOK" "$CONFIG_EDN" 2>&1) && FIXED_RC=0 || FIXED_RC=$?
echo "$FIXED_OUTPUT"
echo ""
echo " Exit code: $FIXED_RC"

if [ "$FIXED_RC" -eq 0 ]; then
  pass "Fixed hook (defmacro) lints cleanly (confirms the fix)"
else
  fail "Fixed hook (defmacro) should have linted cleanly but got errors"
fi

###############################################################################
# TEST 3: Verify the actual project config from resources/ works
###############################################################################

echo ""
echo "=== Test 3: Project's resources/ config should lint cleanly ==="
echo ""

PROJECT_HOOK="$PROJECT_ROOT/resources/clj-kondo.exports/net.clojars.john/injest/injest/path.clj"
PROJECT_CONFIG="$PROJECT_ROOT/resources/clj-kondo.exports/net.clojars.john/injest/config.edn"

if [ -f "$PROJECT_HOOK" ] && [ -f "$PROJECT_CONFIG" ]; then
  PROJECT_OUTPUT=$(run_lint "project" "$PROJECT_HOOK" "$PROJECT_CONFIG" 2>&1) && PROJECT_RC=0 || PROJECT_RC=$?
echo "$PROJECT_OUTPUT"
  echo ""
  echo " Exit code: $PROJECT_RC"

  if [ "$PROJECT_RC" -eq 0 ]; then
    pass "Project config from resources/ lints cleanly"
  else
    fail "Project config from resources/ should lint cleanly but got errors"
  fi
else
  # Missing exported config files count as a failure rather than a skip.
  fail "Project config files not found at expected paths under resources/"
fi

###############################################################################
# Summary
###############################################################################

echo ""
echo "============================================"
echo " Results: $PASS passed, $FAIL failed"
echo "============================================"
echo ""

# Exit non-zero when at least one check failed.
if [ "$FAIL" -gt 0 ]; then
  exit 1
fi


================================================
FILE: test/injest/edge_cases_test.cljc
================================================
(ns injest.edge-cases-test
  "Edge case tests: empty threads, nil inputs, single forms, nesting, large collections, deeply nested paths, identity, duplicates."
  (:require #?(:clj  [clojure.test :refer [deftest testing is]]
               :cljs [cljs.test :refer-macros [deftest testing is]])
            [injest.path :as p]
            [injest.classical :as c]))

;; ===================================================================
;; Empty threads — (macro x) should return x unchanged
;; ===================================================================

;; Covers all eight injest.path macros plus classical x> / x>>.
(deftest empty-thread-returns-identity
  (testing "+> with no forms returns x"
    (is (= 42 (p/+> 42)))
    (is (= nil (p/+> nil)))
    (is (= {:a 1} (p/+> {:a 1}))))
  (testing "+>> with no forms returns x"
    (is (= 42 (p/+>> 42)))
    (is (= "hello" (p/+>> "hello"))))
  (testing "x> with no forms returns x"
    (is (= [1 2 3] (p/x> [1 2 3]))))
  (testing "x>> with no forms returns x"
    (is (= [1 2 3] (p/x>> [1 2 3]))))
  (testing "|> with no forms returns x"
    (is (= :foo (p/|> :foo))))
  (testing "|>> with no forms returns x"
    (is (= :foo (p/|>> :foo))))
  (testing "=> with no forms returns x"
    (is (= 99 (p/=> 99))))
  (testing "=>> with no forms returns x"
    (is (= 99 (p/=>> 99))))
  (testing "classical x> with no forms returns x"
    (is (= 42 (c/x> 42))))
  (testing "classical x>> with no forms returns x"
    (is (= 42 (c/x>> 42)))))

;; ===================================================================
;; Single-form threads
;; ===================================================================

;; One path element or one form per thread: keyword, integer, string,
;; bare fn symbol, and a single transducer form.
(deftest single-form-thread
  (testing "+> with single keyword lookup"
    (is (= 1 (p/+> {:a 1} :a))))
  (testing "+>> with single keyword lookup"
    (is (= 1 (p/+>> {:a 1} :a))))
  (testing "+> with single integer index"
    (is (= :b (p/+> [:a :b :c] 1))))
  (testing "+>> with single integer index"
    (is (= :b (p/+>> [:a :b :c] 1))))
  (testing "+> with single string lookup"
    (is (= 42 (p/+> {"k" 42} "k"))))
  (testing "+> with single fn"
    (is (= 2 (p/+> 1 inc))))
  (testing "x>> with single transducer"
    (is (= [2 3 4] (vec (p/x>> [1 2 3] (map inc))))))
  (testing "classical x>> with single transducer"
    (is (= [2 3 4] (vec (c/x>> [1 2 3] (map inc)))))))

;; ===================================================================
;; Nil as initial value
;; ===================================================================

(deftest nil-initial-value
  (testing "+> with nil and keyword returns nil (nil doesn't have keys)"
    (is (nil? (p/+> nil :a))))
  (testing "+>> with nil and keyword returns nil"
    (is (nil?
(p/+>> nil :a))))
  ;; Note: here the initial value is a vector of nils, not nil itself.
  (testing "x>> with nil and map passes nil through"
    (is (= [nil nil nil] (vec (p/x>> [nil nil nil] (map identity)))))))

;; ===================================================================
;; Empty collection inputs
;; ===================================================================

(deftest empty-collection-input
  (testing "x>> with empty vector"
    (is (= [] (vec (p/x>> [] (map inc))))))
  (testing "x>> with empty list"
    (is (= [] (vec (p/x>> '() (map inc))))))
  (testing "|>> with empty vector"
    (is (= [] (vec (p/|>> [] (map inc))))))
  (testing "=>> with empty vector"
    (is (= [] (vec (p/=>> [] (map inc))))))
  (testing "classical x>> with empty vector"
    (is (= [] (vec (c/x>> [] (map inc)))))))

;; ===================================================================
;; Large collection correctness
;; ===================================================================

;; Each variant is compared against the same computation written with
;; plain apply / map over (range 100000).
(deftest large-collection
  (testing "x>> with 100k elements"
    (is (= (apply + (map inc (range 100000)))
           (p/x>> (range 100000) (map inc) (apply +)))))
  (testing "|>> with 100k elements"
    (is (= (apply + (map inc (range 100000)))
           (p/|>> (range 100000) (map inc) (apply +)))))
  (testing "=>> with 100k elements"
    (is (= (apply + (map inc (range 100000)))
           (p/=>> (range 100000) (map inc) (apply +))))))

;; ===================================================================
;; Deeply nested path navigation
;; ===================================================================

(deftest deeply-nested-paths
  (testing "+> navigating 5 levels deep"
    (is (= :found (p/+> {:a {:b {:c {:d {:e :found}}}}} :a :b :c :d :e))))
  (testing "+>> navigating 5 levels deep"
    (is (= :found (p/+>> {:a {:b {:c {:d {:e :found}}}}} :a :b :c :d :e))))
  (testing "+> mixed path types deep"
    (is (= :val (p/+> {0 [nil {:a {"key" [0 :val]}}]} 0 1 :a "key" 1))))
  (testing "+>> mixed path types deep"
    (is (= :val (p/+>> {0 [nil {:a {"key" [0 :val]}}]} 0 1 :a "key" 1)))))

;; ===================================================================
;; Mixed path types in one thread
;; ===================================================================

(deftest mixed-path-types
  (testing "integer, string, keyword, nil in one thread"
    (let [data {0 {"a" {:b {nil :result}}}}]
      (is (= :result (p/+> data 0 "a" :b nil)))
      (is (= :result (p/+>> data 0 "a" :b nil)))))
  (testing "integer, boolean, keyword in one thread"
    (let [data {1 {true {:k :val}}}]
      (is (= :val (p/+> data 1 true :k)))
      (is (= :val (p/+>> data 1 true :k)))))
  (testing "path navigation then function call"
    (is (= "1" (p/+> {:a 1} :a str)))
    (is (= "1" (p/+>> {:a 1} :a str)))))

;; ===================================================================
;; Identity passthrough
;; ===================================================================

(deftest identity-passthrough
  (testing "+> with identity"
    (is (= 42 (p/+> 42 identity))))
  (testing "+>> with identity"
    (is (= 42 (p/+>> 42 identity))))
  (testing "x>> with (map identity) preserves collection"
    (is (= [1 2 3] (vec (p/x>> [1 2 3] (map identity)))))))

;; ===================================================================
;; Duplicate transducers
;; ===================================================================

;; The same transducer form appearing twice must be applied twice.
(deftest duplicate-transducers
  (testing "x>> with two (map inc) applies both"
    (is (= [3 4 5] (vec (p/x>> [1 2 3] (map inc) (map inc))))))
  (testing "x>> with (filter even?) twice"
    (is (= [2 4] (vec (p/x>> [1 2 3 4 5] (filter even?)
(filter even?))))))
  (testing "|>> with duplicate transducers"
    (is (= [3 4 5] (vec (p/|>> [1 2 3] (map inc) (map inc)))))))

;; ===================================================================
;; Nested macro calls
;; ===================================================================

(deftest nested-macro-calls
  (testing "+> inside +>"
    (is (= 2 (p/+> {:a {:b 2}} :a (p/+> :b)))))
  ;; The inner thread sums to 6, so the outer thread sums (+ 2 6) (+ 3 6) (+ 4 6) = 27.
  (testing "x>> as argument in x>>"
    (is (= 27 (p/x>> [1 2 3]
                     (map inc)
                     (map #(+ % (p/x>> [0 1 2] (map inc) (apply +))))
                     (apply +))))))

;; ===================================================================
;; Transducer with non-transducer interleaved
;; ===================================================================

(deftest interleaved-transducers-and-fns
  (testing "x>> with transducers then apply"
    (is (= 8 (p/x>> [1 2 3 4] (map inc) (filter odd?) (apply +)))))
  (testing "x>> with non-xf, xfs, non-xf"
    (is (= 10 (p/x>> 10 (range) (map inc) (filter even?) (apply max)))))
  (testing "classical x>> with transducers then apply"
    (is (= 8 (c/x>> [1 2 3 4] (map inc) (filter odd?)
                    (apply +))))))

;; ===================================================================
;; Boolean and nil as path keys (regression)
;; ===================================================================

;; false and nil are used as map keys here, both singly and chained.
(deftest boolean-nil-path-keys
  (testing "+> with false key"
    (is (= :no (p/+> {true :yes false :no} false))))
  (testing "+> with nil key"
    (is (= :none (p/+> {nil :none 0 :zero} nil))))
  (testing "+>> with false key"
    (is (= :no (p/+>> {true :yes false :no} false))))
  (testing "+>> with nil key"
    (is (= :none (p/+>> {nil :none 0 :zero} nil))))
  (testing "chained boolean and nil lookups"
    (let [data {false {nil :deep}}]
      (is (= :deep (p/+> data false nil)))
      (is (= :deep (p/+>> data false nil))))))

;; ===================================================================
;; Sequence types (vectors, lists, lazy seqs)
;; ===================================================================

(deftest various-sequence-types
  (testing "x>> works on vectors"
    (is (= [2 3 4] (vec (p/x>> [1 2 3] (map inc))))))
  (testing "x>> works on lists"
    (is (= [2 3 4] (vec (p/x>> '(1 2 3) (map inc))))))
  (testing "x>> works on lazy seqs"
    (is (= [1 2 3 4 5] (vec (p/x>> (range 1 6) (map identity))))))
  ;; Compared as sets because set iteration order is not asserted.
  (testing "x>> works on sets (unordered)"
    (is (= #{2 3 4} (set (p/x>> #{1 2 3} (map inc)))))))


================================================
FILE: test/injest/equivalence_test.cljc
================================================
(ns injest.equivalence-test
  "Tests proving equivalence relationships between injest macro variants. These tests ensure the macro family is internally consistent."
(:require #?(:clj  [clojure.test :refer [deftest testing is]]
               :cljs [cljs.test :refer-macros [deftest testing is]])
            [injest.path :as p]
            [injest.classical :as c]))

;; ===================================================================
;; x>> == +>> when no transducers present (path)
;; ===================================================================

;; Every assertion runs the same thread through +>> and x>> and compares results.
(deftest path-x>>-equals-+>>-no-transducers
  (testing "x>> with keywords equals +>> with keywords"
    (is (= (p/+>> {:a {:b 1}} :a :b)
           (p/x>> {:a {:b 1}} :a :b))))
  (testing "x>> with integers equals +>> with integers"
    (is (= (p/+>> [0 [1 2 3]] 1 2)
           (p/x>> [0 [1 2 3]] 1 2))))
  (testing "x>> with strings equals +>> with strings"
    (is (= (p/+>> {"a" {"b" :val}} "a" "b")
           (p/x>> {"a" {"b" :val}} "a" "b"))))
  (testing "x>> with fn calls equals +>> with fn calls"
    (is (= (p/+>> 1 (+ 2) (+ 3))
           (p/x>> 1 (+ 2) (+ 3)))))
  (testing "x>> with mixed path types equals +>>"
    (is (= (p/+>> {1 {"b" [0 1 {:c :res}]}} 1 "b" 2 :c)
           (p/x>> {1 {"b" [0 1 {:c :res}]}} 1 "b" 2 :c)))))

;; ===================================================================
;; x> == +> when no transducers present (path)
;; ===================================================================

(deftest path-x>-equals-+>-no-transducers
  (testing "x> with keywords equals +> with keywords"
    (is (= (p/+> {:a {:b 1}} :a :b)
           (p/x> {:a {:b 1}} :a :b))))
  (testing "x> with integers equals +> with integers"
    (is (= (p/+> [0 [1 2 3]] 1 2)
           (p/x> [0 [1 2 3]] 1 2))))
  (testing "x> with fn calls equals +> with fn calls"
    (is (= (p/+> 1 (+ 2) (+ 3))
           (p/x> 1 (+ 2) (+ 3))))))

;; ===================================================================
;; |>> == x>> for results (parallel produces same values)
;; ===================================================================

;; Collection results are compared after vec, so element order is part of the check.
(deftest pipeline-equals-sequential-results
  (testing "|>> produces same results as x>> for stateless transducers"
    (is (= (vec (p/x>> (range 1000) (map inc) (filter even?)))
           (vec (p/|>> (range 1000) (map inc) (filter even?))))))
  (testing "|> produces same results as x>"
    (is (= (vec (p/x> [1 2 3 4 5] (map inc) (filter odd?)))
           (vec (p/|> [1 2 3 4 5] (map inc) (filter odd?))))))
  (testing "|>> with full pipeline matches x>>"
    (is (= (p/x>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (map (partial + 10))
                  (filter even?)
                  (apply +))
           (p/|>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (map (partial + 10))
                  (filter even?)
                  (apply +))))))

;; ===================================================================
;; =>> == x>> for results (fold produces same values)
;; ===================================================================

(deftest fold-equals-sequential-results
  (testing "=>> produces same results as x>> for map"
    (is (= (vec (p/x>> (range 1000) (map inc)))
           (vec (p/=>> (range 1000) (map inc))))))
  (testing "=>> produces same results as x>> for filter"
    (is (= (vec (p/x>> (range 1000) (filter even?)))
           (vec (p/=>> (range 1000) (filter even?))))))
  (testing "=> produces same results as x>"
    (is (= (vec (p/x> [1 2 3 4 5] (map inc) (filter odd?)))
           (vec (p/=> [1 2 3 4 5] (map inc) (filter odd?))))))
  (testing "=>> with full pipeline matches x>>"
    (is (= (p/x>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (map (partial + 10))
                  (filter even?)
                  (apply +))
           (p/=>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (map (partial + 10))
                  (filter even?)
                  (apply +))))))

;; ===================================================================
;; |>> == x>> for stateful-only transducers (can't parallelize)
;; ===================================================================

;; Despite the heading, the second case below exercises =>> (with partition-all),
;; so both parallel variants are covered here.
(deftest parallel-fallback-for-stateful
  (testing "|>> with partition-by (stateful) matches x>>"
    (is (= (vec (p/x>> (range 20) (partition-by #(< % 10))))
           (vec (p/|>> (range 20) (partition-by #(< % 10)))))))
  (testing "=>> with partition-all (stateful) matches x>>"
    (is (= (vec (p/x>> (range 20) (partition-all 5)))
           (vec (p/=>> (range 20) (partition-all 5)))))))

;; ===================================================================
;; All 8 path macros produce same result for simple case
;; ===================================================================

(deftest all-path-macros-equivalent-simple
  (let [data {:a [10 20 30]}
        expected 20]
    (testing "all 8 macros produce same result on simple path navigation"
      (is (= expected (p/+> data :a 1)))
      (is (= expected (p/+>> data :a 1)))
      (is (= expected (p/x> data :a 1)))
      (is (= expected (p/x>> data :a 1)))
      (is (= expected (p/|> data :a 1)))
      (is (= expected (p/|>> data :a 1)))
      (is (= expected (p/=> data :a 1)))
      (is (= expected (p/=>> data :a 1))))))

;; ===================================================================
;; Classical x>> and path x>> agree on non-path data
;; ===================================================================

;; Each assertion evaluates the same thread with the injest.classical (c/)
;; and injest.path (p/) version of a macro.
(deftest classical-vs-path-on-flat-data
  (testing "classical x>> and path x>> agree when no path nav is used"
    (is (= (c/x>> [1 2 3] (map inc) (filter even?) (apply +))
           (p/x>> [1 2 3] (map inc) (filter even?) (apply +)))))
  (testing "classical x>> and path x>> agree for standard fn calls"
    (is (= (c/x>> 5 (inc) (dec) (str))
           (p/x>> 5 (inc) (dec) (str)))))
  (testing "classical x> and path x> agree for fn calls"
    (is (= (c/x> 1 (+ 2) (+ 3))
           (p/x> 1 (+ 2) (+ 3)))))
  (testing "classical =>> and path =>> agree on flat transducer chains"
    (is (= (c/=>> (range 50) (map inc) (filter odd?)
(apply +))
           (p/=>> (range 50) (map inc) (filter odd?) (apply +)))))
  (testing "classical |>> and path |>> agree on flat transducer chains"
    (is (= (c/|>> (range 50) (map inc) (filter odd?) (apply +))
           (p/|>> (range 50) (map inc) (filter odd?) (apply +))))))

;; ===================================================================
;; Thread-first vs thread-last for single-arg functions
;; ===================================================================

(deftest direction-irrelevant-for-single-arg
  (testing "+> and +>> agree for single-arg functions"
    (is (= (p/+> 5 inc dec str)
           (p/+>> 5 inc dec str))))
  (testing "x> and x>> agree for single-arg functions"
    (is (= (p/x> 5 inc dec str)
           (p/x>> 5 inc dec str))))
  (testing "classical x> and x>> agree for single-arg functions"
    (is (= (c/x> 5 (inc) (dec) (str))
           (c/x>> 5 (inc) (dec) (str))))))


================================================
FILE: test/injest/macro_expansion_test.clj
================================================
(ns injest.macro-expansion-test
  "Tests that verify macro expansion forms for all injest macros. Inspired by https://github.com/johnmn3/injest/issues/3 — macroexpand tests are the highest-signal way to verify a macro library."
  (:require [clojure.test :refer [deftest testing is]]
            #_:clj-kondo/ignore
            [injest.path :as p]
            #_:clj-kondo/ignore
            [injest.classical :as c]
            #_:clj-kondo/ignore
            [injest.impl :as i]))

;; ---------------------------------------------------------------------------
;; Helpers
;; ---------------------------------------------------------------------------

(defn expansion-contains?
  "Returns true if `form` equals `sym` or (recursively) contains it anywhere
  inside a nested collection, false otherwise.

  `sym` may be any value comparable with `=` (the tests below also search for
  keywords and integers). The collection branch is coerced with `boolean` so a
  miss yields `false` rather than the `nil` that `some` returns."
  [form sym]
  (cond
    (= form sym) true
    (coll? form) (boolean (some #(expansion-contains? % sym) form))
    :else false))

;; ===================================================================
;; 1. injest.path/+> — path-aware thread-first, no transducers
;; ===================================================================

(deftest path-thread-first-basic-expansion
  (testing "+> with a keyword expands to (keyword x)"
    (let [expanded (macroexpand-1 '(injest.path/+> m :a))]
      (is (= '(:a m) expanded))))
  (testing "+> with a function call expands to (f x args...)"
    (let [expanded (macroexpand-1 '(injest.path/+> x (inc)))]
      ;; path-> sees (inc) as a seq, threads x as first arg
      (is (= '(inc x) expanded))))
  (testing "+> with an integer expands to get-or-nth"
    (let [expanded (macroexpand-1 '(injest.path/+> v 2))]
      (is (expansion-contains? expanded 'injest.impl/get-or-nth))
      (is (expansion-contains? expanded 'v))
      (is (expansion-contains? expanded 2))))
  (testing "+> with a string expands to (x string)"
    (let [expanded (macroexpand-1 '(injest.path/+> m "key"))]
      (is (= '(m "key") expanded))))
  (testing "+> with nil expands to (x nil)"
    (let [expanded (macroexpand-1 '(injest.path/+> m nil))]
      (is (= '(m nil) expanded))))
  (testing "+> with a boolean expands to (x bool)"
    (let [expanded (macroexpand-1 '(injest.path/+> m true))]
      (is (= '(m true) expanded)))))

(deftest path-thread-first-multi-form-expansion
  (testing "+> with two forms threads sequentially"
    (let [expanded (macroexpand-1 '(injest.path/+> x :a :b))]
      (is (= '(:b (:a x)) expanded))))
  (testing "+> with three forms threads sequentially"
    (let [expanded (macroexpand-1 '(injest.path/+> x :a :b :c))]
      (is (= '(:c (:b (:a x))) expanded))))
  (testing "+> with mixed path types"
    (let [expanded (macroexpand-1 '(injest.path/+> m :a 0))]
      ;; :a applied first, then integer 0
      (is (expansion-contains? expanded :a))
      (is (expansion-contains? expanded 0))
      (is (expansion-contains?
expanded 'injest.impl/get-or-nth)))) (testing "+> with function call threads x as first arg" (let [expanded (macroexpand-1 '(injest.path/+> x (assoc :a 1) (dissoc :b)))] ;; Should be (dissoc (assoc x :a 1) :b) (is (= '(dissoc (assoc x :a 1) :b) expanded))))) ;; =================================================================== ;; 2. injest.path/+>> — path-aware thread-last, no transducers ;; =================================================================== (deftest path-thread-last-basic-expansion (testing "+>> with a keyword expands to (keyword x)" (let [expanded (macroexpand-1 '(injest.path/+>> m :a))] (is (= '(:a m) expanded)))) (testing "+>> with a function call threads x as last arg" (let [expanded (macroexpand-1 '(injest.path/+>> x (conj 1)))] (is (= '(conj 1 x) expanded)))) (testing "+>> with an integer expands to get-or-nth" (let [expanded (macroexpand-1 '(injest.path/+>> v 2))] (is (expansion-contains? expanded 'injest.impl/get-or-nth)))) (testing "+>> with a string expands to (x string)" (let [expanded (macroexpand-1 '(injest.path/+>> m "key"))] (is (= '(m "key") expanded)))) (testing "+>> with nil expands to (x nil)" (let [expanded (macroexpand-1 '(injest.path/+>> m nil))] (is (= '(m nil) expanded)))) (testing "+>> with a boolean expands to (x bool)" (let [expanded (macroexpand-1 '(injest.path/+>> m false))] (is (= '(m false) expanded))))) (deftest path-thread-last-multi-form-expansion (testing "+>> with two function calls threads as last arg" (let [expanded (macroexpand-1 '(injest.path/+>> x (conj 1) (conj 2)))] (is (= '(conj 2 (conj 1 x)) expanded)))) (testing "+>> thread-first vs thread-last difference" ;; The key difference: +> puts x first, +>> puts x last (let [first-expanded (macroexpand-1 '(injest.path/+> x (f a b))) last-expanded (macroexpand-1 '(injest.path/+>> x (f a b)))] (is (= '(f x a b) first-expanded)) (is (= '(f a b x) last-expanded))))) ;; =================================================================== ;; 3. 
;; Protected functions (fn, fn*, partial) — NOT threaded
;; ===================================================================

;; A step headed by fn / partial is not threaded into; the whole form is
;; placed in call position and applied to x.
(deftest protected-fn-expansion
  (testing "+> with fn wraps as (fn x) not (fn x ...)"
    (let [expected '((fn [x] (inc x)) 5)
          actual   (macroexpand-1 '(injest.path/+> 5 (fn [x] (inc x))))]
      (is (= expected actual))))
  (testing "+> with partial wraps correctly"
    (let [expected '((partial inc) 5)
          actual   (macroexpand-1 '(injest.path/+> 5 (partial inc)))]
      (is (= expected actual))))
  (testing "+>> protected fns behave same as +> (no threading direction)"
    (let [expected '((fn [x] (inc x)) 5)
          actual   (macroexpand-1 '(injest.path/+>> 5 (fn [x] (inc x))))]
      (is (= expected actual)))))

;; ===================================================================
;; 4. injest.path/x> — transducer-aware path thread-first
;; ===================================================================

;; One macroexpand-1 step of x> is checked only for the symbols it must
;; mention, not for its exact shape.
(deftest path-x-thread-first-expansion
  (testing "x> without transducers expands like +>"
    ;; Non-transducer steps are expected to end up in a +> form.
    (let [actual (macroexpand-1 '(injest.path/x> x :a :b))]
      (is (expansion-contains? actual 'injest.path/+>))))
  (testing "x> with a transducer wraps it via xfn"
    ;; Expect both the +> delegate and an xfn reference for the group.
    (let [actual (macroexpand-1 '(injest.path/x> coll (map inc) (filter odd?)))]
      (is (expansion-contains? actual 'injest.path/+>))
      (is (expansion-contains? actual 'injest.impl/xfn))))
  (testing "x> with mixed transducer and non-transducer forms"
    (let [actual (macroexpand-1 '(injest.path/x> data (map inc) :a))]
      (is (expansion-contains? actual 'injest.path/+>))
      (is (expansion-contains? actual 'injest.impl/xfn))
      (is (expansion-contains? actual :a)))))

;; ===================================================================
;; 5.
;; injest.path/x>> — transducer-aware path thread-last
;; ===================================================================

;; Thread-last counterpart of the x> expansion checks: the single-step
;; expansion must mention +>> and, when transducers are present, xfn.
(deftest path-x-thread-last-expansion
  (testing "x>> without transducers expands like +>>"
    (let [actual (macroexpand-1 '(injest.path/x>> x :a :b))]
      (is (expansion-contains? actual 'injest.path/+>>))))
  (testing "x>> with transducers wraps them via xfn"
    (let [actual (macroexpand-1 '(injest.path/x>> coll (map inc) (filter odd?)))]
      (is (expansion-contains? actual 'injest.path/+>>))
      (is (expansion-contains? actual 'injest.impl/xfn))))
  (testing "x>> with transducers then non-transducer"
    (let [actual (macroexpand-1 '(injest.path/x>> [1 2 3] (map inc) (apply +)))]
      (is (expansion-contains? actual 'injest.path/+>>))
      (is (expansion-contains? actual 'injest.impl/xfn))
      ;; apply is not a transducer, so it must survive as an ordinary step.
      (is (expansion-contains? actual 'clojure.core/apply)))))

;; ===================================================================
;; 6. injest.path/|> — pipeline parallel, thread-first
;; ===================================================================

;; Only the first expansion step is inspected: |> must hand off to x>.
(deftest path-pipeline-thread-first-expansion
  (testing "|> with stateless transducers uses pipeline-xfn"
    (let [actual (macroexpand-1 '(injest.path/|> coll (map inc) (filter odd?)))]
      (is (expansion-contains? actual 'injest.path/x>))))
  (testing "|> without transducers falls through"
    (let [actual (macroexpand-1 '(injest.path/|> x :a :b))]
      (is (expansion-contains? actual 'injest.path/x>)))))

;; ===================================================================
;; 7.
;; injest.path/|>> — pipeline parallel, thread-last
;; ===================================================================

;; Each parallel macro is checked one expansion step deep: the result must
;; mention the sequential transducer macro of the same threading direction.
(deftest path-pipeline-thread-last-expansion
  (testing "|>> with stateless transducers"
    (let [actual (macroexpand-1 '(injest.path/|>> coll (map inc) (filter odd?)))]
      (is (expansion-contains? actual 'injest.path/x>>))))
  (testing "|>> without transducers"
    (let [actual (macroexpand-1 '(injest.path/|>> x :a :b))]
      (is (expansion-contains? actual 'injest.path/x>>)))))

;; ===================================================================
;; 8. injest.path/=> — fold parallel, thread-first
;; ===================================================================

(deftest path-fold-thread-first-expansion
  (testing "=> with stateless transducers uses fold-xfn"
    (let [actual (macroexpand-1 '(injest.path/=> coll (map inc) (filter odd?)))]
      (is (expansion-contains? actual 'injest.path/x>))))
  (testing "=> without transducers"
    (let [actual (macroexpand-1 '(injest.path/=> x :a :b))]
      (is (expansion-contains? actual 'injest.path/x>)))))

;; ===================================================================
;; 9. injest.path/=>> — fold parallel, thread-last
;; ===================================================================

(deftest path-fold-thread-last-expansion
  (testing "=>> with stateless transducers"
    (let [actual (macroexpand-1 '(injest.path/=>> coll (map inc) (filter odd?)))]
      (is (expansion-contains? actual 'injest.path/x>>))))
  (testing "=>> without transducers"
    (let [actual (macroexpand-1 '(injest.path/=>> x :a :b))]
      (is (expansion-contains? actual 'injest.path/x>>)))))

;; ===================================================================
;; 10.
;; injest.classical/x> — classical transducer thread-first
;; ===================================================================

;; The classical macros are checked one macroexpand-1 step deep, and only
;; for which threading symbol the expansion mentions.
(deftest classical-x-thread-first-expansion
  (testing "classical x> expands to -> (not +>)"
    (let [expanded (macroexpand-1 '(injest.classical/x> x (map inc)))]
      ;; Classical x> uses clojure.core/-> not injest.path/+>
      (is (not (expansion-contains? expanded 'injest.path/+>)))
      (is (expansion-contains? expanded 'clojure.core/->))))
  (testing "classical x> with non-transducers passes through to ->"
    (let [expanded (macroexpand-1 '(injest.classical/x> x (inc) (dec)))]
      (is (expansion-contains? expanded 'clojure.core/->)))))

;; ===================================================================
;; 11. injest.classical/x>> — classical transducer thread-last
;; ===================================================================

(deftest classical-x-thread-last-expansion
  (testing "classical x>> expands to ->> (not +>>)"
    (let [expanded (macroexpand-1 '(injest.classical/x>> coll (map inc)))]
      (is (not (expansion-contains? expanded 'injest.path/+>>)))
      (is (expansion-contains? expanded 'clojure.core/->>))))
  (testing "classical x>> with non-transducers passes through to ->>"
    (let [expanded (macroexpand-1 '(injest.classical/x>> x (conj 1) (conj 2)))]
      (is (expansion-contains? expanded 'clojure.core/->>)))))

;; ===================================================================
;; 12. injest.classical parallel variants
;; ===================================================================

;; The parallel classical macros must hand off to the classical x> / x>>
;; of the same threading direction.
(deftest classical-pipeline-expansion
  (testing "classical |> delegates to classical x>"
    (let [expanded (macroexpand-1 '(injest.classical/|> coll (map inc)))]
      (is (expansion-contains? expanded 'injest.classical/x>))))
  (testing "classical |>> delegates to classical x>>"
    (let [expanded (macroexpand-1 '(injest.classical/|>> coll (map inc)))]
      (is (expansion-contains? expanded 'injest.classical/x>>)))))

(deftest classical-fold-expansion
  (testing "classical => delegates to classical x>"
    (let [expanded (macroexpand-1 '(injest.classical/=> coll (map inc)))]
      (is (expansion-contains? expanded 'injest.classical/x>))))
  (testing "classical =>> delegates to classical x>>"
    (let [expanded (macroexpand-1 '(injest.classical/=>> coll (map inc)))]
      (is (expansion-contains? expanded 'injest.classical/x>>)))))

;; ===================================================================
;; 13. Empty and single-form expansion
;; ===================================================================

;; With no steps at all the macro must expand to the threaded value itself.
(deftest empty-thread-expansion
  (testing "+> with no forms returns x"
    (is (= 'x (macroexpand-1 '(injest.path/+> x)))))
  (testing "+>> with no forms returns x"
    (is (= 'x (macroexpand-1 '(injest.path/+>> x))))))

;; A single step gives the same expansion in both directions for these
;; inputs (keyword, one-element call form).
(deftest single-form-expansion
  (testing "+> with single keyword"
    (is (= '(:a x) (macroexpand-1 '(injest.path/+> x :a)))))
  (testing "+>> with single keyword"
    (is (= '(:a x) (macroexpand-1 '(injest.path/+>> x :a)))))
  (testing "+> with single fn call"
    (is (= '(inc x) (macroexpand-1 '(injest.path/+> x (inc))))))
  (testing "+>> with single fn call"
    (is (= '(inc x) (macroexpand-1 '(injest.path/+>> x (inc)))))))

;; ===================================================================
;; 14. Classical vs Path expansion structure comparison
;; ===================================================================

;; Expands the same pipeline through both namespaces and checks that each
;; mentions its own threading macro (and, for x>>, not the other one).
(deftest classical-vs-path-expansion-structure
  (testing "classical x>> uses ->> while path x>> uses +>>"
    (let [classical (macroexpand-1 '(injest.classical/x>> coll (map inc) (filter odd?)))
          path (macroexpand-1 '(injest.path/x>> coll (map inc) (filter odd?)))]
      (is (expansion-contains? classical 'clojure.core/->>))
      (is (not (expansion-contains? classical 'injest.path/+>>)))
      (is (expansion-contains? path 'injest.path/+>>))
      (is (not (expansion-contains? path 'clojure.core/->>)))))
  (testing "classical x> uses -> while path x> uses +>"
    (let [classical (macroexpand-1 '(injest.classical/x> coll (map inc)))
          path (macroexpand-1 '(injest.path/x> coll (map inc)))]
      (is (expansion-contains? classical 'clojure.core/->))
      (is (expansion-contains? path 'injest.path/+>)))))

;; ===================================================================
;; 15. Transducer grouping in expansion
;; ===================================================================

;; NOTE(review): both cases only assert that xfn appears somewhere in the
;; expansion; neither counts how many xfn groups were produced.
(deftest transducer-grouping-expansion
  (testing "consecutive transducers are grouped together"
    (let [expanded (macroexpand-1 '(injest.path/x>> data (map inc) (filter odd?) (map str)))]
      ;; All three transducers should be composed into one xfn call
      (is (expansion-contains? expanded 'injest.impl/xfn))
      (is (expansion-contains? expanded 'injest.path/+>>))))
  (testing "non-transducer breaks grouping"
    (let [expanded (macroexpand-1 '(injest.path/x>> data (map inc) (apply +) (map str)))]
      ;; apply breaks the chain — should see xfn mentioned
      ;; for each separate group
      (is (expansion-contains? expanded 'injest.impl/xfn))
      (is (expansion-contains? expanded 'clojure.core/apply)))))

================================================
FILE: test/injest/parallelism_test.cljc
================================================
(ns injest.parallelism-test
  "Tests for parallel execution variants: |>, |>>, =>, =>>
  Verifies correctness, order preservation, and equivalence to sequential."
  (:require
   #?(:clj [clojure.test :refer [deftest testing is]]
      :cljs [cljs.test :refer-macros [deftest testing is]])
   [injest.path :as p]
   [injest.classical :as c]))

;; ===================================================================
;; |>> — pipeline parallel, thread-last (path)
;; ===================================================================

;; Results are realized with `vec` before comparison, so each assertion
;; checks both the contents and the order of the output.
(deftest path-pipeline-thread-last-correctness
  (testing "|>> with map on small input"
    (is (= [2 3 4 5 6]
           (vec (p/|>> [1 2 3 4 5] (map inc))))))
  (testing "|>> with filter"
    (is (= [2 4 6 8 10]
           (vec (p/|>> (range 1 11) (filter even?))))))
  (testing "|>> with map and filter composed"
    (is (= [3 5 7 9]
           (vec (p/|>> (range 1 10) (map inc) (filter odd?))))))
  (testing "|>> with mapcat"
    (is (= [1 1 2 2 3 3]
           (vec (p/|>> [1 2 3] (mapcat #(vector % %)))))))
  (testing "|>> with apply at the end"
    (is (= 15
           (p/|>> [1 2 3 4 5] (map identity) (apply +))))))

;; ===================================================================
;; |> — pipeline parallel, thread-first (path)
;; ===================================================================

(deftest path-pipeline-thread-first-correctness
  (testing "|> with map"
    (is (= [2 3 4]
           (vec (p/|> [1 2 3] (map inc))))))
  (testing "|> with filter"
    (is (= [1 3 5]
           (vec (p/|> [1 2 3 4 5] (filter odd?))))))
  (testing "|> mixed transducers and path"
    ;; :data is a path step (map lookup) ahead of the transducer step.
    (is (= [2 3 4]
           (vec (p/|> {:data [1 2 3]} :data (map inc)))))))

;; ===================================================================
;; =>> — fold parallel, thread-last (path)
;; ===================================================================

(deftest path-fold-thread-last-correctness
  (testing "=>> with map on small input"
    (is (= [2 3 4 5 6]
           (vec (p/=>> [1 2 3 4 5] (map inc))))))
  (testing "=>> with filter"
    (is (= [2 4 6 8 10]
           (vec (p/=>> (range 1 11) (filter even?))))))
  (testing "=>> with map and filter composed"
    (is (= [3 5 7 9]
           (vec (p/=>> (range 1 10) (map inc) (filter odd?))))))
  (testing "=>> with apply at the end"
    (is (= 15
           (p/=>> [1 2 3 4 5] (map identity) (apply +))))))

;; ===================================================================
;; => — fold parallel, thread-first (path)
;; ===================================================================

(deftest path-fold-thread-first-correctness
  (testing "=> with map"
    (is (= [2 3 4]
           (vec (p/=> [1 2 3] (map inc))))))
  (testing "=> with filter"
    (is (= [1 3 5]
           (vec (p/=> [1 2 3 4 5] (filter odd?))))))
  (testing "=> mixed transducers and path"
    (is (= [2 3 4]
           (vec (p/=> {:data [1 2 3]} :data (map inc)))))))

;; ===================================================================
;; Order preservation
;; ===================================================================

;; 1000-element inputs; the expected value is computed sequentially with
;; mapv, so any reordering by a parallel variant fails the equality.
;; The thread-last cases take a range, the thread-first cases a vector.
(deftest order-preservation
  (testing "|>> preserves input order"
    (let [input (range 1000)
          result (vec (p/|>> input (map inc)))]
      (is (= (mapv inc input) result))))
  (testing "=>> preserves input order"
    (let [input (range 1000)
          result (vec (p/=>> input (map inc)))]
      (is (= (mapv inc input) result))))
  (testing "|> preserves input order"
    (let [input (vec (range 1000))
          result (vec (p/|> input (map inc)))]
      (is (= (mapv inc input) result))))
  (testing "=> preserves input order"
    (let [input (vec (range 1000))
          result (vec (p/=> input (map inc)))]
      (is (= (mapv inc input) result)))))

;; ===================================================================
;; Classical parallel variants
;; ===================================================================

(deftest classical-pipeline-correctness
  (testing "classical |>> with map"
    (is (= [2 3 4]
           (vec (c/|>> [1 2 3] (map inc))))))
  (testing "classical |>> with filter"
    (is (= [2 4]
           (vec (c/|>> [1 2 3 4 5] (filter even?))))))
  (testing "classical |> with map"
    (is (= [2 3 4]
           (vec (c/|> [1 2 3] (map inc)))))))

(deftest classical-fold-correctness
  (testing "classical =>> with map"
    (is (= [2 3 4]
           (vec (c/=>> [1 2 3] (map inc))))))
  (testing "classical =>> with filter"
    (is (= [2 4]
           (vec (c/=>> [1 2 3 4 5] (filter even?))))))
  (testing "classical => with map"
    (is (= [2 3 4]
           (vec (c/=> [1 2 3] (map inc)))))))

;; ===================================================================
;; Larger parallel workloads
;; ===================================================================

;; Expected values come from plain clojure.core ->> over the same input.
(deftest larger-parallel-workloads
  (testing "|>> handles 100k elements correctly"
    (let [expected (->> (range 100000) (map inc) (filter even?) vec)]
      (is (= expected
             (vec (p/|>> (range 100000) (map inc) (filter even?)))))))
  (testing "=>> handles 100k elements correctly"
    (let [expected (->> (range 100000) (map inc) (filter even?) vec)]
      (is (= expected
             (vec (p/=>> (range 100000) (map inc) (filter even?)))))))
  (testing "all parallel variants agree on large input"
    (let [input (range 10000)
          expected (vec (->> input (map inc) (filter odd?)))]
      (is (= expected (vec (p/x>> input (map inc) (filter odd?)))))
      (is (= expected (vec (p/|>> input (map inc) (filter odd?)))))
      (is (= expected (vec (p/=>> input (map inc) (filter odd?))))))))

;; ===================================================================
;; Stateful transducers in parallel context
;; ===================================================================

;; partition-all and take must still give the sequential answer when they
;; appear inside a parallel macro, alone or after a stateless step.
(deftest stateful-transducers-in-parallel
  (testing "|>> with partition-all (stateful, cannot be pipelined separately)"
    (is (= [[1 2] [3 4] [5]]
           (vec (p/|>> [1 2 3 4 5] (partition-all 2))))))
  (testing "=>> with partition-all (stateful, cannot be folded separately)"
    (is (= [[1 2] [3 4] [5]]
           (vec (p/=>> [1 2 3 4 5] (partition-all 2))))))
  (testing "|>> with take (stateful)"
    (is (= [0 1 2]
           (vec (p/|>> (range 100) (take 3))))))
  (testing "=>> with take (stateful)"
    (is (= [0 1 2]
           (vec (p/=>> (range 100) (take 3))))))
  (testing "|>> with mixed stateless and stateful"
    (is (= [[2 3] [4 5] [6]]
           (vec (p/|>> [1 2 3 4 5] (map inc) (partition-all 2))))))
  (testing "=>> with mixed stateless and stateful"
    (is (= [[2 3] [4 5] [6]]
           (vec (p/=>> [1 2 3 4 5] (map inc) (partition-all 2)))))))

================================================
FILE: test/injest/path_test.cljc
================================================
(ns injest.path-test
  (:require
   #?(:clj [clojure.test :refer [deftest testing is]]
      :cljs [cljs.test :refer-macros [deftest testing is]])
   [injest.path :as p]))

;; Runs the README pipeline over ten million inputs and pins its sum.
(deftest readme-example
  (testing "example from the readme"
    (is (= 5000054999994
           (p/x>> (range 10000000)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +))))))

;; The lookup suites below repeat one assertion across the six path
;; macros +>, +>>, x>, x>>, =>, =>> to show they resolve a path step alike.

;; Integer step against a map: treated as a key lookup.
(deftest lookup-value-by-integer-key-in-map
  (testing "Get value from map by integer key"
    (is (= :b (p/+> {0 :a 2 :b} 2)))
    (is (= :b (p/+>> {0 :a 2 :b} 2)))
    (is (= :b (p/x> {0 :a 2 :b} 2)))
    (is (= :b (p/x>> {0 :a 2 :b} 2)))
    (is (= :b (p/=> {0 :a 2 :b} 2)))
    (is (= :b (p/=>> {0 :a 2 :b} 2)))))

;; Integer step against a vector: treated as an index.
(deftest index-into-vector
  (testing "Get value of index in vector"
    (is (= 5 (p/+> [0 2 5] 2)))
    (is (= 5 (p/+>> [0 2 5] 2)))
    (is (= 5 (p/x> [0 2 5] 2)))
    (is (= 5 (p/x>> [0 2 5] 2)))
    (is (= 5 (p/=> [0 2 5] 2)))
    (is (= 5 (p/=>> [0 2 5] 2)))))

;; Integer step against a list: also treated as an index.
(deftest index-into-sequence
  (testing "Get value of index in sequence"
    (is (= 5 (p/+> '(0 2 5) 2)))
    (is (= 5 (p/+>> '(0 2 5) 2)))
    (is (= 5 (p/x> '(0 2 5) 2)))
    (is (= 5 (p/x>> '(0 2 5) 2)))
    (is (= 5 (p/=> '(0 2 5) 2)))
    (is (= 5 (p/=>> '(0 2 5) 2)))))

(deftest lookup-key-by-string-in-map
  (testing "Get value by string key"
    (is (= 5 (p/+> {0 :a "s" 5} "s")))
    (is (= 5 (p/+>> {0 :a "s" 5} "s")))
    (is (= 5 (p/x> {0 :a "s" 5} "s")))
    (is (= 5 (p/x>> {0 :a "s" 5} "s")))
    (is (= 5 (p/=> {0 :a "s" 5} "s")))
    (is (= 5 (p/=>> {0 :a "s" 5} "s")))))

(deftest lookup-key-by-key-in-map
  (testing "Get value by keyword key"
    (is (= 5 (p/+> {0 :a :k 5} :k)))
    (is (= 5 (p/+>> {0 :a :k 5} :k)))
    (is (= 5 (p/x> {0 :a :k 5} :k)))
    (is (= 5 (p/x>> {0 :a :k 5} :k)))
    (is (= 5 (p/=> {0 :a :k 5} :k)))
    (is (= 5 (p/=>> {0 :a :k 5} :k)))))

(deftest lookup-key-by-nil-in-map
  (testing "Get value by nil key"
    (is (= 5 (p/+> {0 :a nil 5} nil)))
    (is (= 5 (p/+>> {0 :a nil 5} nil)))
    (is (= 5 (p/x> {0 :a nil 5} nil)))
    (is (= 5 (p/x>> {0 :a nil 5} nil)))
    (is (= 5 (p/=> {0 :a nil 5} nil)))
    (is (= 5 (p/=>> {0 :a nil 5} nil)))))

(deftest lookup-key-by-boolean-in-map
  (testing "Get value by boolean key"
    (is (= 5 (p/+> {0 :a true 5} true)))
    (is (= 5 (p/+>> {0 :a true 5} true)))
    (is (= 5 (p/x> {0 :a true 5} true)))
    (is (= 5 (p/x>> {0 :a true 5} true)))
    (is (= 5 (p/=> {0 :a true 5} true)))
    (is (= 5 (p/=>> {0 :a true 5} true)))))

;; An anonymous-fn literal used as a step is applied to the threaded
;; value: (- 10 (+ 1 1)) => 8.
(deftest lamda-wrapping
  (testing "wrap lambdas"
    (is (= 8 (p/+> 1 #(- 10 (+ % 1)))))
    (is (= 8 (p/+>> 1 #(- 10 (+ % 1)))))
    (is (= 8 (p/x> 1 #(- 10 (+ % 1)))))
    (is (= 8 (p/x>> 1 #(- 10 (+ % 1)))))
    (is (= 8 (p/=> 1 #(- 10 (+ % 1)))))
    (is (= 8 (p/=>> 1 #(- 10 (+ % 1)))))))

;; One path mixing every step kind. Walk: key 1 -> the seq (0 1 {...});
;; index 2 -> the map; "b" -> the vector; index 2 -> {:c ...}; :c; true;
;; nil -> :bob; name -> "bob"; the fn literal -> "hi bob!".
(deftest all-thread-features
  (testing "test all the path features at once"
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (p/+> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (p/+>> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (p/x> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (p/x>> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (p/=> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (p/=>> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))))

;; The README pipeline on (range 100) through +>>, x>> and =>>; all three
;; must produce the same total.
(deftest thread-last-transducers
  (testing "exercise thread-last macros"
    (is (= 1044
           (p/+>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +))))
    (is (= 1044
           (p/x>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +))))
    (is (= 1044
           (p/=>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +))))))

;; ===================================================================
;; Thread-first transducer tests
;; ===================================================================

(deftest thread-first-transducers
  (testing "x> with map transducer"
    (is (= [2 3 4]
           (vec (p/x> [1 2 3] (map inc))))))
  (testing "x> with map and filter"
    (is (= [2 4]
           (vec (p/x> [1 2 3 4] (map inc) (filter even?))))))
  (testing "x> with transducer and vec"
    (is (= [2 4]
           (p/x> [1 2 3 4] (map inc) (filter even?) vec))))
  (testing "+> thread-first semantics with multi-arg fn"
    (is (= {:a 1 :b 2}
           (p/+> {:a 1} (assoc :b 2))))
    (is (= {:a 1 :b 2 :c 3}
           (p/+> {:a 1} (assoc :b 2) (assoc :c 3)))))
  (testing "+> with conj threads as first arg"
    (is (= [1 2 3]
           (p/+> [1 2] (conj 3))))))

;; ===================================================================
;; Thread-last semantics tests
;; ===================================================================

;; Each case only gives the expected answer if the value lands in the
;; LAST argument position of the step.
(deftest thread-last-semantics
  (testing "+>> threads as last arg"
    (is (= [1 2 3 99]
           (p/+>> 99 (conj [1 2 3])))))
  (testing "+>> with range"
    (is (= '(0 1 2)
           (p/+>> 3 (range)))))
  (testing "+>> with str"
    (is (= "hello world"
           (p/+>> " world" (str "hello"))))))

;; ===================================================================
;; Pipeline |> and |>> tests
;; ===================================================================

(deftest pipeline-parallel-macros
  (testing "|>> with stateless transducers"
    (is (= [2 3 4]
           (vec (p/|>> [1 2 3] (map inc))))))
  (testing "|>> with map and filter"
    (is (= [2 4]
           (vec (p/|>> [1 2 3 4] (map inc) (filter even?))))))
  (testing "|>> full pipeline matches x>>"
    ;; Same pipeline and expected total as thread-last-transducers.
    (is (= 1044
           (p/|>> (range 100)
                  (map inc)
                  (filter odd?)
                  (mapcat #(do [% (dec %)]))
                  (partition-by #(= 0 (mod % 5)))
                  (map (partial apply +))
                  (map (partial + 10))
                  (map #(do {:temp-value %}))
                  (map :temp-value)
                  (filter even?)
                  (apply +)))))
  (testing "|> with map"
    (is (= [2 3 4]
           (vec (p/|> [1 2 3] (map inc)))))))

;; ===================================================================
;; Real-world data patterns
;; ===================================================================

(deftest json-like-data-navigation
  (testing "navigating JSON-like nested data"
    (let [api-response {:status 200
                        :body {:users [{:name "Alice" :age 30}
                                       {:name "Bob" :age 25}
                                       {:name "Carol" :age 35}]}}]
      (is (= "Bob" (p/+> api-response :body :users 1 :name)))
      (is (= "Bob" (p/+>> api-response :body :users 1 :name)))
      (is (= 35 (p/+> api-response :body :users 2 :age))))))

(deftest config-tree-navigation
  (testing "navigating config-like nested data"
    (let [config {:db {:host "localhost"
                       :port 5432
                       :pools {"main" {:size 10}
                               "read" {:size 20}}}}]
      (is (= 5432 (p/+> config :db :port)))
      (is (= 10 (p/+> config :db :pools "main" :size)))
      (is (= 20 (p/+>> config :db :pools "read" :size))))))

(deftest transducer-pipeline-on-data
  (testing "extract and transform from nested data"
    (let [data {:items [{:price 10 :qty 2}
                        {:price 20 :qty 1}
                        {:price 5 :qty 4}]}]
      ;; The nested x>> form is itself a +> step, so the items vector is
      ;; threaded into it as its first argument: 10*2 + 20*1 + 5*4 = 60.
      (is (= 60
             (p/+> data
                   :items
                   (p/x>> (map #(* (:price %) (:qty %)))
                          (apply +)))))))
  (testing "filter and aggregate"
    (is (= [2 4 6 8 10]
           (vec (p/x>> (range 1 11) (filter even?))))))
  (testing "multi-step transducer composition"
    (is (= ["2" "4" "6"]
           (vec (p/x>> [1 2 3 4 5 6] (filter even?) (map str)))))))

================================================
FILE: test/injest/path_unit_test.cljc
================================================
(ns injest.path-unit-test
  "Unit tests for the path navigation primitives: path->, path->>, get-or-nth."
  (:require
   #?(:clj [clojure.test :refer [deftest testing is]]
      :cljs [cljs.test :refer-macros [deftest testing is]])
   [injest.impl :as i]))

;; ===================================================================
;; get-or-nth
;; ===================================================================

;; Against a map, the second argument is used as a key — including nil
;; and boolean keys — and a missing key gives nil.
(deftest get-or-nth-on-maps
  (testing "integer key in map"
    (is (= :b (i/get-or-nth {0 :a 2 :b} 2))))
  (testing "nil key in map"
    (is (= 2 (i/get-or-nth {0 :a nil 2} nil))))
  (testing "false key in map"
    (is (= 2 (i/get-or-nth {0 :a false 2} false))))
  (testing "true key in map"
    (is (= :yes (i/get-or-nth {true :yes false :no} true))))
  (testing "string key in map"
    (is (= :val (i/get-or-nth {"key" :val} "key"))))
  (testing "missing key returns nil"
    (is (nil? (i/get-or-nth {0 :a 1 :b} 5)))))

(deftest get-or-nth-on-vectors
  (testing "valid index"
    (is (= :c (i/get-or-nth [:a :b :c] 2))))
  (testing "first element"
    (is (= :a (i/get-or-nth [:a :b :c] 0))))
  ;; NOTE(review): same input and index as "valid index" above.
  (testing "last element"
    (is (= :c (i/get-or-nth [:a :b :c] 2))))
  (testing "nested vector"
    (is (= [3 4] (i/get-or-nth [[1 2] [3 4]] 1)))))

(deftest get-or-nth-on-sequences
  (testing "list indexing"
    (is (= 3 (i/get-or-nth '(1 2 3) 2))))
  (testing "lazy seq indexing"
    (is (= 5 (i/get-or-nth (range 10) 5))))
  (testing "first of sequence"
    (is (= 0 (i/get-or-nth (range 10) 0)))))

;; ===================================================================
;; path-> (thread-first path primitive)
;; ===================================================================

;; path-> is called as a plain function here: (path-> step-form x) returns
;; the form that results from threading x through that one step.

(deftest path-thread-first-fn-call
  (testing "seq form threads x as first arg"
    (let [result (i/path-> '(f a b) 'x)]
      (is (= '(f x a b) result))))
  (testing "single-element seq form threads x as only arg"
    (let [result (i/path-> '(inc) 'x)]
      (is (= '(inc x) result)))))

(deftest path-thread-first-string
  (testing "string form becomes (x string)"
    (let [result (i/path-> "key" 'x)]
      (is (= '(x "key") result)))))

(deftest path-thread-first-nil
  (testing "nil form becomes (x nil)"
    (let [result (i/path-> nil 'x)]
      (is (= '(x nil) result)))))

(deftest path-thread-first-boolean
  (testing "true form becomes (x true)"
    (let [result (i/path-> true 'x)]
      (is (= '(x true) result))))
  (testing "false form becomes (x false)"
    (let [result (i/path-> false 'x)]
      (is (= '(x false) result)))))

(deftest path-thread-first-integer
  (testing "integer form becomes (get-or-nth x int)"
    (let [result (i/path-> 2 'x)]
      (is (= (list 'injest.impl/get-or-nth 'x 2) result)))))

(deftest path-thread-first-keyword
  (testing "keyword form becomes (keyword x)"
    (let [result (i/path-> :a 'x)]
      (is (= '(:a x) result)))))

(deftest path-thread-first-symbol
  (testing "bare symbol form becomes (symbol x)"
    (let [result (i/path-> 'inc 'x)]
      (is (= '(inc x) result)))))

;; fn / partial forms are applied to x as a whole instead of receiving x
;; as an inserted argument.
(deftest path-thread-first-protected-fns
  (testing "fn form is NOT threaded into"
    (let [result (i/path-> '(fn [y] (inc y)) 'x)]
      (is (= (list '(fn [y] (inc y)) 'x) result))))
  (testing "partial form is NOT threaded into"
    (let [result (i/path-> '(partial inc) 'x)]
      (is (= (list '(partial inc) 'x) result)))))

;; ===================================================================
;; path->> (thread-last path primitive)
;; ===================================================================

(deftest path-thread-last-fn-call
  (testing "seq form threads x as last arg"
    (let [result (i/path->> '(f a b) 'x)]
      (is (= '(f a b x) result))))
  (testing "single-element seq form threads x as only arg"
    (let [result (i/path->> '(inc) 'x)]
      (is (= '(inc x) result)))))

(deftest path-thread-last-string
  (testing "string form becomes (x string) — same as thread-first"
    (let [result (i/path->> "key" 'x)]
      (is (= '(x "key") result)))))

(deftest path-thread-last-nil
  (testing "nil form becomes (x nil) — same as thread-first"
    (let [result (i/path->> nil 'x)]
      (is (= '(x nil) result)))))

(deftest path-thread-last-boolean
  (testing "boolean forms same as thread-first"
    (is (= '(x true) (i/path->> true 'x)))
    (is (= '(x false) (i/path->> false 'x)))))

(deftest path-thread-last-integer
  (testing "integer form becomes (get-or-nth x int) — same as thread-first"
    (let [result (i/path->> 2 'x)]
      (is (= (list 'injest.impl/get-or-nth 'x 2) result)))))

(deftest path-thread-last-keyword
  (testing "keyword form becomes (keyword x)"
    (let [result (i/path->> :a 'x)]
      (is (= '(:a x) result)))))

;; ===================================================================
;; path-> vs path->> difference
;; ===================================================================

;; The two primitives are asserted to differ only for multi-element seq
;; forms; for the scalar step kinds below their output must be equal.
(deftest path-thread-direction-difference
  (testing "multi-arg fn call: -> threads first, ->> threads last"
    (is (= '(f x a b) (i/path-> '(f a b) 'x)))
    (is (= '(f a b x) (i/path->> '(f a b) 'x))))
  (testing "for non-seq forms (string, nil, bool, int), both directions are identical"
    (is (= (i/path-> "key" 'x) (i/path->> "key" 'x)))
    (is (= (i/path-> nil 'x) (i/path->> nil 'x)))
    (is (= (i/path-> true 'x) (i/path->> true 'x)))
    (is (= (i/path-> 2 'x) (i/path->> 2 'x)))
    (is (= (i/path-> :a 'x) (i/path->> :a 'x)))))

================================================
FILE: test/injest/transducer_detection_test.cljc
================================================
(ns injest.transducer-detection-test
  "Unit tests for transducer detection, registration, and thread grouping."
  (:require
   #?(:clj [clojure.test :refer [deftest testing is]]
      :cljs [cljs.test :refer-macros [deftest testing is]])
   [injest.impl :as i]
   [injest.state :as s]
   [injest.data :as d]))

;; ===================================================================
;; transducable? predicate
;; ===================================================================

;; Inputs are quoted call forms whose head is a fully qualified
;; clojure.core symbol.
(deftest transducable?-positive-cases
  (testing "standard transducers are detected"
    (is (i/transducable? '(clojure.core/map inc)))
    (is (i/transducable? '(clojure.core/filter odd?)))
    (is (i/transducable? '(clojure.core/mapcat identity)))
    (is (i/transducable? '(clojure.core/keep identity)))
    (is (i/transducable? '(clojure.core/remove nil?))))
  (testing "stateful transducers are also transducable"
    (is (i/transducable? '(clojure.core/partition-by identity)))
    (is (i/transducable? '(clojure.core/partition-all 3)))
    (is (i/transducable? '(clojure.core/take 5)))
    (is (i/transducable? '(clojure.core/drop 2)))
    (is (i/transducable? '(clojure.core/distinct)))
    (is (i/transducable? '(clojure.core/dedupe))))
  (testing "cat is transducable (special case)"
    ;; NOTE(review): unlike the quoted forms above, this passes the value
    ;; of `cat` itself, not a symbol or list.
    (is (i/transducable? cat))))

(deftest transducable?-negative-cases
  (testing "non-transducer functions are not detected"
    (is (not (i/transducable? '(clojure.core/apply +))))
    (is (not (i/transducable? '(clojure.core/reduce +))))
    (is (not (i/transducable? '(clojure.core/into [])))))
  (testing "non-sequential forms are not transducable"
    (is (not (i/transducable? :keyword)))
    (is (not (i/transducable? 42)))
    (is (not (i/transducable? "string")))
    (is (not (i/transducable? 'symbol))))
  (testing "nil is not transducable"
    (is (not (i/transducable? nil)))))

;; ===================================================================
;; par-transducable? predicate
;; ===================================================================

(deftest par-transducable?-positive-cases
  (testing "stateless transducers are par-transducable"
    (is (i/par-transducable? '(clojure.core/map inc)))
    (is (i/par-transducable? '(clojure.core/filter odd?)))
    (is (i/par-transducable? '(clojure.core/mapcat identity)))
    (is (i/par-transducable? '(clojure.core/keep identity)))
    (is (i/par-transducable? '(clojure.core/remove nil?)))
    (is (i/par-transducable? '(clojure.core/dedupe))))
  (testing "cat is par-transducable (special case)"
    (is (i/par-transducable? cat))))

(deftest par-transducable?-negative-cases
  (testing "stateful transducers are NOT par-transducable"
    (is (not (i/par-transducable? '(clojure.core/partition-by identity))))
    (is (not (i/par-transducable? '(clojure.core/partition-all 3))))
    (is (not (i/par-transducable? '(clojure.core/take 5))))
    (is (not (i/par-transducable? '(clojure.core/drop 2))))
    (is (not (i/par-transducable? '(clojure.core/take-nth 3))))
    (is (not (i/par-transducable? '(clojure.core/drop-while pos?)))))
  (testing "non-transducer functions are not par-transducable"
    (is (not (i/par-transducable? '(clojure.core/apply +))))
    (is (not (i/par-transducable? '(clojure.core/reduce +))))))

;; ===================================================================
;; Registration sets
;; ===================================================================

;; Checks the static registration data (d/def-regs, d/par-regs) and the
;; atoms in injest.state that the assertions expect to be seeded from it.
(deftest default-registrations
  (testing "def-regs is a superset of par-regs"
    (is (every? (fn [sym] (contains? d/def-regs sym)) d/par-regs)))
  (testing "def-regs contains stateful transducers not in par-regs"
    (is (contains? d/def-regs 'clojure.core/partition-by))
    (is (contains? d/def-regs 'clojure.core/partition-all))
    (is (contains? d/def-regs 'clojure.core/take))
    (is (contains? d/def-regs 'clojure.core/drop))
    (is (not (contains? d/par-regs 'clojure.core/partition-by)))
    (is (not (contains? d/par-regs 'clojure.core/partition-all)))
    (is (not (contains? d/par-regs 'clojure.core/take)))
    (is (not (contains? d/par-regs 'clojure.core/drop))))
  (testing "atoms are populated from data"
    (is (contains? @s/transducables 'clojure.core/map))
    (is (contains? @s/transducables 'clojure.core/filter))
    (is (contains? @s/transducables 'clojure.core/partition-by))
    (is (contains? @s/par-transducables 'clojure.core/map))
    (is (contains? @s/par-transducables 'clojure.core/filter))
    (is (not (contains? @s/par-transducables 'clojure.core/partition-by)))))

(deftest custom-registration
  (testing "regxf! adds to transducables atom"
    (let [before (contains? @s/transducables 'my.ns/custom-xf)]
      (is (not before))
      (s/regxf! 'my.ns/custom-xf)
      (is (contains? @s/transducables 'my.ns/custom-xf))
      ;; cleanup
      (swap! s/transducables disj 'my.ns/custom-xf)))
  (testing "regpxf! adds to par-transducables atom"
    (let [before (contains? @s/par-transducables 'my.ns/custom-pxf)]
      (is (not before))
      (s/regpxf! 'my.ns/custom-pxf)
      (is (contains? @s/par-transducables 'my.ns/custom-pxf))
      ;; cleanup
      (swap!
s/par-transducables disj 'my.ns/custom-pxf)))) ;; =================================================================== ;; compose-transducer-group ;; =================================================================== (deftest compose-transducer-group-tests (testing "single transducer without args" (let [xf (i/compose-transducer-group [[map inc]])] (is (= [2 3 4] (into [] xf [1 2 3]))))) (testing "single transducer with args" (let [xf (i/compose-transducer-group [[filter odd?]])] (is (= [1 3 5] (into [] xf [1 2 3 4 5]))))) (testing "multiple transducers composed" (let [xf (i/compose-transducer-group [[map inc] [filter even?]])] (is (= [2 4 6] (into [] xf [1 2 3 4 5]))))) (testing "transducer with multiple args" (let [xf (i/compose-transducer-group [[partition-all 2]])] (is (= [[1 2] [3 4] [5]] (into [] xf [1 2 3 4 5])))))) ;; =================================================================== ;; xfn — sequential transducer executor ;; =================================================================== (deftest xfn-tests (testing "xfn with map" (let [f (i/xfn [[map inc]])] (is (= [2 3 4] (vec (f [1 2 3])))))) (testing "xfn with map and filter composed" (let [f (i/xfn [[map inc] [filter even?]])] (is (= [2 4] (vec (f [1 2 3 4])))))) (testing "xfn preserves order" (let [f (i/xfn [[map inc]])] (is (= (range 2 102) (vec (f (range 1 101)))))))) ;; =================================================================== ;; fold-xfn — parallel fold executor (falls back to xfn in CLJS) ;; =================================================================== (deftest fold-xfn-tests (testing "fold-xfn produces same results as xfn for map" (let [seq-f (i/xfn [[map inc]]) fold-f (i/fold-xfn [[map inc]])] (is (= (vec (seq-f (range 100))) (vec (fold-f (range 100))))))) (testing "fold-xfn produces same results as xfn for filter" (let [seq-f (i/xfn [[filter even?]]) fold-f (i/fold-xfn [[filter even?]])] (is (= (vec (seq-f (range 100))) (vec (fold-f (range 100))))))) (testing "fold-xfn with 
composed transducers" (let [seq-f (i/xfn [[map inc] [filter odd?]]) fold-f (i/fold-xfn [[map inc] [filter odd?]])] (is (= (vec (seq-f (range 100))) (vec (fold-f (range 100)))))))) ;; =================================================================== ;; pipeline-xfn — async pipeline executor (falls back to xfn in CLJS) ;; =================================================================== (deftest pipeline-xfn-tests (testing "pipeline-xfn produces same results as xfn for map" (let [seq-f (i/xfn [[map inc]]) pipeline-f (i/pipeline-xfn [[map inc]])] (is (= (vec (seq-f (range 100))) (vec (pipeline-f (range 100))))))) (testing "pipeline-xfn produces same results for filter" (let [seq-f (i/xfn [[filter even?]]) pipeline-f (i/pipeline-xfn [[filter even?]])] (is (= (vec (seq-f (range 100))) (vec (pipeline-f (range 100))))))) (testing "pipeline-xfn preserves order" (let [f (i/pipeline-xfn [[map inc]])] (is (= (range 1 101) (vec (f (range 100))))))))