Repository: morloc-project/morloc Branch: master Commit: cc330baae2e1 Files: 2281 Total size: 3.9 MB Directory structure: gitextract_3x8i50ut/ ├── .dockerignore ├── .github/ │ └── workflows/ │ ├── release.yml │ └── test.yml ├── .gitignore ├── ChangeLog.md ├── LICENSE ├── README.md ├── bench/ │ ├── Bench.hs │ └── test-data/ │ ├── complex-types.loc │ ├── interop.loc │ └── simple.loc ├── container/ │ ├── Makefile │ ├── full/ │ │ ├── Dockerfile │ │ └── assets/ │ │ ├── README │ │ ├── bashrc │ │ └── vimrc │ ├── static-build/ │ │ ├── Dockerfile │ │ └── build.sh │ ├── test/ │ │ ├── Dockerfile │ │ └── assets/ │ │ └── bashrc │ └── tiny/ │ └── Dockerfile ├── data/ │ ├── lang/ │ │ ├── c/ │ │ │ └── lang.yaml │ │ ├── cpp/ │ │ │ ├── cppmorloc.cpp │ │ │ ├── cppmorloc.hpp │ │ │ ├── init.sh │ │ │ ├── lang.yaml │ │ │ ├── mlc_arrow.hpp │ │ │ ├── mlc_tensor.hpp │ │ │ ├── morloc_pch.hpp │ │ │ ├── nanoarrow/ │ │ │ │ ├── nanoarrow.c │ │ │ │ └── nanoarrow.h │ │ │ └── pool.cpp │ │ ├── julia/ │ │ │ ├── MorlocRuntime.jl │ │ │ ├── init.sh │ │ │ ├── juliabridge.c │ │ │ ├── lang.yaml │ │ │ └── pool.jl │ │ ├── languages.yaml │ │ ├── py/ │ │ │ ├── Makefile │ │ │ ├── init.sh │ │ │ ├── lang.yaml │ │ │ ├── pool.py │ │ │ ├── pymorloc.c │ │ │ └── setup.py │ │ └── r/ │ │ ├── init.sh │ │ ├── lang.yaml │ │ ├── pool.R │ │ └── rmorloc.c │ ├── misc/ │ │ └── mlccpptypes.hpp │ ├── morloc/ │ │ └── morloc.h │ └── rust/ │ ├── .gitignore │ ├── Cargo.toml │ ├── morloc-manager/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── config.rs │ │ ├── container.rs │ │ ├── doctor.rs │ │ ├── environment.rs │ │ ├── error.rs │ │ ├── freeze.rs │ │ ├── main.rs │ │ ├── selinux.rs │ │ ├── serve.rs │ │ └── types.rs │ ├── morloc-manifest/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── morloc-nexus/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ └── src/ │ │ ├── dispatch.rs │ │ ├── help.rs │ │ ├── main.rs │ │ ├── manifest.rs │ │ └── process.rs │ └── morloc-runtime/ │ ├── Cargo.toml │ ├── build.rs │ ├── cbindgen.toml │ └── src/ │ ├── 
arrow_ffi.rs │ ├── cache.rs │ ├── cli.rs │ ├── cschema.rs │ ├── daemon_ffi.rs │ ├── error.rs │ ├── eval_ffi.rs │ ├── ffi.rs │ ├── hash.rs │ ├── http_ffi.rs │ ├── intrinsics.rs │ ├── ipc.rs │ ├── ipc_ffi.rs │ ├── json.rs │ ├── json_ffi.rs │ ├── lib.rs │ ├── manifest_ffi.rs │ ├── mpack.rs │ ├── packet.rs │ ├── packet_ffi.rs │ ├── pool_ffi.rs │ ├── router_ffi.rs │ ├── schema.rs │ ├── shm.rs │ ├── slurm_ffi.rs │ ├── utility.rs │ └── voidstar.rs ├── exe/ │ └── morloc-codegen-generic/ │ └── Main.hs ├── executable/ │ ├── CppPrinter.hs │ ├── CppTranslator.hs │ ├── Main.hs │ ├── Subcommands.hs │ └── UI.hs ├── fourmolu.yaml ├── hie.yaml ├── library/ │ ├── Morloc/ │ │ ├── BaseTypes.hs │ │ ├── CodeGenerator/ │ │ │ ├── Docstrings.hs │ │ │ ├── Emit.hs │ │ │ ├── Express.hs │ │ │ ├── Grammars/ │ │ │ │ ├── Common.hs │ │ │ │ ├── Macro.hs │ │ │ │ └── Translator/ │ │ │ │ ├── Generic.hs │ │ │ │ ├── Imperative.hs │ │ │ │ └── PseudoCode.hs │ │ │ ├── Infer.hs │ │ │ ├── LambdaEval.hs │ │ │ ├── LanguageDescriptor.hs │ │ │ ├── Namespace.hs │ │ │ ├── Nexus.hs │ │ │ ├── Parameterize.hs │ │ │ ├── Realize.hs │ │ │ ├── Reduce.hs │ │ │ ├── Segment.hs │ │ │ ├── Serial.hs │ │ │ ├── Serialize.hs │ │ │ └── SystemConfig.hs │ │ ├── Completion.hs │ │ ├── Config.hs │ │ ├── Data/ │ │ │ ├── Annotated.hs │ │ │ ├── Bifoldable.hs │ │ │ ├── Bifunctor.hs │ │ │ ├── DAG.hs │ │ │ ├── Doc.hs │ │ │ ├── GMap.hs │ │ │ ├── Json.hs │ │ │ ├── Map/ │ │ │ │ └── Extra.hs │ │ │ ├── Map.hs │ │ │ └── Text.hs │ │ ├── DataFiles.hs │ │ ├── Frontend/ │ │ │ ├── API.hs │ │ │ ├── AST.hs │ │ │ ├── CST.hs │ │ │ ├── Desugar.hs │ │ │ ├── Lexer.hs │ │ │ ├── Link.hs │ │ │ ├── Merge.hs │ │ │ ├── Namespace.hs │ │ │ ├── Parser.hs │ │ │ ├── Parser.y │ │ │ ├── Restructure.hs │ │ │ ├── Token.hs │ │ │ ├── Treeify.hs │ │ │ ├── Typecheck.hs │ │ │ └── Valuecheck.hs │ │ ├── Internal.hs │ │ ├── LangRegistry.hs │ │ ├── Language.hs │ │ ├── Module.hs │ │ ├── Monad.hs │ │ ├── Namespace/ │ │ │ ├── Expr.hs │ │ │ ├── Prim.hs │ │ │ ├── State.hs │ │ │ └── 
Type.hs │ │ ├── ProgramBuilder/ │ │ │ ├── Build.hs │ │ │ └── Install.hs │ │ ├── Quasi.hs │ │ ├── System.hs │ │ ├── TypeEval.hs │ │ ├── Typecheck/ │ │ │ ├── Internal.hs │ │ │ └── NatSolver.hs │ │ └── Version.hs │ └── Morloc.hs ├── metrics/ │ ├── README.md │ └── scripts/ │ ├── collect-metrics.sh │ └── compare-metrics.sh ├── package.yaml ├── scripts/ │ ├── build-rust.sh │ └── bump-version.sh ├── spec/ │ ├── SPEC.md │ ├── compiler/ │ │ ├── COMPILER.md │ │ ├── build.md │ │ ├── codegen.md │ │ ├── parsing.md │ │ ├── pipeline.md │ │ └── typechecking.md │ ├── interop/ │ │ ├── INTEROP.md │ │ ├── foreign-functions.md │ │ ├── implementation-selection.md │ │ ├── serialization.md │ │ └── type-mappings.md │ ├── language/ │ │ ├── LANGUAGE.md │ │ ├── annotations.md │ │ ├── declarations.md │ │ ├── expressions.md │ │ ├── lexical-structure.md │ │ └── operators.md │ ├── modules/ │ │ ├── MODULES.md │ │ ├── imports-and-exports.md │ │ ├── packages.md │ │ └── resolution.md │ ├── runtime/ │ │ ├── RUNTIME.md │ │ ├── cli.md │ │ ├── execution-model.md │ │ ├── ipc.md │ │ └── manifest.md │ └── types/ │ ├── TYPES.md │ ├── inference.md │ ├── polymorphism.md │ ├── primitive-types.md │ ├── records.md │ ├── subtyping.md │ ├── type-declarations.md │ └── typeclasses.md ├── stack.yaml └── test-suite/ ├── .gitignore ├── GoldenMakefileTests.hs ├── Main.hs ├── PropertyTests.hs ├── UnitTypeTests.hs ├── cmorloc-tests/ │ ├── Makefile │ ├── test.c │ └── test.h ├── concurrency-tests/ │ ├── bidi-py-r.loc │ ├── bidi-r-py.loc │ ├── concurrent-uni.loc │ ├── deep-callback.loc │ ├── helpers/ │ │ ├── py_funcs.py │ │ └── r_funcs.R │ └── run-tests.sh ├── daemon-tests/ │ ├── arithmetic.loc │ ├── helpers.py │ ├── pure.loc │ ├── run-tests.sh │ └── strings.loc ├── error-message-tests/ │ ├── README │ ├── bad-source/ │ │ ├── Makefile │ │ └── foo.loc │ ├── cyclical-import/ │ │ └── foo.loc │ ├── list-error-1/ │ │ ├── Makefile │ │ └── foo.loc │ ├── missing-import-module/ │ │ └── foo.loc │ ├── missing-import-term/ │ │ ├── 
barm.loc │ │ └── main.loc │ ├── pool-crash-cpp/ │ │ ├── Makefile │ │ ├── foo.hpp │ │ ├── main │ │ └── main.loc │ ├── runtime-error-cpp/ │ │ ├── Makefile │ │ ├── foo.hpp │ │ └── main.loc │ ├── runtime-error-cross-lang/ │ │ ├── Makefile │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── runtime-error-py/ │ │ ├── Makefile │ │ ├── foo.py │ │ └── main.loc │ ├── runtime-error-r/ │ │ ├── Makefile │ │ ├── foo.R │ │ └── main.loc │ ├── term-masking/ │ │ ├── foo.loc │ │ ├── foo2.loc │ │ └── foo3.loc │ ├── typechecking-1/ │ │ ├── Makefile │ │ └── foo.loc │ ├── typechecking-2/ │ │ ├── Makefile │ │ └── foo.loc │ ├── typeclass-bad-instance/ │ │ └── foo.loc │ ├── typeclass-bad-instance-expr/ │ │ └── foo.loc │ ├── typeclass-masking/ │ │ ├── foo.loc │ │ ├── foo2.loc │ │ └── foo3.loc │ ├── typeclass-monomorphic-conflict/ │ │ ├── foo.loc │ │ ├── foo2.loc │ │ └── foo3.loc │ ├── typeclass-overlap/ │ │ ├── foo.loc │ │ ├── foo2.loc │ │ └── foo3.loc │ └── undefined-term/ │ ├── Makefile │ └── foo.loc ├── executable-benchmark/ │ ├── .gitignore │ ├── README │ ├── distributed/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── foo.cpp │ │ ├── foo.py │ │ ├── main.loc │ │ └── main.yaml │ ├── parallel-interop/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── foo.R │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── parallel.py │ │ └── test.sh │ └── serial-interop/ │ ├── .gitignore │ ├── Makefile │ ├── foo.R │ ├── foo.hpp │ ├── foo.loc │ ├── foo.py │ ├── long-list.json │ ├── medium-list.json │ └── test.sh ├── golden-tests/ │ ├── .gitignore │ ├── README.md │ ├── alias-array-monoid/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── alias-concrete-bugs/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── alias-constructor-equiv/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── alias-dedup-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── alias-no-cross-instance/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-1-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── 
main.loc │ ├── argument-form-1-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-1-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-2-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-2-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-2-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-3-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-3-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-3-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-4-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-4-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-4-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-5-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-5-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-5-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-6-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-6-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-6-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-7-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-7-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-7-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-8-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-8-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── argument-form-8-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── arrow-immutable-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.hpp │ │ └── src.py │ ├── arrow-immutable-pc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.hpp │ │ └── src.py │ ├── arrow-immutable-pr/ │ │ ├── Makefile 
│ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.R │ │ └── src.py │ ├── arrow-immutable-rp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.R │ │ └── src.py │ ├── arrow-nexus-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── src.hpp │ ├── arrow-nexus-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── src.py │ ├── bare-selector-args/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── bare-selector-chain/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── bug-intrinsic-schema-crash/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ │ └── main.loc │ ├── bug-load-type-infer/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ │ └── main.loc │ ├── claude-test-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── claude-test-10/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── sequence_align.hpp │ │ ├── sequence_io.py │ │ └── sequence_stats.R │ ├── claude-test-11/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── sysinfo.R │ │ ├── sysinfo.hpp │ │ └── sysinfo.py │ ├── claude-test-12/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ │ └── main.loc │ ├── claude-test-13/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── stress.hpp │ │ └── stress.py │ ├── claude-test-14/ │ │ ├── Makefile │ │ ├── edge.hpp │ │ ├── edge.py │ │ ├── exp.txt │ │ └── main.loc │ ├── claude-test-15/ │ │ ├── Makefile │ │ ├── bigdata.hpp │ │ ├── bigdata.py │ │ ├── exp.txt │ │ └── main.loc │ ├── claude-test-16/ │ │ ├── Makefile │ │ ├── cstress.hpp │ │ ├── exp.txt │ │ ├── main.loc │ │ └── pstress.py │ ├── claude-test-17/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ │ └── main.loc │ ├── claude-test-18/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ │ └── main.loc │ ├── claude-test-19/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ │ └── main.loc │ ├── claude-test-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── claude-test-20/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.py │ 
│ └── main.loc │ ├── claude-test-3/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── claude-test-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── claude-test-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── game.hpp │ │ ├── game.py │ │ ├── main.loc │ │ └── package.yaml │ ├── claude-test-6/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── exp.txt │ │ ├── filters.hpp │ │ ├── image_io.py │ │ ├── main.loc │ │ └── stats.R │ ├── claude-test-7/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── aggregations.hpp │ │ ├── data_io.py │ │ ├── exp.txt │ │ ├── main.loc │ │ └── statistics.R │ ├── claude-test-8/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── string_ops.hpp │ │ ├── text_processing.py │ │ └── text_stats.R │ ├── claude-test-9/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── exp.txt │ │ ├── finance_stats.R │ │ ├── helpers.py │ │ ├── main.loc │ │ ├── market_io.py │ │ ├── monte_carlo.hpp │ │ └── test-map.loc │ ├── command-groups/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── composition/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── paste.py │ ├── demo-trimming/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── config.json │ │ ├── example.fastq │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── edge-cases-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── edge-cases-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── effect-accumulate-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── effect-coerce-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── effect-coerce-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── effect-error-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── effect-multi-label-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── effect-subtype-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── errors/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── 
foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── eta-reduction-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-3/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-6/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-7/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── eta-reduction-8-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── eta-reduction-8-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── eval-restrict-source/ │ │ ├── Makefile │ │ └── exp.txt │ ├── feature-integration-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── file-input-c/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── main.loc │ │ └── run.sh │ ├── file-input-py/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── main.loc │ │ └── run.sh │ ├── file-input-r/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── main.loc │ │ └── run.sh │ ├── force-inline-basic/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── formatting/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── functional-data-1/ │ │ ├── Makefile │ │ ├── bar.py │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── functional-data-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── functional-data-3a/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── functional-data-3b/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.hpp │ │ ├── main.loc │ │ └── main.py │ ├── functional-data-3c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── functional-data-3d/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── 
functional-data-3d-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.hpp │ │ └── main.loc │ ├── functional-data-3d-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── functional-data-3d-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.R │ │ └── main.loc │ ├── functional-data-3e/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── functional-data-3f/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── functional-data-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── functional-data-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.py │ ├── generic-hofs-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── generic-hofs-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── guards-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── guards-inline-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── guards-let-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── guards-let-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── guards-let-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── guards-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── guards-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── higher-kinded-types/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── main.loc │ │ └── notes │ ├── hofs-1/ │ │ ├── Makefile │ │ ├── eq.py │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── holes-func/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── holes-record/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── holes-simple/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── import-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── import-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── localmap/ │ │ │ ├── main.loc │ │ │ └── main.py │ │ └── main.loc 
│ ├── infix/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── impl.py │ │ └── main.loc │ ├── infix-generic/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── infix-import/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── main.loc │ │ └── ops/ │ │ ├── main.loc │ │ └── ops.py │ ├── infix-local-fixity/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── ops-a/ │ │ │ ├── main.loc │ │ │ └── ops.py │ │ └── ops-b/ │ │ ├── main.loc │ │ └── ops.py │ ├── infix-polyglot/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── ops/ │ │ │ └── main.loc │ │ ├── ops-cpp/ │ │ │ ├── main.loc │ │ │ └── ops.hpp │ │ └── ops-py/ │ │ ├── main.loc │ │ └── ops.py │ ├── infix-typeclass-import/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── main.loc │ │ └── numops/ │ │ ├── main.loc │ │ └── ops.py │ ├── infix-typeclass-polyglot/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── semiring/ │ │ │ └── main.loc │ │ ├── semiring-cpp/ │ │ │ ├── main.loc │ │ │ └── ops.hpp │ │ └── semiring-py/ │ │ ├── main.loc │ │ └── ops.py │ ├── infix-typeclass-simple/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── inline-block-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── inline-cross-lang/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── inline-deep-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── inline-func-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── inline-ho-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── inline-mixed-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── inline-old-style-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── inline-op-ho-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── inline-op-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── inline-typeclass-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── ops.py │ ├── interop-1-py/ 
│ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── interop-1-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── interop-10/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── interop-11/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── incdef.hpp │ │ ├── main.loc │ │ └── pysum.py │ ├── interop-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3a-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3a-pp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── foopy.loc │ │ └── main.loc │ ├── interop-3a-pr/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3a-rc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3b-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3b-pp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3b-pr/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3b-rc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3c-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ 
├── foor.loc │ │ └── main.loc │ ├── interop-3c-pp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3c-pr/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3c-rc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3d-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3d-pp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3d-pr/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3d-rc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3e-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3e-pp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3e-pr/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3e-rc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── 
fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-3f/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── interop-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── fooc.loc │ │ ├── foopy.loc │ │ └── main.loc │ ├── interop-6/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.loc │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ └── main.loc │ ├── interop-7/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ └── main.loc │ ├── interop-8-py-to-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── interop-8-r-to-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ └── main.loc │ ├── interop-8-r-to-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── interop-9/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── intrinsic-agnostic/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── hello.json │ │ └── main.loc │ ├── intrinsic-constants/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── intrinsic-hash/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── intrinsic-show-ho-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── intrinsic-show-read/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── intrinsic-show-read-nexus/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── let-crosslang/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── f.hpp │ │ ├── f.py │ │ └── main.loc │ ├── let-expressions/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── local-import-cousin-py/ │ │ ├── Makefile │ │ ├── bar/ │ │ │ └── baz/ │ │ │ ├── main.loc │ │ │ └── main.py │ │ ├── bif/ │ 
│ │ └── biz/ │ │ │ ├── main.loc │ │ │ └── main.py │ │ ├── exp.txt │ │ └── main.loc │ ├── local-import-nested-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── lib/ │ │ │ └── math/ │ │ │ ├── main.loc │ │ │ └── main.py │ │ ├── main.loc │ │ ├── package.yaml │ │ ├── util.loc │ │ └── util.py │ ├── local-import-root-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── root/ │ │ ├── main.loc │ │ └── main.py │ ├── manifold-form-0/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t0.loc │ ├── manifold-form-0x/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── manifold-form-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t1.loc │ ├── manifold-form-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t2.loc │ ├── manifold-form-2x/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t2.loc │ ├── manifold-form-3/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t3.loc │ ├── manifold-form-3x/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t3.loc │ ├── manifold-form-4_c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t4.loc │ ├── manifold-form-4_py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t4.loc │ ├── manifold-form-4_r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── manifold-form-5_c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t5.loc │ ├── manifold-form-5_py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t5.loc │ ├── manifold-form-5_r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t5.loc │ ├── manifold-form-6_c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t6.loc │ ├── manifold-form-6_py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t6.loc │ ├── manifold-form-6_r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t6.loc │ ├── manifold-form-7_c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t7.loc │ ├── manifold-form-7_py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t7.loc │ ├── manifold-form-7_r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── t7.loc │ ├── memory-interop-misalign-cp/ │ │ ├── Makefile │ │ ├── cppfuncs.loc │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ ├── main.loc │ │ ├── pyfuncs.loc │ │ └── types.loc │ ├── memory-nested-misalign-cpp/ 
│ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── memory-nested-misalign-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── memory-optional-double-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── memory-optional-double-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── memory-record-pack-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── memory-record-pack-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── memory-split-block-cpp/ │ │ ├── Makefile │ │ ├── cppfuncs.loc │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ ├── main.loc │ │ ├── pyfuncs.loc │ │ └── types.loc │ ├── module-form-00n/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-011/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-01n/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-0n0/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-0n1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-101/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-10n/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-110/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-111/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-1n0/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ ├── fooroot.loc │ │ └── 
main.loc │ ├── module-form-n00/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-n01/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── module-form-n10/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── foopy.loc │ │ ├── foor.loc │ │ ├── fooroot.loc │ │ └── main.loc │ ├── multi-lang-mempty-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── multiple-instances-1-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rms.h │ ├── multiple-instances-1-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rms.py │ ├── multiple-instances-1-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rms.R │ ├── multiple-instances-2-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rms.h │ ├── multiple-instances-2-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rms.py │ ├── multiple-instances-2-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── rms.R │ │ └── rms.py │ ├── multiprocessing-py-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── namespace-basic/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-disambiguation/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-composition/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-double-import/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-exported/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-guard/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-hof/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-let/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── 
helpers.loc │ │ └── main.loc │ ├── namespace-ns-multi/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── mod-a.loc │ │ ├── mod-b.loc │ │ └── mod-c.loc │ ├── namespace-ns-nested-getter/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-reexport/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ ├── main.loc │ │ └── middle.loc │ ├── namespace-ns-same-func-name/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-shadow/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-ns-unqualified/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-selective/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helpers.loc │ │ └── main.loc │ ├── namespace-separate-impls/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── mod-a.loc │ │ └── mod-b.loc │ ├── nat-typecheck/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── src.hpp │ ├── native-morloc-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── native-morloc-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── native-morloc-3/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── native-morloc-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── native-morloc-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── high.loc │ │ └── high.py │ ├── native-morloc-6/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── native-morloc-7/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── native-morloc-8/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── native-morloc-9/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── nexus-let-lambda/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── nexus-let-pure/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── null-keyword/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.R │ │ ├── main.hpp │ │ ├── main.loc │ │ └── main.py │ ├── numeric-literals/ │ │ 
├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── object-1-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── person.h │ ├── object-1-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── person.py │ ├── object-1-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── person.R │ ├── optional-coerce-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── optional-coerce-interop/ │ │ ├── Makefile │ │ ├── cfoo.hpp │ │ ├── cppfuncs.loc │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── pfoo.py │ │ ├── pyfuncs.loc │ │ └── types.loc │ ├── optional-coerce-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── optional-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── optional-interop-cp/ │ │ ├── Makefile │ │ ├── cppfuncs.loc │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ ├── main.loc │ │ ├── pyfuncs.loc │ │ └── types.loc │ ├── optional-interop-pr/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ ├── main.loc │ │ ├── pyfuncs.loc │ │ ├── rfuncs.loc │ │ └── types.loc │ ├── optional-interop-rc/ │ │ ├── Makefile │ │ ├── cppfuncs.loc │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── main.loc │ │ ├── rfuncs.loc │ │ └── types.loc │ ├── optional-json/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── optional-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── optional-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ └── main.loc │ ├── optional-records-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── optional-records-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── optional-records-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ └── main.loc │ ├── packer-definitions-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── packer-definitions-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── lib/ │ │ │ └── json/ │ │ │ ├── json.py │ │ │ └── 
main.loc │ │ └── main.loc │ ├── packer-definitions-3/ │ │ ├── Makefile │ │ ├── dumby.hpp │ │ ├── dumby.py │ │ ├── exp.txt │ │ └── main.loc │ ├── packer-definitions-4/ │ │ ├── Makefile │ │ ├── dumby.hpp │ │ ├── dumby.py │ │ ├── exp.txt │ │ └── main.loc │ ├── packer-definitions-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── map.hpp │ │ └── map.py │ ├── packets-interop/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.h │ │ ├── foo.py │ │ ├── main.loc │ │ └── make_test_data.py │ ├── packets-large/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.h │ │ ├── foo.py │ │ ├── main.loc │ │ ├── make_test_data.py │ │ └── test-data.mpk │ ├── parser-stress/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── path-shadowing-c/ │ │ ├── Makefile │ │ ├── bar/ │ │ │ ├── main.hpp │ │ │ └── main.loc │ │ ├── baz/ │ │ │ ├── main.hpp │ │ │ └── main.loc │ │ ├── exp.txt │ │ └── main.loc │ ├── path-shadowing-py/ │ │ ├── Makefile │ │ ├── bar/ │ │ │ ├── main.loc │ │ │ └── main.py │ │ ├── baz/ │ │ │ ├── main.loc │ │ │ └── main.py │ │ ├── exp.txt │ │ └── main.loc │ ├── path-shadowing-r/ │ │ ├── Makefile │ │ ├── bar/ │ │ │ ├── main.R │ │ │ └── main.loc │ │ ├── baz/ │ │ │ ├── main.R │ │ │ └── main.loc │ │ ├── exp.txt │ │ └── main.loc │ ├── pattern-getters/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── pattern-setters/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main-cpp.loc │ │ ├── main-py.loc │ │ ├── main-r.loc │ │ ├── tests.loc │ │ └── types.hpp │ ├── poly-list-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── record-docstrings/ │ │ ├── Makefile │ │ ├── algconf.json │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── main.loc │ │ └── sysconf.json │ ├── records-alias/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ ├── lib/ │ │ │ └── main.loc │ │ └── main.loc │ ├── records-complex-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ 
├── records-complex-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── records-nested/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── records-primitive/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── recursion-cross-py-cpp/ │ │ ├── Makefile │ │ ├── cpp_helpers.hpp │ │ ├── exp.txt │ │ ├── main.loc │ │ └── py_helpers.py │ ├── recursion-cross-r-cpp/ │ │ ├── Makefile │ │ ├── cpp_helpers.hpp │ │ ├── exp.txt │ │ ├── main.loc │ │ └── r_helpers.R │ ├── recursion-direct-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-direct-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-direct-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-helper-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-helper-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-mutual-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-mutual-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-mutual-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── recursion-thunk-helper-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── recursion-thunk-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── scoping-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-10/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── high.loc │ │ ├── high.py │ │ └── low.loc │ ├── scoping-11/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── scoping-12/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── scoping-13/ │ │ ├── Makefile │ │ ├── core.hpp │ │ ├── exp.txt │ │ ├── foo/ │ │ │ ├── main.loc │ │ │ └── test/ │ │ │ ├── bar.py │ │ │ └── main.loc │ │ ├── foo-cpp.loc │ │ └── main.loc │ ├── scoping-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-3/ │ 
│ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-6/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-7/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-8/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── scoping-9/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── w.loc │ ├── selection-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── selection-2/ │ │ ├── Makefile │ │ ├── arithmetic/ │ │ │ └── main.loc │ │ ├── exp.txt │ │ └── main.loc │ ├── selection-3/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rms.R │ ├── selection-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-10-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-10-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-10-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-11-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-11-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-11-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-12-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-12-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-12-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-2-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-2-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-2-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-4-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-4-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-4-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-5-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-5-py/ │ │ ├── Makefile │ │ ├── exp.txt │ 
│ └── main.loc │ ├── serial-form-5-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-6-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-6-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-6-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-7-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-7-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-7-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-8-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-8-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-8-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-9-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-9-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── serial-form-9-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── slurm-label-codegen/ │ │ ├── Makefile │ │ ├── bar.py │ │ ├── exp.txt │ │ ├── main.loc │ │ └── main.yaml │ ├── source-old-op-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── specialization-1-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── specialization-1-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── specialization-1-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ └── main.loc │ ├── specialization-2-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── specialization-2-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.py │ │ └── main.loc │ ├── stderr-stdout/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ ├── main.loc │ │ └── package.yaml │ ├── stdout-flush-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── fun.py │ │ └── main.loc │ ├── string-encoding/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── a.json │ │ ├── b.json │ │ ├── exp.txt │ 
│ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── string-encoding-utf8/ │ │ ├── Makefile │ │ ├── a.json │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── string-escape/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── helper.R │ │ ├── helper.hpp │ │ ├── helper.py │ │ └── main.loc │ ├── string-interpolation/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── string-json-parsing/ │ │ ├── Makefile │ │ ├── a.json │ │ ├── b.json │ │ ├── c.json │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── string-multiline/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── string-pretty/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── table-1-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── table-1-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── table-1-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── table-2-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── person.h │ ├── table-2-py/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── person.py │ ├── table-2-r/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── person.R │ ├── tensor-comprehensive-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── src.hpp │ ├── tensor-comprehensive-cross/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.R │ │ ├── src.hpp │ │ └── src.py │ ├── tensor-cp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.hpp │ │ └── src.py │ ├── tensor-dimensions/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.hpp │ │ └── src.py │ ├── tensor-nat-basic/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── labeled.hpp │ │ └── main.loc │ ├── tensor-nat-labeled/ │ │ ├── Makefile │ │ ├── cnn.hpp │ │ ├── exp.txt │ │ └── main.loc │ ├── tensor-nexus-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── src.hpp │ ├── tensor-pc/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.hpp │ │ └── src.py │ ├── tensor-pr/ │ │ ├── 
Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.R │ │ └── src.py │ ├── tensor-rp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ ├── src.R │ │ └── src.py │ ├── tensor-table-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── src.hpp │ ├── thunk-basic/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── thunk-choose/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── thunk-cross-force/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── thunk-do/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── thunk-effects/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── thunk-eval-forall/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rng.py │ ├── thunk-eval-hk/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ └── rng.py │ ├── thunk-export/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── thunk-export-guard/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── thunk-force/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── thunk-guard-cross/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── thunk-interop/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.R │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── thunk-let/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ ├── thunk-nullary-interop/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── two-module/ │ │ ├── Makefile │ │ ├── combat.loc │ │ ├── exp.txt │ │ ├── tavern.loc │ │ ├── util.loc │ │ └── util.py │ ├── type-alias-transitive/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── main.loc │ │ ├── types/ │ │ │ └── main.loc │ │ └── types-cpp.loc │ ├── type-annotations-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── type-identities-c/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── main.loc │ │ 
└── types.h │ ├── type-synthesis-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── type-synthesis-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── lib/ │ │ │ ├── foo/ │ │ │ │ ├── foo.py │ │ │ │ └── main.loc │ │ │ └── footypes/ │ │ │ └── main.loc │ │ └── main.loc │ ├── typeclass-stress/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── typeclasses-1/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── typeclasses-2/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── typeclasses-3/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── typeclasses-4/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── typeclasses-5/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── typeclasses-6/ │ │ ├── Makefile │ │ ├── exp.txt │ │ └── main.loc │ ├── typeclasses-7/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── typeclasses-8/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── typeclasses-9/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── unicode-edge-cases/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── unicode-interop/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── unicode-interpolation/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.py │ │ └── main.loc │ ├── unicode-source/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ ├── foo.py │ │ └── main.loc │ ├── unicode-source-cpp/ │ │ ├── Makefile │ │ ├── exp.txt │ │ ├── foo.hpp │ │ └── main.loc │ └── unit-1/ │ ├── Makefile │ ├── exp.txt │ ├── foo.py │ └── main.loc ├── install-tests/ │ ├── .gitignore │ ├── README.md │ ├── run-tests.sh │ ├── testcpp1/ │ │ ├── main.loc │ │ ├── package.yaml │ │ └── square.hpp │ ├── testcpp2/ │ │ ├── main.loc │ │ ├── package.yaml │ │ └── src/ │ │ └── dbl.hpp │ ├── testcpp3/ │ │ ├── inc.hpp │ │ ├── main.loc │ │ └── offset.hpp │ ├── 
testdatafile1/ │ │ ├── data.txt │ │ ├── main.loc │ │ ├── package.yaml │ │ └── reader.py │ ├── testpy1/ │ │ ├── helpers.py │ │ ├── main.loc │ │ └── package.yaml │ ├── testpy2/ │ │ ├── main.loc │ │ ├── package.yaml │ │ └── src/ │ │ └── mathutil.py │ ├── testpy3/ │ │ ├── fmtlib.py │ │ ├── formatter.py │ │ └── main.loc │ ├── testr1/ │ │ ├── main.loc │ │ ├── negate.R │ │ └── package.yaml │ ├── testr2/ │ │ ├── main.loc │ │ ├── package.yaml │ │ └── src/ │ │ └── triple.R │ └── testr3/ │ ├── glue.R │ ├── main.loc │ └── rutil.R ├── integration/ │ ├── Main.hs │ └── Morloc/ │ └── Test/ │ ├── Common.hs │ ├── ConcurrencyTests.hs │ ├── DaemonTests.hs │ ├── InstallTests.hs │ ├── ShmTests.hs │ └── StressTests.hs ├── shm-tests/ │ ├── cppfuncs.loc │ ├── main.loc │ ├── pyfuncs.loc │ ├── stress.hpp │ ├── stress.py │ └── types.loc ├── stress/ │ ├── .gitignore │ ├── README.md │ ├── common.sh │ ├── concurrent-stress.sh │ ├── crash-recovery.sh │ ├── run-all.sh │ ├── valgrind-check.sh │ └── zombie-stress.sh ├── test.sh └── typecheck-benchmark/ ├── apply-ann-20.loc ├── apply-deep-10.loc ├── apply-deep-100.loc ├── apply-deep-30.loc ├── apply-deep-50.loc ├── apply-deep.loc ├── apply-multi-arg-20.loc ├── compose-10.loc ├── compose-20.loc ├── compose-40.loc ├── partial-app.loc ├── poly-compose.loc ├── run-benchmarks.sh └── tuple-nested-20.loc ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # Ignore build artifacts to keep Docker context small .stack-work/ .git/ out/ container/static-build/.stack-work/ morloc-manager/.stack-work/ data/rust/target/ *.hi *.o ================================================ FILE: .github/workflows/release.yml ================================================ # Release workflow: build all binaries + containers, run tests, publish release. 
# # Triggered by pushing a version tag: # git tag v0.68.0 # git push origin v0.68.0 # # Produces for each platform (linux-x86_64, linux-arm64, macos-arm64): # - morloc-manager (Rust binary) # - libmorloc.so (Rust runtime library; .dylib on macOS) # - morloc-nexus (Rust binary) # # All three are attached to the GitHub Release. name: Release on: push: tags: ['v*'] env: REGISTRY: ghcr.io IMAGE_BASE: ghcr.io/morloc-project/morloc jobs: # ---- Build Rust binaries (libmorloc + morloc-nexus + morloc-manager) per platform ---- rust-binary: strategy: fail-fast: false matrix: include: - os: ubuntu-latest platform: linux-x86_64 method: docker - os: ubuntu-24.04-arm platform: linux-arm64 method: docker - os: macos-latest platform: macos-arm64 method: native runs-on: ${{ matrix.os }} timeout-minutes: 30 steps: - uses: actions/checkout@v4 # ---- Linux: Docker container build ---- # libmorloc.so + morloc-nexus: glibc (Ubuntu 20.04) # morloc-manager: static (Alpine/musl) - name: Build Rust binaries (Linux) if: matrix.method == 'docker' run: | docker build -t morloc-rust-build \ -f container/static-build/Dockerfile . 
mkdir -p out docker run --rm -v "$(pwd)/out:/out" morloc-rust-build # Verify morloc-manager is static file out/morloc-manager | grep -qE "static(ally|-pie) linked" - name: Rename artifacts (Linux) if: matrix.method == 'docker' run: | mv out/libmorloc.so out/libmorloc-${{ matrix.platform }}.so mv out/morloc-nexus out/morloc-nexus-${{ matrix.platform }} mv out/morloc-manager out/morloc-manager-${{ matrix.platform }} # ---- macOS: native cargo build ---- - name: Setup Rust (macOS) if: matrix.method == 'native' uses: dtolnay/rust-toolchain@stable - name: Cache Cargo (macOS) if: matrix.method == 'native' uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git data/rust/target key: cargo-macos-${{ hashFiles('data/rust/Cargo.lock') }} restore-keys: cargo-macos- - name: Build Rust binaries (macOS) if: matrix.method == 'native' run: | cd data/rust # Build libmorloc (cdylib produces .dylib on macOS) cargo build --release -p morloc-runtime # Install .dylib so nexus can link against it mkdir -p $HOME/.local/share/morloc/lib cp target/release/libmorloc_runtime.dylib $HOME/.local/share/morloc/lib/libmorloc.dylib # Build nexus cargo build --release -p morloc-nexus # Build manager cargo build --release -p morloc-manager # Collect artifacts mkdir -p ../../out cp target/release/libmorloc_runtime.dylib ../../out/libmorloc-${{ matrix.platform }}.dylib cp target/release/morloc-nexus ../../out/morloc-nexus-${{ matrix.platform }} cp target/release/morloc-manager ../../out/morloc-manager-${{ matrix.platform }} strip ../../out/morloc-nexus-${{ matrix.platform }} || true strip ../../out/morloc-manager-${{ matrix.platform }} || true - name: Upload Rust artifacts uses: actions/upload-artifact@v4 with: name: rust-binaries-${{ matrix.platform }} path: out/* # ---- Run tests using the Rust binaries ---- test: needs: rust-binary runs-on: ubuntu-latest timeout-minutes: 60 env: DEBIAN_FRONTEND: noninteractive steps: - uses: actions/checkout@v4 - uses: haskell-actions/setup@v2 with: 
ghc-version: '9.6.7' enable-stack: true stack-version: 'latest' - uses: actions/cache@v4 with: path: | ~/.stack/snapshots ~/.stack/setup-exe-cache .stack-work key: stack-deps-release-${{ hashFiles('stack.yaml.lock', 'package.yaml') }} restore-keys: stack-deps-release- - name: Increase shared memory run: sudo mount -o remount,size=4G /dev/shm - name: Install system dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends \ r-base-core python3 python3-dev python3-pip \ python3-numpy g++ gcc make libgsl-dev python3 -m pip install --break-system-packages --upgrade setuptools pyarrow - name: Cache R packages uses: actions/cache@v4 id: r-cache with: path: ~/R/library key: r-lib-${{ runner.os }}-${{ runner.arch }} - name: Install R packages if: steps.r-cache.outputs.cache-hit != 'true' run: | mkdir -p ~/R/library LIBARROW_MINIMAL=true ARROW_S3=OFF ARROW_GCS=OFF \ Rscript -e 'install.packages("arrow", lib="~/R/library", repos = "https://cloud.r-project.org")' - name: Download Rust binaries uses: actions/download-artifact@v4 with: name: rust-binaries-linux-x86_64 path: rust-bin - name: Prepare Rust binaries run: | mkdir -p prebuilt mv rust-bin/libmorloc-linux-x86_64.so prebuilt/libmorloc.so mv rust-bin/morloc-nexus-linux-x86_64 prebuilt/morloc-nexus mv rust-bin/morloc-manager-linux-x86_64 prebuilt/morloc-manager chmod +x prebuilt/libmorloc.so prebuilt/morloc-nexus prebuilt/morloc-manager - name: Add morloc to PATH run: | echo "$HOME/.local/bin" >> $GITHUB_PATH echo "$HOME/.local/share/morloc/bin" >> $GITHUB_PATH echo "R_LIBS_USER=$HOME/R/library" >> $GITHUB_ENV - name: Build morloc run: stack install --system-ghc --no-install-ghc --no-run-tests - name: Initialize morloc (using pre-built Rust binaries) run: | MORLOC_RUST_BIN=$(pwd)/prebuilt morloc init -f morloc install stdlib - name: Run tests run: stack test --system-ghc --no-install-ghc morloc:morloc-test timeout-minutes: 10 # ---- Build and push container images ---- containers: needs: test 
runs-on: ubuntu-latest timeout-minutes: 120 permissions: packages: write steps: - uses: actions/checkout@v4 - name: Extract version from tag id: ver run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT" - name: Login to GHCR uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build tiny (from local source) run: | docker build \ -t ${{ env.IMAGE_BASE }}/morloc-tiny:${{ steps.ver.outputs.version }} \ -t ${{ env.IMAGE_BASE }}/morloc-tiny:edge \ -f container/tiny/Dockerfile . - name: Build full (uses local tiny) run: | docker build \ --build-arg MORLOC_VERSION=${{ steps.ver.outputs.version }} \ -t ${{ env.IMAGE_BASE }}/morloc-full:${{ steps.ver.outputs.version }} \ -t ${{ env.IMAGE_BASE }}/morloc-full:edge \ container/full/ - name: Push all images run: | for img in morloc-tiny morloc-full; do docker push ${{ env.IMAGE_BASE }}/${img}:${{ steps.ver.outputs.version }} docker push ${{ env.IMAGE_BASE }}/${img}:edge done # ---- Create GitHub Release ---- release: if: always() && needs.containers.result == 'success' needs: [containers, rust-binary] runs-on: ubuntu-latest permissions: contents: write steps: - name: Download all artifacts uses: actions/download-artifact@v4 with: pattern: 'rust-binaries-*' merge-multiple: true - name: List artifacts run: ls -lh - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: files: | morloc-manager-* libmorloc-* morloc-nexus-* generate_release_notes: true ================================================ FILE: .github/workflows/test.yml ================================================ name: Test Morloc on: [push] jobs: linux-test: runs-on: ubuntu-latest timeout-minutes: 60 env: DEBIAN_FRONTEND: noninteractive steps: - name: Checkout uses: actions/checkout@v4 - name: Setup Haskell (GHC + Stack) uses: haskell-actions/setup@v2 with: ghc-version: '9.6.7' enable-stack: true stack-version: 'latest' - name: Setup Rust uses: 
dtolnay/rust-toolchain@stable - name: Cache Stack dependencies uses: actions/cache@v4 with: path: | ~/.stack/snapshots ~/.stack/setup-exe-cache .stack-work key: stack-deps-${{ hashFiles('stack.yaml.lock', 'package.yaml') }} restore-keys: | stack-deps- - name: Cache Cargo dependencies uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git data/rust/target key: cargo-deps-${{ hashFiles('data/rust/Cargo.lock') }} restore-keys: | cargo-deps- - name: Increase shared memory (needed for morloc SHM pool tests) run: sudo mount -o remount,size=4G /dev/shm - name: Install system dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends \ r-base-core \ python3 \ python3-dev \ python3-pip \ python3-numpy \ g++ \ gcc \ make \ libgsl-dev python3 -m pip install --break-system-packages --upgrade setuptools pyarrow - name: Cache R packages uses: actions/cache@v4 id: r-cache with: path: ~/R/library key: r-lib-${{ runner.os }}-${{ runner.arch }} - name: Install R packages if: steps.r-cache.outputs.cache-hit != 'true' run: | mkdir -p ~/R/library LIBARROW_MINIMAL=true ARROW_S3=OFF ARROW_GCS=OFF \ Rscript -e 'install.packages("arrow", lib="~/R/library", repos = "https://cloud.r-project.org")' - name: Add morloc bin folders to PATH run: | echo "$HOME/.local/bin" >> $GITHUB_PATH echo "$HOME/.local/share/morloc/bin" >> $GITHUB_PATH echo "R_LIBS_USER=$HOME/R/library" >> $GITHUB_ENV - name: Build Morloc run: | stack install --system-ghc --no-install-ghc --no-run-tests - name: Initialize morloc and install modules run: | MORLOC_RUST_DIR=$(pwd)/data/rust morloc init -f morloc install stdlib - name: Test morloc run: | stack test --system-ghc --no-install-ghc morloc:morloc-test timeout-minutes: 10 ================================================ FILE: .gitignore ================================================ tags *.hi *.o .stack-work/ .history morloc.cabal *.out run/ .idea/ *.iml # ignore debugging files *.aux *.hp *.prof *.ps .bash_history .local 
.stack CLAUDE.md .claude/ claude-memory/ .RData .Rhistory pools/ nexus out/ ================================================ FILE: ChangeLog.md ================================================ 0.81.0 [2026-04-22] ------------------- * fix type inference for type families * extend intrinsic support for language-agnostic code * do shell-expansion of flagfile lines in morloc-manager * fix loss of pool stderr on crash (maybe?) 0.80.1 [2026-04-21] ------------------- * add `morloc-manager new --include ` src:dest syntax * make stderr/stdout mim principles more consistent 0.80.0 [2026-04-20] ------------------- * cleaner morloc-manager * fixed --print bug in nexus 0.79.1-5 [2026-04-19] --------------------- * lots of morloc-manager updates 0.79.0 [2026-04-17] ------------------- * add @datafile intrinsic for finding installed files * default to including all files when installing - parse `.morlocignore` to find non-install files - allow `include` in package.yaml for strict file inclusion * add `morloc-manager doctor` subcommand for health checks * add additional `freeze/unfreeze` validation * fix unfreezing bugs 0.78.0 [2026-04-17] ------------------- * clean up CLI usage statements * suppress "null" output in Unit-returning CLIs * fix string escape bugs * fix where parsing after do-block * fix many small morloc-manager issues * fix paths in installed morloc programs 0.77.1 [2026-04-16] ------------------- * resurrected `morloc-manager log` subcommand 0.77.0 [2026-04-15] ------------------- * many many deployment fixes 0.76.0 [2026-04-13] ------------------- * fix docstring groups * add module docstrings * fix bug in unfreeze 0.75.0 [2026-04-12] ------------------- * allow effectful final do-notation statements * lots of bug fixes * make changelog version source of truth 0.74.0 [2026-04-06] ------------------- * unify morloc-manager env, workspace, and version * allow docstrings before declarations * allow literal leading spaces in docstrings * allow escaped `:` syntax 0.73.0 [2026-04-01]
------------------- * fix @load error on missing file * fix error reporting on failed `morloc-manager install` 0.72.0 [2026-03-30] ------------------- * fix unicode support in comments and literal strings * many fixes to the morloc-manager 0.71.0-2 [2026-03-29] ------------------- * port morloc-manager to rust 0.70.1 to 0.70.6 [2026-03-29] ----------------------------- * fix portability for libmorloc and morloc-nexus on darwin and linux-arm64 * python flushing race condition 0.70.0 [2026-03-29] ------------------- * port libmorloc and morloc-nexus from C to Rust 0.69.0 [2026-03-27] ------------------- * port morloc-manager from shell to Haskell * share it as a static binary 0.68.0 [2026-03-22] ------------------- * remove explicit universal quantifiers - before: `id a :: a -> a` - now: `id :: a -> a` * add `morloc uninstall --all` option * do not require repeated `let` terms 0.67.1 [2026-03-17] ------------------- * add missing Nat kind annotation to root modules * better test coverage of Nat kinds * add `morloc uninstall --all` option * add `stdlib` module that simplifies recursive install of everything 0.67.0 [2026-03-17] ------------------- * add Arrow tables with zero-copy between language sharing * add tensors * remove explicit qualifiers - no more `id a :: a -> a` - now `id :: a -> a` - this does not require C++ be more explicit in their typing * inline when packet data is smaller than 65kb - this avoids fragmenting the shared memory with many small single-use values 0.66.0 [2026-03-12] ------------------- * fix propagation of errors from crashed pools * fix shm memory alignment (inefficiency on x86, crashes on ARM) * add `--sanitize` option to `morloc init` for strict memory * replace megaparsec with parsec * fix label and remote handling 0.65.0 [2026-03-10] ------------------- * `morloc eval` for running anonymous morloc expressions * add %inline pragma to skip manifold gen * allow batch syntax for instances * add @show and @read intrinsics * add
namespaces for morloc imports 0.64.0 [2026-03-03] ------------------- * replace thunks with more granular effect system * add intrinsics * disambiguate local module imports * change null to Null 0.63.0 [2026-02-25] ------------------- * add optionals with implicit coercion * for `?Str` with `literal: true`, force `default: null` * add daemon args to nexus 0.62.0 [2026-02-24] ------------------- * term recursion support * conditional guards * nexus pretty printing option (-p/--print) * print defaults in usage statements * more bug fixes 0.61.0 [2026-02-21] ------------------- Build updates * make nexus a constant binary (not recreated and recompiled) * add install handling and portable scripts * add morloc daemon mode accessible through HTTP/TCP and sockets * add `morloc install --build` option for installing both executable modules * add `morloc uninstall` * add `morloc list` with -v option for listing types of all exports Typesystem updates * add let syntax that enforces execution order * class constraints (e.g., unique :: Eq a => [a] -> [a]) * superclasses (e.g., class Semigroup => Monoid a where ...) 
* add effect system for delayed execution * add do-syntax for imperative programming with effects Better errors and UI * cleaner error messages * error message localization * clean `morloc typecheck` output * add CLI subcommand tested grouping * Add shell TAB-completion * fix haddock for future hackage release Bug fixes * cleaned up memory issues in all C code * replaced mcparallel in R with forked pool of workers * fix bug in `morloc install .` Testing * added stress test for zombies and memory issues * added daemon tests Performance * Split the monolithic (~7000 line) morloc.h file * Use a libmorloc.so shared library rather than importing all as header * Removed all the zombie swarms that where killing heavy morloc projects * Remove mcparallel from R, move most of the pool to C, 4X interop speed * Compile and reuse a single nexus (reduce compile costs) Language onboarding * Added codegen IR that greatly simplifies new language addition * Factor all language-specific material out of main Haskell codebase * Move all grammar into template yaml specs * Created MessagePack bridge to bypass voidstar, making lang onboard almost trivial (at a performance penalty). 
Other * Transitioned parser from recursive descent to LR1 0.60.0 [2026-02-07] ------------------- * add infix operator support Testing * Generate many new tests with Claude * Extensive testing of infix operators and module inheritance Cleanup * add formatting with fourmulo * add metric and benchmarking Bug fixes * Fix several memory issues in morloc.h and lang bindings * Fixed indexing bug in Generator.hs * Fix missing space in git module cp * Make missing folders in install path * Fix `morloc install` deletion of mod folder when install has trailing '/' * Disallow space between sign and number Performance * Fixed exponential case hit on eta resolution * Fixed quadratic case in subtype algorithm Claude memory * Added claude memory files 0.59.0 [2026-01-23] ------------------- * allow functions in data structures * allow source functions to return functions * allow application of expressions that evaluate to functions * fix bug in C++ bool deserialization 0.58.3 [2026-01-03] ------------------- * fix record docstring inheritance 0.58.2 [2025-12-29] ------------------- * fix minor bug optional versus positional 0.58.1 [2025-12-28] ------------------- * fix minor bug in record unrolling 0.58.0 [2025-12-28] ------------------- * add hex, octal, and binary numeric representations * new record syntax * generate CLI from docstrings * resolve records into groups of arguments * allow literal strings without the extra JSON quoting * change to Apache-2.0 0.57.1 [2025-11-12] ------------------- * minor bug fixes 0.57.0 [2025-11-11] ------------------- * re-allow underscores in variable names * add placeholder syntax / lambda lifting from holes * multi-line strings * string interpolation * getter patterns * setter patterns * write full Morloc nexus evaluator * Fix bug in local installs with "." 
0.56.0 [2025-10-08] ------------------- New file organization * Change file layout to conform to XDG spec; replaced `~/.morloc` with `~/.local/share/morloc` * Move standard library to `$MORLOC_HOME/src/morloc/plane/default/morloclib` * src/ - stores any source code morloc needs, not just modules * morloc/ - stores morloc modules * plane/ - stores morloc modules that are part of a plane * default/ - the current default plane * morloclib/ - the org name for the core modules in the default plane New `morloc install` subcommand functionality * Multiple imports may be chained on one command * Source and ref can be included per install, for example: * `root` - install a core module * `root@hash:abcdef1234` - specific core instance * `root@tag:v1.0.0` - specific tag/version * `codeberg:weena/calendar@hash:abcdef1234` - 3rd party with source and ref * Support for install of local modules 0.55.1 [2025-09-29] ------------------- * Bug fixes 0.55.0 [2025-09-27] ------------------- * Allow general types to be declared and imported/exported * Allow undirected dependencies * Allow dashes in module names * Fix many bugs in native Morloc code handling * Fix bug in certain higher order foreign functions * Simplify internal unique naming conventions * Move all tests to use the new root module (rather than base) * Slightly improved error messages Breaking changes: * Move to reverse tree model of dependency handling * Allow exactly one type signature for each term and class * Require explicit typeclass import/export * Fix handling of Python builtin imports Now builtins must not be imported from Python source. 
Instead import directly from Python, for example: `source Py ("len")` * More efficient Haskell Map usage 0.54.2 [2025-08-09] ------------------- * Enforce lowercase module name rule * Fix handling of executable file name option * Fix record handling 0.54.1 [2025-07-26] ------------------- * Fix pickle bug in Python multiprocessing * Replace asprintf non-standard C function * Partially fix interop for C structs * Fix bug in unqualified imports 0.54.0 [2025-07-13] ------------------- * Add full MessagePack and VoidStar IO to nexus * Fixed type pretty printing in usage and typechecking output * Add nexus option and usage info * Add support for one-line command docstrings 0.53.7 [2025-05-31] ------------------- * Fix bug in parameterized type definition concrete type inference 0.53.6 [2025-05-31] ------------------- * Improve container instructions in README * Fix all README examples * Make Dockerfiles more portable * Fix bug in implicit typeclass instance inheritance 0.53.5 [2025-05-12] ------------------- * Fixed unnecessary copying in C libs * Fixed double let-binding in code gen 0.53.4 [2025-05-08] ------------------- * Replace C daemon forking with thread pooling (4X speedup) * Fix bugs related to 0-length array memory allocation 0.53.3 [2025-05-06] ------------------- * Fix handling of empty vectors 0.53.2 [2025-05-06] ------------------- * Fix bug in JSON parsing 0.53.1 [2025-05-06] ------------------- * Update containers * Update github actions * Fix bug in type scoping (sort of) 0.53.0 [2025-05-04] ------------------- Unify backend under shared C library * Replace python nexus with C nexus * Move packets to little-endian format * Move all packet handling, binary protocol specs, socket handling, JSON parsing from pool code into the shared morloc.h library * Replace R `future` parallelism scheme with builtin `parallel` library Add remote worker and resource management support * Add conventions for specifying caching and evaluation strategy * Add xxhash hashing 
for caching * Add remote handling with experimental slurm support Other * `morloc` commands now return proper exit codes 0.52.1 [2025-02-16] ------------------- * Add python native bytes and bytearray support 0.52.0 [2025-02-09] ------------------- Type evaluation and specialization * Delayed general alias evaluation * Add strict numeric types (`int8`, `uint8`, `int16` etc) * Allow concrete alias specialization * Add type hints allowing concrete type conservation across foreign calls Bug fixes * Fix bug in opening existing shared memory volumes Language support * Add C++ support for std template list-like types (`list`, `forward_list`, `deque`, `stack`, and `set` (I know, they're a tad different) * Add Python support for `numpy` vectors and arrays * Allow raw R vectors to be interpreted as strings * Nexus no longer creates python objects from returned data, instead writes results directly through C library (`morloc.h`) 0.51.1 [2024-12-04] ------------------- * Do not catch STDOUT and STDERR * Fix NULL return errors * Fix container setup 0.51.0 [2024-12-02] ------------------ Shared memory * Allow processes to communicate through shared memory Setup * `morloc init` command will now build the `morloc` ecosystem, writing all required headers to `morloc` home and compiling any required shared libraries. 
Other * Fix cases where morloc stalls when errors are transmitted across languages * Moved demos to the dedicated example repo 0.50.0 [2024-11-08] ------------------- Better installation * `morloc install --commit 45d8df12` - for github retrieval by commit hash * `morloc install --branch dev` - to retrieve latest from specific branch * `morloc install --tag 0.1.0` - to retrieve specific tag Better containers * Use podman rather than docker in Makefile * `morloc-tiny:` - everything needed to compile morloc projects * `morloc-full:` - an environment for running projects 0.49.0 [2024-11-04] ------------------- New backend with better performance * Mediate inter-process communication with UNIX domain sockets * Transmit data with MessagePack rather than JSON * Added a benchmarking suite * Added `morloc init` command to configure morloc ecosystem 0.48.0 [2024-05-10] ------------------- Second PeerJ submission (after return by editor for technical reasons) * Made type parameters explicit as type arguments: `id a :: a -> a` rather than either of `id :: a -> a` `id :: forall a . a -> a`. * Pass-by-reference in generated C++ code * Simplify generated C++ code by using function template arguments rather than type casting. 
0.47.2 [2024-04-28] ------------------- * made the build static * fixed the dockerfile * exported the vim-syntax file out to its own repo * added a --version option * upgraded to LTS 22.18 0.47.2 [2024-04-28] ------------------- First PeerJ submission 0.47.0 [2024.03.10] ------------------- * Add value checker * Raise error when implementations have equal score (no arbitrary choice) 0.46.0 [2024.03.06] ------------------- * Fix precedence of '@' operator - it binds tightest * Update flu demo * Fix github actions * Fix Dockerfile * Address all compiler warnings 0.45.0 [2024.02.14] ------------------- * Allow explicit import of polymorphic terms * Fix infinite loop bug when two many arguments are applied to a function * Synchronise tests with new core libraries type classes 0.44.0 [2024.02.08] ------------------- Add support for ad hoc polymorphism. * Support sources and declarations in classes * Support multiple parameters * Support overlapping instances * Packers are now implemented through the `Packable` typeclass Some missing features: * No support typeclass constraints in the type signatures. * No support for parameterized class variables * No support for polymorphic recursion (does anyone want that?) 0.43.0 [2024.01.14] ------------------- New features * Allow a module to explicitly export packers * Show pool pseudocode for `typecheck -r` * Add `typecheck dump` subcommand to show expressions and indices * Allow nexus inputs to be files * Remove concrete type signatures - always infer * Make fields in language-specific table decs optional Rather than this: table (Person a) = Person {name :: Str, info :: a} table R (Person a) = "data.frame" {name :: Str, info :: a} Allow this: table (Person a) = Person {name :: Str, info :: a} table R (Person a) = "data.frame" Really, I need to totally redo the table/record/object handling. 
* Remove support for anonymous records in type signatures I will re-add this possibly at a future time when I refactor Infrastructure changes * Pass all data between pools as files rather than string arguments * Raise an error if any general type cannot be translated to a concrete type Fixes * Fix record type inference * Fix bug in collecting packers (missed packers required by root manifold) * Fix C++ handling of quotes and special characters in JSON strings 0.42.0 [2023.10.11] ------------------- * Infer concrete types from inferred general types when necessary and possible * More informative error messages * Fix template resolution in generated C++ code * Fix include name shadowing conflict in generated C++ code * Partially fix naming conflict in Python and R pools caused by use of "f". My solution was name mangling, though we need a more permanent solution to our problem. * Let user write correct "list" R types for lists, tuples, and records * Fix bug in code generation of lets ## Internal * For `ForeignInterfaceM` constructor of ExprM, store the full call type, not just the return type * Parameterize `ExprM` with the type type (e.g., `Maybe TypeP` or `TypeM`) * Replace `Argument` and `PreArgument` with a parameterized `Arg` type. * Fix broken `ForeignInterfaceM` and `PoolCallM` cases in `typeOfExprM` * Refactor backend Testing and documentation: * Complete flu demo - it builds now, just need to implement the library code for align and other functions. 
0.41.1 [2023.05.26] ------------------- * Print nothing if "null" is returned * Fix the import of working directory modules * Resolve bug in occur check * Streamline github actions script * Fix `morloc install` path download for core modules * Raise type error for untyped concrete term * Fix bug in concrete type synthesis 0.41.0 [2023.04.16] ------------------- Language updates * Add check to avoid infinite typechecker recursion * Synthesize concrete types from general types when possible * Improve exports * Move exports to module list (as in Haskell) * Add `*` wildcard to export every top-level named term or type * Raise an error if a non-existing term is exported from a module * Allow concrete (un)packers to vary in number of generic terms * Set functions with empty inputs (e.g., `()`) to have empty lists of arguments * Replace the `Null` literal term with `()` Package updates * Default to c++17, rather than c++11 * Restructure library to avoid name conflicts with pre-existing packages * Replace Perl nexus with Python nexus and remove Perl dependencies Better error messages and logs * Resolve "shit output" error message (map index to export name) * Tidy up the verbose diagnostics * Print general and concrete types for typecheck with -r option * Add multiple levels of verbosity (quiet by default) Bug fixes * Typechecking bug in record access * Fix bug allowing undefined things in module export lists * Fix cousin module imports * Fix unparameterized (un)packer serialization * Fix error raised when a module exporting a type is compiled * Fix out of order record fields in nexus output 0.40.0 [2023.02.04] ------------------- * Infer types of records * Fix bug in concrete type inference * Fix bugs in foreign higher order function code generation * Simplify generator code * Add many tests 0.39.0 [2023.01.03] ------------------- * Add compose operator * Allow eta-reduction 0.38.0 [2022.12.23] ------------------- * Choose casing convention * camel case for terms (for now, 
underscore is illegal) * pascal case for types * Fix sub-module handling * Fix import/export of type definitions * Better error messages for import/export errors * Somewhat formatted `typecheck` subcommand output * Add option to typecheck backend (concrete types and language selection) 0.37.0 [2022.12.11] ------------------- * Remove extra space printed at the end of R JSON * Clarify error message for missing exports * Clarify error message for missing concrete signature * Fix exponential time bug in parser * Allow prime characters in names after first position * Allow '.' to separate namespaces in imports/modules * Fix infinite loop bug when module name != import name 0.36.0 [2022.02.17] ------------------- * Separate reals from integers * Remove global haskell extensions from package.yaml 0.36.0 [2022.02.17] ------------------- * Separate reals from integers * Remove global haskell extensions from package.yaml 0.35.0 [2021.10.24] ------------------- Where scoping and a total architecture refactor * Fix handling for generic parameterized types * Improve whitespace handling * Statements are order invariant * Thread link from expression to source expression down to generators * Typecheck over final abstract syntax trees rather than expressions * Separate general and concrete typechecking * Pretty instances for all data types * More testing * Agonized over deep and wide structures 0.34.0 [2021.03.05] ------------------- * Add handling for C++ float primitives * Let C++ programs be imported through a header and shared library * Remove semicolon requirement * Add hie.yaml for compatibility with hsl * Update dependency set to LTS-17.4 * Add subparsers to CLI with pcapriotti/optparse-applicative * Remove brace requirement for modules and `where` statements * Add `-o` option to compiler to specify output executable names * Acceptable syntax error messages 0.33.0 [2020.11.03] ------------------- First hackage release * Haddock documentation * Update README * In help 
statements write universal, not concrete, types * Make default containers non-existential (probably a bad decision?) 0.32.0 [2020.11.01] ------------------- * Add record/table field access * Fix JSON handling in nexus * Fix nexus bug necessitated escaping quotations and braces * Print general types in nexus help * Resolve most GHC warnings 0.31.0 [2020.10.29] ------------------- * Fix anonymous records in C++ * Distinguish 'record', 'object', and 'table' * Add object handling * Add table handling 0.30.0 [2020.10.23] ------------------- * Add `object` keyword for defining record types * Add full record serialization handling (C++, py, R) 0.29.0 [2020.10.21] ------------------- * Add AST directed (de)serialization framework * Add type constructors for parameterized types 0.28.0 [2020.10.12] ------------------- * Allow import/export of type aliases * Refactor with DAGs all through the parser and typechecker 0.27.0 [2020.10.04] ------------------- * Add systematic tests for data serialization * Fix bug in C++ serialization * Move to serialize to dedicated libraries that require no import 0.26.0 [2020.09.27] ------------------- Add `type` keyword for defining type aliases 0.25.0 [2020.09.26] ------------------- No explicit forall. Instead use Haskell convention of generics being lowercase and non-generics being uppercase. 
* no more explicit "forall" * generics are lowercase in type signatures * non-generic types are uppercase * normal functions are lowercase * class constructors are uppercase (though handling for this is not yet implemented) 0.24.0 [2020.09.22] ------------------- Allow integration of many instances 0.23.0 [2020.05.14] Bug fixes and code cleanup Bug fixes / tests - [x] [x] github issue #7 - new Var=> typechecking rule - [x] [x] github issue #9 - rewire container type inference - [x] [x] github issue #10 - [x] [x] github issue #11 0.22.0 [2020.04.28] ------------------- Implement a schema-directed composable serialization system Major changes * Fully composable serialization over containers and primitives * Improved C++ support of generic functions * Record support for R and Python3 (not C++ yet) * Refactor generator - replace old grammar system * Allow arguments to be passed to general functions (e.g., `foo x = [x]`, where no specific language is needed) Minor changes * change default python3 interpreter from "python" to "python3" * add default library and tmp paths to config handler * test composable serialization functions in all supported languages * allow wrapped comments in R Testing - grammar directed testing * test record handling * remove and replace out-of-date golden tests * systematic argument handling tests * systematic manifold form tests * systematic interop testing 0.21.0 [2020.03.31] ------------------- Major - add handling and test for many many corner cases * Allow export of data statements * Allow export of non-functions * Allow functions with containers at the root * Allow export of 0-argument functions Minor * proof-of-concept composable serialization functions in C++ (cppbase) * add python tests * make the test output look pretty (no weird whitespace) 0.20.0 [2020.03.23] ------------------- * Add composable default types 0.19.1 [2020.02.22] ------------------- * bug fixes 0.19.0 [2020.02.20] ------------------- Major changes * Allow currying * 
Add realization optimizations * Refactor generator into series of clear transformations * Added handling for dealing with ambiguous ASTs Minor bug fixes and updates * Prettier code generation for C++, Python and R * More detailed comments in generated code * Allow tags on parenthesized types * Fix bug in functions with multiple parameters * Fix bug preventing loading of package metadata 0.18.1 [2019.11.08] ------------------- * Fix travis * Use C++11 for C++ builds * Make .morloc/config optional * Fix bug in parsing unit type: `()` 0.18.0 [2019.11.04] ------------------- * Add bidirectional type system * Allow parameterized concrete types * Allow higher-order functions * Allow properties to contain multiple terms * Add many tests * Add module system * Allow non-primitive types in lists, tuples, and records * Removed arq and SPARQL dependency (very fast compilation) 0.17.4 [2019.06.29] ------------------- * Add C and C++ handling * Define Ord intance for MTypeMeta * Allow pools to be called as executables * Add type handling to generators * Remove redundant SPARQL queries (better performance) * New RDF list semantics * Use strings to represent concrete types (e.g. "char\*") * Write pretty-printed diagnostic files to `$MORLOC_HOME/tmp` * Handling for multiple concrete type signatures (e.g., definition of a function in multiple languages). * Handling for multiple abstract type signatures * Handling for multiple function declarations 0.17.3 [2019.06.14] ------------------- * Partial C support - execution of sourced functions - no composition - no foreign calls * Partial transition to typed generators - bound arguments are still not typed correctly * Use integer IDs to identify manifolds in pools and the nexus (can to make calls between them) instead of long, mangled names. * Replace string names of languages (e.g., "python") with a sum type. 
0.17.2 [2019.05.05] ------------------- Pycon release 0.17.1 [2019.04.26] ------------------- * Fix output serialization in generate code * Fix module linking in generated code 0.17.0 [2019.04.16] ------------------- * Add morloc home * Load modules from `$MORLOCHOME/lib` * Create monad stack 0.16.2 [2018.03.05] ------------------- * Add Zenodo badge making the project citable * Move to `morloc-project/morloc` github repo 0.16.1 [2018.09.24] ------------------- Minor release consisting of internal refactoring * Pruned unnecessary code * Pruned unnecessary imports * Compliance with stricter compile flags 0.16.0 [2018.09.14] ------------------- * Write RDF bools in lowercase ("true", rather than "True"), as per specs * Stricter node typing (replace ad hoc names with elements from an ADT) * Add very rudimentary typechecking * Remove SPARQL server dependency (now there's a sluggish Jena dependency) 0.15.1 [2018.09.10] ------------------- * Add error handling and reporting to pools * Add type signature comments to generated pools * Richer internal data structures 0.15.0 [2018.09.05] ------------------- * Generalize code generators using grammar records * Add Python compatibility * Replace unit tests with golden tests * Use docopt and USAGE template for argument handling * Report number of arguments in nexus usage statements ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================

build status github release license: Apache 2.0

Manual | Discord | Paper | X | BlueSky | Email

Morloc

compose functions across languages under a common type system

**Why use Morloc?** * Universal function composition: Import functions from multiple languages and compose them together under a unified, strongly-typed functional framework. * Polyglot without boilerplate: Use the best language for each task with no manual bindings or interop code. * Type-directed CLI generation: Write concrete function signatures once and automatically generate elegant command-line interfaces with argument parsing, validation, help text, and documentation. * Composable CLI tools: Morloc CLI programs can be composed by simply importing them into a new Morloc module and re-exporting their functions. * Seamless benchmarking and testing: Swap implementations and run the same benchmarks/tests across languages with consistent type signatures and data representation. * Design universal libraries: Build abstract, type-driven libraries and populate them with foreign language implementations, enabling rigorous code organization and reuse. * Smarter workflows: Replace brittle application/file-based pipelines with faster, more maintainable pipelines made from functions acting on structured data. Below is a simple example, for installation details and more examples, see the [Manual](https://morloc-project.github.io/docs). A Morloc module can import functions from foreign languages, assign them general types, and compose new functions: ```morloc -- Morloc code, in "main.loc" module m (vsum) import root-py import root-cpp source Py from "foo.py" ("pmap") pmap a b :: (a -> b) -> [a] -> [b] source Cpp from "foo.hpp" ("sum") sum :: [Real] -> Real --' Input numeric vectors that will be summed in parallel --' metavar: VECTORS type Vectors = [[Real]] --' Sum a list of numeric vectors --' return: Final sum of all elements in all vectors vsum :: Vectors -> Real vsum = sum . pmap sum ``` The imported code is natural code with no Morloc-specific dependencies. 
Below is the C++ code that defines `sum` as a function of a standard C++ vector of `double`s that returns a `double`: ```C++ // C++ code, in "foo.hpp" #pragma once #include #include double sum(std::vector xs) { return std::accumulate( xs.begin(), xs.end(), 0.0); } ``` Below is Python code that defines a parallel map function: ```python # Python code, in "foo.py" import multiprocessing as mp # Parallel map function def pmap(f, xs): with mp.Pool() as pool: results = pool.map(f, xs) return results ``` This program can be compiled and run as below: ``` $ menv morloc make main.loc $ menv ./nexus vsum -h Usage: ./nexus vsum VECTORS Sum a list of numeric vectors Positional arguments: VECTORS Input numeric vectors that will be summed in parallel type: [[Real]] Return: Real Final sum of all elements in all vectors $ menv ./nexus vsum [[1.2],[0,0.1]] 1.3 ``` ================================================ FILE: bench/Bench.hs ================================================ {-| Module : Bench Description : Benchmark suite for morloc compiler Copyright : (c) Zebulun Arendsee, 2016-2026 License : Apache-2.0 Maintainer : z@morloc.io This benchmark suite tracks performance of key compiler components: - Parser: parsing .loc source files - Type checker: type inference and checking - Code generator: nexus and pool generation Run with: stack bench Run with options: stack bench --benchmark-arguments '--csv bench-results.csv' -} module Main (main) where import Test.Tasty.Bench import qualified Data.Text as T import System.FilePath (()) import qualified System.Directory as SD import Morloc (typecheckFrontend, typecheck) import Morloc.Namespace.Prim (Code(..), Defaultable(..)) import Morloc.Namespace.State (Config(..), MorlocMonad, MorlocError) import qualified Morloc.Monad as MM -- | Helper to read benchmark test files readTestFile :: FilePath -> IO Code readTestFile name = do let path = "bench" "test-data" name Code . 
T.pack <$> readFile path -- | Create a minimal config for benchmarking emptyConfig :: IO Config emptyConfig = do home <- SD.getHomeDirectory return $ Config { configHome = home <> "/.local/share/morloc" , configLibrary = home <> "/.local/share/src/morloc" , configPlane = "default" , configPlaneCore = "morloclib" , configTmpDir = home <> "/.morloc/tmp" , configBuildConfig = home <> "/.morloc/.build-config.yaml" , configLangOverrides = mempty } -- | Run a MorlocMonad action for benchmarking runBench :: MorlocMonad a -> IO (Either MorlocError a) runBench action = do config <- emptyConfig ((result, _), _) <- MM.runMorlocMonad Nothing 0 config defaultValue action return result main :: IO () main = defaultMain [ bgroup "Parser" [ bench "parse-simple" $ whnfIO $ do code <- readTestFile "simple.loc" runBench (typecheckFrontend Nothing code) , bench "parse-interop" $ whnfIO $ do code <- readTestFile "interop.loc" runBench (typecheckFrontend Nothing code) , bench "parse-complex-types" $ whnfIO $ do code <- readTestFile "complex-types.loc" runBench (typecheckFrontend Nothing code) ] , bgroup "Type Checker" [ bench "typecheck-simple" $ whnfIO $ do code <- readTestFile "simple.loc" runBench (typecheck Nothing code) , bench "typecheck-interop" $ whnfIO $ do code <- readTestFile "interop.loc" runBench (typecheck Nothing code) , bench "typecheck-complex-types" $ whnfIO $ do code <- readTestFile "complex-types.loc" runBench (typecheck Nothing code) ] -- Note: Code generation benchmarks commented out as they require -- file system access and module initialization -- Uncomment after setting up appropriate test environment {- , bgroup "Code Generation" [ bench "generate-simple" $ whnfIO $ do code <- readTestFile "simple.loc" runBench (writeProgram Nothing code) ] -} ] ================================================ FILE: bench/test-data/complex-types.loc ================================================ module complex (processRecords) type Record = {id :: Int, name :: Str, values :: 
[Real]} processRecords :: [Record] -> [Int] processRecords rs = map (\r -> r.id) rs ================================================ FILE: bench/test-data/interop.loc ================================================ module interop (processList) import math (sqrt from python) processList :: [Real] -> [Real] processList xs = map sqrt xs ================================================ FILE: bench/test-data/simple.loc ================================================ module simple (add, mul) add :: Int -> Int -> Int add x y = x + y mul :: Int -> Int -> Int mul x y = x * y ================================================ FILE: container/Makefile ================================================ # Pushing to the github registry requires a personal token with package # permissions. Login is required, for example: # # $ echo $GITHUB_TOKEN | podman login ghcr.io -u morloc-project --password-stdin # # I am currently using the "classic" token (ghp_*), not the fine-grained # token. These need to be refreshed every few months # # You need to provide the desired morloc version as an environmental variable, # so run the make commands like so: # # $ make MORLOC_VERSION=0.70.0 build-tiny define HEREDOC #!/bin/bash mkdir -p ~/.morloc podman run --rm \ -e HOME=$$HOME \ -v $$HOME/.morloc:$$HOME/.morloc \ -v $$PWD:$$HOME \ -w $$HOME ghcr.io/morloc-project/morloc/morloc-tiny:edge \ morloc "$$@" endef export HEREDOC install: # Pull the exact version to ensure it exists locally podman pull ghcr.io/morloc-project/morloc/morloc-tiny:edge @mkdir -p ${HOME}/bin @echo "$$HEREDOC" > ${HOME}/bin/morloc-edge chmod 755 ${HOME}/bin/morloc-edge # Build a container that just has the morloc compiler. # Context is the repo root (..) so COPY gets the local source tree. build-tiny: podman build --no-cache --force-rm \ -t ghcr.io/morloc-project/morloc/morloc-tiny:$(MORLOC_VERSION) \ -t ghcr.io/morloc-project/morloc/morloc-tiny:edge \ -f tiny/Dockerfile .. # Build the full interactive development image. 
# Copies the morloc binary from the locally-built morloc-tiny image. # Run build-tiny first. build-full: podman build --no-cache --force-rm \ --build-arg MORLOC_VERSION=$(MORLOC_VERSION) \ -t ghcr.io/morloc-project/morloc/morloc-full:$(MORLOC_VERSION) \ -t ghcr.io/morloc-project/morloc/morloc-full:edge \ full # Build the required docker image build-test: podman build --no-cache --force-rm -t ghcr.io/morloc-project/morloc/morloc-test test podman tag ghcr.io/morloc-project/morloc/morloc-test ghcr.io/morloc-project/morloc/morloc-test:edge shell: podman run --shm-size=4g --rm -it ghcr.io/morloc-project/morloc/morloc-full:edge /bin/bash shell-tiny: podman run --shm-size=4g --rm -it ghcr.io/morloc-project/morloc/morloc-tiny:edge /bin/bash shell-test: podman run --shm-size=4g --rm -it ghcr.io/morloc-project/morloc/morloc-test /bin/bash # push local containers to the github registry # Ensures edge moves to the same digest as $(MORLOC_VERSION) push: # Push tiny version and edge podman push ghcr.io/morloc-project/morloc/morloc-tiny:$(MORLOC_VERSION) podman push ghcr.io/morloc-project/morloc/morloc-tiny:edge # Push full version and edge podman push ghcr.io/morloc-project/morloc/morloc-full:$(MORLOC_VERSION) podman push ghcr.io/morloc-project/morloc/morloc-full:edge # Push test (no moving tag) podman push ghcr.io/morloc-project/morloc/morloc-test:edge # retrieve the latest morloc builds from the github registry pull: podman pull ghcr.io/morloc-project/morloc/morloc-tiny:$(MORLOC_VERSION) podman pull ghcr.io/morloc-project/morloc/morloc-full:$(MORLOC_VERSION) podman pull ghcr.io/morloc-project/morloc/morloc-test podman pull ghcr.io/morloc-project/morloc/morloc-test:edge # Convenience: pull moving edge tags pull-edge: podman pull ghcr.io/morloc-project/morloc/morloc-tiny:edge podman pull ghcr.io/morloc-project/morloc/morloc-full:edge # Cleanup of podman images may be done as follows # $ podman image prune # $ podman container prune # $ podman image rm -f $(podman image ls -q) 
================================================ FILE: container/full/Dockerfile ================================================ # Copy the morloc binary from the matching tiny image ARG MORLOC_VERSION=edge FROM ghcr.io/morloc-project/morloc/morloc-tiny:${MORLOC_VERSION} AS morloc-bin FROM docker.io/library/ubuntu:24.04 LABEL org.opencontainers.image.source=https://github.com/morloc-project/morloc LABEL org.opencontainers.image.description="A morloc container intended for interactive shell use" LABEL org.opencontainers.image.licenses=Apache-2.0 COPY --from=morloc-bin /bin/morloc /bin/morloc COPY --from=morloc-bin /opt/morloc-rust-bin /opt/morloc-rust-bin WORKDIR $HOME ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/root/.local/bin:/root/.local/share/morloc/bin:${PATH}" # Pre-built Rust binaries: morloc init uses these directly (no cargo needed) ENV MORLOC_RUST_BIN=/opt/morloc-rust-bin RUN apt-get update # Set the timezone, this avoids hanging later on RUN TZ=Antarctica/Troll apt-get -y install tzdata RUN apt-get install -y r-base python3 python3-dev python3-pip python3-numpy libgsl-dev git curl RUN python3 -m pip install --break-system-packages --upgrade setuptools # Setup the morloc home (uses pre-built Rust binaries, no cargo needed) RUN morloc init # Copy morloc-nexus to a system path so it remains on PATH even when # morloc-manager bind-mounts over ~/.local/bin with a host directory RUN cp /root/.local/share/morloc/bin/morloc-nexus /usr/local/bin/morloc-nexus RUN echo "lang_python3 : python3" >> $HOME/.local/share/morloc/config RUN morloc install stdlib RUN apt-get install -y vim # Copy over custom vimrc COPY assets/vimrc /root/.vimrc COPY assets/README /root/README # Set up vim highlighting for morloc RUN git clone https://github.com/morloc-project/vimmorloc \ && mkdir -p ~/.vim/syntax/ \ && mkdir -p ~/.vim/ftdetect/ \ && cp vimmorloc/loc.vim ~/.vim/syntax/ \ && echo 'au BufRead,BufNewFile *.loc set filetype=loc' > ~/.vim/ftdetect/loc.vim \ && rm -rf vimmorloc 
RUN git clone https://github.com/morloc-project/morloc ~/morloc COPY assets/bashrc /etc/bash.bashrc # Cleanup to reduce image size RUN apt-get clean && rm -rf /var/lib/apt/lists/* ================================================ FILE: container/full/assets/README ================================================ This container has a full morloc installation and language support for R, Python3, and C++. The ~/tests folder contains a set of tests from the morloc test suite. These may serve as simplistic examples of morloc scripts. Though they are designed for testing the language not pedagogy. ================================================ FILE: container/full/assets/bashrc ================================================ # basic morloc bashrc # If not running interactively, don't do anything case $- in *i*) ;; *) return;; esac # History Configuration HISTCONTROL=ignoreboth:erasedups HISTSIZE=10000 HISTFILESIZE=20000 shopt -s histappend # Shell Options shopt -s checkwinsize shopt -s globstar 2>/dev/null # Colors if [ -x /usr/bin/dircolors ]; then test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" fi # Prompt # Detect if we have color support if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then # Define colors RESET='\[\033[0m\]' BOLD='\[\033[1m\]' DIM='\[\033[2m\]' # Color palette BLUE='\[\033[38;5;75m\]' GREEN='\[\033[38;5;114m\]' YELLOW='\[\033[38;5;221m\]' GRAY='\[\033[38;5;245m\]' RED='\[\033[38;5;204m\]' DEVNAME="" if [ ! -z "$MORLOC_ENV_NAME" ]; then DEVNAME="${DIM}${GRAY}(${MORLOC_ENV_NAME})${RESET} " fi MORLOC_VERSION=$(morloc --version) if [ $? 
-eq 0 ]; then MORLOC_VERSION="-${MORLOC_VERSION}" fi # Format: (container) morloc $ PS1="${DEVNAME}${YELLOW}morloc${MORLOC_VERSION}${RESET} $ " else # Fallback for no color PS1="morloc $ " fi # aliases alias ls='ls --color=auto' alias ll='ls -lh' alias la='ls -lah' alias l='ls -CF' # coloring alias grep='grep --color=auto' alias fgrep='fgrep --color=auto' alias egrep='egrep --color=auto' # fast navigation alias ..='cd ..' alias ...='cd ../..' alias ....='cd ../../..' alias .....='cd ../../../..' # setup path [ -d "$HOME/.local/bin" ] && PATH="$HOME/.local/bin:$PATH" [ -d "$HOME/bin" ] && PATH="$HOME/bin:$PATH" # completions if ! shopt -oq posix; then if [ -f /usr/share/bash-completion/bash_completion ]; then . /usr/share/bash-completion/bash_completion elif [ -f /etc/bash_completion ]; then . /etc/bash_completion fi fi ================================================ FILE: container/full/assets/vimrc ================================================ " Jump to the last position when reopening a file au BufReadPost * if line("'\"") > 1 && line("'\"") <= line("$") | exe "normal! g'\"" | endif " Load indentation rules and plugins according to the detected filetype. filetype plugin indent on syntax on set showcmd " Show (partial) command in status line. set showmatch " Show matching brackets. set ignorecase " Do case insensitive matching set smartcase " Do smart case matching set incsearch " Incremental search set autowrite " Automatically save before commands like :next and :make set hidden " Hide buffers when they are abandoned " An OK colorscheme colorscheme torte ================================================ FILE: container/static-build/Dockerfile ================================================ # Portable build of libmorloc.so, morloc-nexus, and morloc-manager. 
# # Strategy: # - libmorloc.so: shared library built on Ubuntu 20.04 (glibc >= 2.31) # - morloc-nexus: dynamically linked to libmorloc.so (glibc >= 2.31) # - morloc-manager: fully static binary (Alpine/musl, runs on any Linux) # # Build: # docker build -t morloc-rust-build -f container/static-build/Dockerfile . # docker run --rm -v $(pwd)/out:/out morloc-rust-build # # Output: # ./out/libmorloc.so (shared library, glibc >= 2.31) # ./out/morloc-nexus (binary, glibc >= 2.31, links libmorloc.so) # ./out/morloc-manager (static binary, runs on any Linux including NixOS) # =========================================================================== # Stage 1: Build libmorloc.so + morloc-nexus on Ubuntu (glibc) # # morloc-nexus dynamically links libmorloc.so at runtime, so both must be # built against the same libc. Ubuntu 20.04 gives glibc 2.31 forward compat. # =========================================================================== FROM docker.io/library/ubuntu:20.04 AS glibc-builder ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ curl ca-certificates gcc g++ make pkg-config \ && rm -rf /var/lib/apt/lists/* RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable \ && /root/.cargo/bin/cargo --version ENV PATH="/root/.cargo/bin:${PATH}" WORKDIR /build # Copy Cargo manifests first for dependency caching COPY data/rust/Cargo.toml data/rust/Cargo.lock ./ COPY data/rust/morloc-runtime/Cargo.toml ./morloc-runtime/ COPY data/rust/morloc-nexus/Cargo.toml ./morloc-nexus/ COPY data/rust/morloc-manager/Cargo.toml ./morloc-manager/ # Create dummy source files for dependency pre-build RUN mkdir -p morloc-runtime/src morloc-nexus/src morloc-manager/src \ && echo "fn main() {}" > morloc-nexus/src/main.rs \ && echo "fn main() {}" > morloc-manager/src/main.rs \ && echo "" > morloc-runtime/src/lib.rs RUN cargo build --release -p morloc-runtime 2>/dev/null || true # Copy actual source 
COPY data/rust/ ./ # Force rebuild after copying real source over dummy stubs RUN touch morloc-runtime/src/lib.rs morloc-nexus/src/main.rs # Build libmorloc.so from staticlib via gcc --whole-archive to export ALL # symbols (internal Rust runtime state must be visible to language extensions) RUN cargo build --release -p morloc-runtime RUN mkdir -p /root/.local/share/morloc/lib \ && gcc -shared -o /root/.local/share/morloc/lib/libmorloc.so \ -Wl,--whole-archive target/release/libmorloc_runtime.a -Wl,--no-whole-archive \ -lpthread -lrt -ldl -lm # Build morloc-nexus (dynamically links libmorloc.so) RUN cargo build --release -p morloc-nexus RUN strip /root/.local/share/morloc/lib/libmorloc.so target/release/morloc-nexus # =========================================================================== # Stage 2: Build morloc-manager on Alpine (musl, fully static) # # morloc-manager has no native dependencies — pure Rust crates only. # Building on Alpine produces a fully static musl binary that runs on any # Linux, including NixOS and minimal containers. 
# =========================================================================== FROM docker.io/library/alpine:3.20 AS musl-builder RUN apk add --no-cache curl gcc musl-dev RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable \ && /root/.cargo/bin/cargo --version ENV PATH="/root/.cargo/bin:${PATH}" WORKDIR /build # Copy Cargo manifests for dependency caching COPY data/rust/Cargo.toml data/rust/Cargo.lock ./ COPY data/rust/morloc-runtime/Cargo.toml ./morloc-runtime/ COPY data/rust/morloc-nexus/Cargo.toml ./morloc-nexus/ COPY data/rust/morloc-manager/Cargo.toml ./morloc-manager/ RUN mkdir -p morloc-runtime/src morloc-nexus/src morloc-manager/src \ && echo "fn main() {}" > morloc-nexus/src/main.rs \ && echo "fn main() {}" > morloc-manager/src/main.rs \ && echo "" > morloc-runtime/src/lib.rs RUN cargo build --release -p morloc-manager 2>/dev/null || true COPY data/rust/ ./ # Force rebuild: cargo may skip if it thinks the cached dummy binary is fresh RUN touch morloc-manager/src/main.rs \ && cargo build --release -p morloc-manager RUN strip target/release/morloc-manager # =========================================================================== # Stage 3: Verify and output # =========================================================================== FROM docker.io/library/alpine:3.20 RUN apk add --no-cache file COPY --from=glibc-builder /root/.local/share/morloc/lib/libmorloc.so /out-staging/libmorloc.so COPY --from=glibc-builder /build/target/release/morloc-nexus /out-staging/morloc-nexus COPY --from=musl-builder /build/target/release/morloc-manager /out-staging/morloc-manager # Verify morloc-manager is static (no dynamic linker needed) RUN file /out-staging/morloc-manager \ && ! 
ldd /out-staging/morloc-manager 2>&1 | grep -q "=>" \ && echo "OK: morloc-manager has no dynamic dependencies" CMD ["sh", "-c", "cp /out-staging/libmorloc.so /out-staging/morloc-nexus /out-staging/morloc-manager /out/ && echo 'Wrote libmorloc.so, morloc-nexus, morloc-manager' && ls -lh /out/libmorloc.so /out/morloc-nexus /out/morloc-manager && echo && file /out/morloc-manager"] ================================================ FILE: container/static-build/build.sh ================================================ #!/bin/sh # Build portable libmorloc.so, morloc-nexus, and morloc-manager. # # Usage: # ./container/static-build/build.sh # # Output: # ./out/libmorloc.so (shared library, glibc >= 2.31) # ./out/morloc-nexus (binary, glibc >= 2.31, links libmorloc.so) # ./out/morloc-manager (static binary, runs on any Linux) set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" # Detect container engine (prefer podman) if command -v podman >/dev/null 2>&1; then ENGINE=podman elif command -v docker >/dev/null 2>&1; then ENGINE=docker else echo "Error: neither podman nor docker found" >&2 exit 1 fi echo "Building libmorloc.so, morloc-nexus, and morloc-manager with $ENGINE..." 
mkdir -p "$PROJECT_DIR/out" $ENGINE build \ -t morloc-rust-build \ -f "$SCRIPT_DIR/Dockerfile" \ "$PROJECT_DIR" $ENGINE run --rm \ -v "$PROJECT_DIR/out:/out" \ morloc-rust-build echo "" echo "Binaries:" ls -lh "$PROJECT_DIR/out/libmorloc.so" "$PROJECT_DIR/out/morloc-nexus" "$PROJECT_DIR/out/morloc-manager" file "$PROJECT_DIR/out/libmorloc.so" "$PROJECT_DIR/out/morloc-nexus" "$PROJECT_DIR/out/morloc-manager" ================================================ FILE: container/test/Dockerfile ================================================ FROM docker.io/library/ubuntu:24.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ git \ gdb \ curl \ pkg-config \ libglib2.0-dev \ build-essential \ libffi-dev \ libgmp-dev \ zlib1g-dev # Install GHCup into /opt/ghcup (accessible to all users) ENV GHCUP_INSTALL_BASE_PREFIX=/opt ENV BOOTSTRAP_HASKELL_NONINTERACTIVE=1 RUN curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | sh # Add ghcup to PATH ENV PATH="/opt/.ghcup/bin:${PATH}" # Install Rust toolchain (needed by morloc init to build libmorloc.so) RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" # Set the timezone, this avoids hanging later on RUN TZ=Antarctica/Troll apt-get -y install tzdata # hyperfine is needed for shell benchmarks RUN apt-get install -y r-base python3 python3-dev python3-pip libgsl-dev git hyperfine RUN python3 -m pip install --break-system-packages --upgrade setuptools numpy pyarrow # Set up R environment # stringi is needed for benchmarks RUN Rscript -e 'install.packages("stringi", repos = "https://cloud.r-project.org")' # Install R Arrow package for cross-language Arrow IPC support ENV LIBARROW_MINIMAL=true ENV ARROW_S3=OFF ENV ARROW_GCS=OFF RUN Rscript -e 'install.packages("arrow", repos = "https://cloud.r-project.org")' # Cleanup to reduce image size RUN apt-get clean && rm -rf /var/lib/apt/lists/* # Create /home/dev with permissive access so any 
--user UID:GID can write here. # morloc-manager bind-mounts .stack and .local into this directory. RUN mkdir -p /home/dev && chmod 1777 /home/dev COPY assets/bashrc /etc/bash.bashrc ================================================ FILE: container/test/assets/bashrc ================================================ # basic morloc bashrc # If not running interactively, don't do anything case $- in *i*) ;; *) return;; esac # History Configuration HISTCONTROL=ignoreboth:erasedups HISTSIZE=10000 HISTFILESIZE=20000 shopt -s histappend # Shell Options shopt -s checkwinsize shopt -s globstar 2>/dev/null # Colors if [ -x /usr/bin/dircolors ]; then test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" fi # Prompt # Detect if we have color support if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then # Define colors RESET='\[\033[0m\]' BOLD='\[\033[1m\]' DIM='\[\033[2m\]' # Color palette BLUE='\[\033[38;5;75m\]' GREEN='\[\033[38;5;114m\]' YELLOW='\[\033[38;5;221m\]' GRAY='\[\033[38;5;245m\]' RED='\[\033[38;5;204m\]' DEVNAME="" if [ ! -z "$MORLOC_ENV_NAME" ]; then DEVNAME="${DIM}${GRAY}(${MORLOC_ENV_NAME})${RESET} " fi # Format: (container) morloc $ PS1="${DEVNAME}${YELLOW}morloc-dev${RESET} $ " else # Fallback for no color PS1="morloc-dev $ " fi # aliases alias ls='ls --color=auto' alias ll='ls -lh' alias la='ls -lah' alias l='ls -CF' # coloring alias grep='grep --color=auto' alias fgrep='fgrep --color=auto' alias egrep='egrep --color=auto' # fast navigation alias ..='cd ..' alias ...='cd ../..' alias ....='cd ../../..' alias .....='cd ../../../..' # setup path [ -d "$HOME/.local/bin" ] && PATH="$HOME/.local/bin:$PATH" [ -d "$HOME/bin" ] && PATH="$HOME/bin:$PATH" # completions if ! shopt -oq posix; then if [ -f /usr/share/bash-completion/bash_completion ]; then . /usr/share/bash-completion/bash_completion elif [ -f /etc/bash_completion ]; then . 
/etc/bash_completion fi fi ================================================ FILE: container/tiny/Dockerfile ================================================ ############################################################################### # Stage 1: Build the morloc compiler from local source FROM docker.io/library/ubuntu:24.04 AS morloc-build ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y git curl pkg-config libglib2.0-dev # Install stack RUN curl -SL https://get.haskellstack.org/ | sh # Copy the local source tree (no git clone -- builds exactly this checkout) COPY . /morloc # Build morloc RUN cd /morloc && stack install --no-run-tests ############################################################################### # Stage 2: Build Rust binaries (libmorloc.so + morloc-nexus + morloc-manager) FROM docker.io/library/ubuntu:24.04 AS rust-build ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ curl ca-certificates gcc g++ make pkg-config \ && rm -rf /var/lib/apt/lists/* RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable ENV PATH="/root/.cargo/bin:${PATH}" COPY data/rust/ /build/ WORKDIR /build # Build libmorloc.so from staticlib RUN cargo build --release -p morloc-runtime RUN gcc -shared -o libmorloc.so \ -Wl,--whole-archive target/release/libmorloc_runtime.a -Wl,--no-whole-archive \ -lpthread -lrt -ldl -lm # Install libmorloc.so where the nexus linker expects it RUN mkdir -p /root/.local/share/morloc/lib \ && cp libmorloc.so /root/.local/share/morloc/lib/ # Build morloc-nexus and morloc-manager RUN cargo build --release -p morloc-nexus RUN cargo build --release -p morloc-manager # Strip all RUN strip libmorloc.so target/release/morloc-nexus target/release/morloc-manager # Collect into a single directory RUN mkdir -p /rust-bin \ && cp libmorloc.so /rust-bin/ \ && cp target/release/morloc-nexus /rust-bin/ \ && cp 
target/release/morloc-manager /rust-bin/ ############################################################################### # Stage 3: Final minimal image with compiler + pre-built Rust binaries LABEL org.opencontainers.image.source=https://github.com/morloc-project/morloc LABEL org.opencontainers.image.description="Morloc executable in a minimal container" LABEL org.opencontainers.image.licenses=Apache-2.0 FROM docker.io/library/ubuntu:24.04 COPY --from=morloc-build /root/.local/bin/morloc /bin/morloc COPY --from=rust-build /rust-bin/ /opt/morloc-rust-bin/ # morloc make builds C++ pools, so g++ is needed RUN apt-get update && apt-get install -y --no-install-recommends g++ \ && apt-get clean && rm -rf /var/lib/apt/lists/* # Point morloc init at pre-built binaries (no cargo needed at runtime) ENV MORLOC_RUST_BIN=/opt/morloc-rust-bin ================================================ FILE: data/lang/c/lang.yaml ================================================ # C language metadata for morloc compiler name: c extension: c aliases: [] is_compiled: true run_command: [] serial_type: "" cost: 1 ================================================ FILE: data/lang/cpp/cppmorloc.cpp ================================================ #include #include #include #include "morloc.h" absptr_t cpp_rel2abs(relptr_t ptr){ char* errmsg = NULL; absptr_t absptr = rel2abs(ptr, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return absptr; } relptr_t abs2rel_cpp(absptr_t ptr){ char* errmsg = NULL; relptr_t relptr = abs2rel(ptr, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return relptr; } bool shfree_cpp(absptr_t ptr){ char* errmsg = NULL; bool success = shfree(ptr, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return success; } Schema* parse_schema_cpp(const char* schema_ptr){ char* errmsg = NULL; Schema* schema = 
parse_schema(schema_ptr, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return schema; } void* shmalloc_cpp(size_t size){ char* errmsg = NULL; void* new_ptr = shmalloc(size, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return new_ptr; } shm_t* shinit_cpp(const char* shm_basename, size_t volume_index, size_t shm_size) { char* errmsg = NULL; shm_t* new_ptr = shinit(shm_basename, volume_index, shm_size, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return new_ptr; } int pack_with_schema_cpp(const void* mlc, const Schema* schema, char** mpk, size_t* mpk_size){ char* errmsg = NULL; int exitcode = pack_with_schema(mlc, schema, mpk, mpk_size, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return exitcode; } int unpack_with_schema_cpp(const char* mgk, size_t mgk_size, const Schema* schema, void** mlcptr){ char* errmsg = NULL; int exitcode = unpack_with_schema(mgk, mgk_size, schema, mlcptr, &errmsg); if(errmsg != NULL){ std::string msg(errmsg); free(errmsg); throw std::runtime_error(msg); } return exitcode; } ================================================ FILE: data/lang/cpp/cppmorloc.hpp ================================================ #ifndef __CPPMORLOC_HPP__ #define __CPPMORLOC_HPP__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "morloc.h" #include "mlc_tensor.hpp" // ============================================================ // Type traits for container dispatch // ============================================================ template struct is_std_vector : std::false_type {}; template struct is_std_vector> : std::true_type {}; template struct is_std_list : std::false_type {}; template struct is_std_list> : std::true_type {}; template struct is_std_forward_list : 
std::false_type {}; template struct is_std_forward_list> : std::true_type {}; template struct is_std_deque : std::false_type {}; template struct is_std_deque> : std::true_type {}; template struct is_std_stack : std::false_type {}; template struct is_std_stack> : std::true_type {}; template struct is_std_queue : std::false_type {}; template struct is_std_queue> : std::true_type {}; template struct is_std_tuple : std::false_type {}; template struct is_std_tuple> : std::true_type {}; template struct is_std_pair : std::false_type {}; template struct is_std_pair> : std::true_type {}; template struct is_std_optional : std::false_type {}; template struct is_std_optional> : std::true_type {}; template inline constexpr bool is_non_vector_container_v = is_std_list::value || is_std_forward_list::value || is_std_deque::value || is_std_stack::value || is_std_queue::value; // ============================================================ // Container-to-vector conversion // ============================================================ template auto to_vector(const Container& c) { using T = typename Container::value_type; if constexpr (is_std_stack::value) { std::vector v; auto copy = c; while (!copy.empty()) { v.push_back(copy.top()); copy.pop(); } std::reverse(v.begin(), v.end()); return v; } else if constexpr (is_std_queue::value) { std::vector v; auto copy = c; while (!copy.empty()) { v.push_back(copy.front()); copy.pop(); } return v; } else { return std::vector(c.begin(), c.end()); } } // ============================================================ // C runtime wrappers (implementations in cppmorloc.cpp) // ============================================================ absptr_t cpp_rel2abs(relptr_t ptr); relptr_t abs2rel_cpp(absptr_t ptr); // Resolve a relative pointer using either base-pointer arithmetic (inline data) // or SHM. When base_ptr is non-null, data lives in a contiguous malloc'd blob. 
static inline void* resolve_relptr_cpp(relptr_t relptr, const void* base_ptr) { if (base_ptr) { return (char*)base_ptr + relptr; } return cpp_rel2abs(relptr); } bool shfree_cpp(absptr_t ptr); Schema* parse_schema_cpp(const char* schema_ptr); void* shmalloc_cpp(size_t size); shm_t* shinit_cpp(const char* shm_basename, size_t volume_index, size_t shm_size); int pack_with_schema_cpp(const void* mlc, const Schema* schema, char** mpk, size_t* mpk_size); int unpack_with_schema_cpp(const char* mgk, size_t mgk_size, const Schema* schema, void** mlcptr); // ============================================================ // mpk_pack / mpk_unpack declarations // ============================================================ template std::vector mpk_pack(const T& data, const std::string& schema_str); template T mpk_unpack(const std::vector& packed_data, const std::string& schema_str); // ============================================================ // schema_alignment (C++ mirror of the C function in schema.c) // ============================================================ inline size_t schema_alignment_cpp(const Schema* schema) { switch (schema->type) { case MORLOC_NIL: case MORLOC_BOOL: case MORLOC_SINT8: case MORLOC_UINT8: return 1; case MORLOC_SINT16: case MORLOC_UINT16: return 2; case MORLOC_SINT32: case MORLOC_UINT32: case MORLOC_FLOAT32: return 4; case MORLOC_SINT64: case MORLOC_UINT64: case MORLOC_FLOAT64: case MORLOC_STRING: case MORLOC_ARRAY: case MORLOC_TENSOR: return alignof(size_t); case MORLOC_TUPLE: case MORLOC_MAP: { size_t max_align = 1; for (size_t i = 0; i < schema->size; i++) { size_t a = schema_alignment_cpp(schema->parameters[i]); if (a > max_align) max_align = a; } return max_align; } case MORLOC_OPTIONAL: return schema_alignment_cpp(schema->parameters[0]); default: return alignof(size_t); } } // ============================================================ // get_shm_size // ============================================================ // Forward declaration 
template size_t get_shm_size(const Schema* schema, const T& data); size_t get_shm_size(const Schema* schema, const std::nullptr_t&) { return sizeof(int8_t); } // Primitives template size_t get_shm_size(const Schema* schema, const Primitive& data) { return schema->width; } template size_t get_shm_size(const Schema* schema, const std::vector& data) { size_t total_size = schema->width; // worst-case cursor alignment padding for element data total_size += schema_alignment_cpp(schema->parameters[0]) - 1; switch(schema->parameters[0]->type){ case MORLOC_NIL: case MORLOC_BOOL: case MORLOC_SINT8: case MORLOC_SINT16: case MORLOC_SINT32: case MORLOC_SINT64: case MORLOC_UINT8: case MORLOC_UINT16: case MORLOC_UINT32: case MORLOC_UINT64: case MORLOC_FLOAT32: case MORLOC_FLOAT64: total_size += data.size() * schema->parameters[0]->width; break; case MORLOC_STRING: case MORLOC_ARRAY: case MORLOC_TUPLE: case MORLOC_MAP: case MORLOC_OPTIONAL: for(size_t i = 0; i < data.size(); i++){ total_size += get_shm_size(schema->parameters[0], data[i]); } break; } return total_size; } // Optional: tag byte + aligned inner value template size_t get_shm_size(const Schema* schema, const std::optional& data) { if (!data.has_value()) { return schema->width; } size_t inner_size = get_shm_size(schema->parameters[0], *data); size_t extra = (inner_size > schema->parameters[0]->width) ? 
inner_size - schema->parameters[0]->width : 0; return schema->width + extra; } size_t get_shm_size(const Schema* schema, const std::string& data) { return schema->width + data.size(); } size_t get_shm_size(void* dest, const Schema* schema, const char* data) { return schema->width + strlen(data); } template size_t createTupleShmSizeHelper(const Schema* schema, const Tuple& data, std::index_sequence) { size_t total_size = schema->width; (void)std::initializer_list{( [&](){ size_t elem = get_shm_size(schema->parameters[Is], std::get(data)); if (elem > schema->parameters[Is]->width) { total_size += elem - schema->parameters[Is]->width; } }(), 0 )...}; return total_size; } template size_t get_shm_size(const Schema* schema, const std::tuple& data) { return createTupleShmSizeHelper(schema, data, std::index_sequence_for{}); } // Non-vector containers: convert to vector and delegate template size_t get_shm_size(const Schema* schema, const std::list& data) { return get_shm_size(schema, to_vector(data)); } template size_t get_shm_size(const Schema* schema, const std::forward_list& data) { return get_shm_size(schema, to_vector(data)); } template size_t get_shm_size(const Schema* schema, const std::deque& data) { return get_shm_size(schema, to_vector(data)); } template size_t get_shm_size(const Schema* schema, const std::stack& data) { return get_shm_size(schema, to_vector(data)); } template size_t get_shm_size(const Schema* schema, const std::queue& data) { return get_shm_size(schema, to_vector(data)); } // Tensor: header + shape array + contiguous data template size_t get_shm_size(const Schema* schema, const mlc::Tensor& data) { using S = mlc::tensor_storage_t; size_t total = sizeof(Tensor); // alignment padding for shape array total += alignof(int64_t) - 1; total += NDim * sizeof(int64_t); // alignment padding for element data total += schema_alignment_cpp(schema->parameters[0]) - 1; total += data.size() * sizeof(S); return total; } // 
============================================================ // toAnything - top-level (allocating) // ============================================================ // Generic top-level: compute size, allocate, serialize template void* toAnything(const Schema* schema, const T& data){ size_t total_size = get_shm_size(schema, data); void* dest = shmalloc_cpp(total_size); void* cursor = (void*)((char*)dest + schema->width); try { return toAnything(dest, &cursor, schema, data); } catch (...) { shfree_cpp(dest); throw; } } // Non-vector containers: convert to vector and delegate template void* toAnything(const Schema* schema, const std::stack& data) { return toAnything(schema, to_vector(data)); } template void* toAnything(const Schema* schema, const std::forward_list& data) { return toAnything(schema, to_vector(data)); } template void* toAnything(const Schema* schema, const std::queue& data) { return toAnything(schema, to_vector(data)); } template void* toAnything(const Schema* schema, const std::deque& data) { return toAnything(schema, to_vector(data)); } template void* toAnything(const Schema* schema, const std::list& data) { return toAnything(schema, to_vector(data)); } // ============================================================ // toAnything - cursor-based (recursive) // ============================================================ // Forward declaration template void* toAnything(void* dest, void** cursor, const Schema* schema, const T& data); // Write raw binary data as an array void* binarytoAnything(void* dest, void** cursor, const Schema* schema, const uint8_t* data, size_t size) { Array* result = static_cast(dest); result->size = size; if(size == 0){ result->data = RELNULL; return dest; } absptr_t data_ptr = static_cast(*cursor); result->data = abs2rel_cpp(data_ptr); *cursor = static_cast(*cursor) + size * schema->parameters[0]->width; memcpy(data_ptr, data, size); return dest; } void* toAnything(void* dest, void** cursor, const Schema* schema, const 
std::nullptr_t&) { *((int8_t*)dest) = (int8_t)0; return dest; } // Primitives template void* toAnything(void* dest, void** cursor, const Schema* schema, const Primitive& data) { *((Primitive*)dest) = data; return dest; } // Vector (primary array implementation) template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::vector& data) { Array* result = static_cast(dest); result->size = data.size(); if(data.size() == 0){ result->data = RELNULL; return dest; } // align cursor for element data placement *cursor = reinterpret_cast(ALIGN_UP(reinterpret_cast(*cursor), schema_alignment_cpp(schema->parameters[0]))); result->data = abs2rel_cpp(static_cast(*cursor)); *cursor = static_cast(*cursor) + data.size() * schema->parameters[0]->width; char* start = (char*)cpp_rel2abs(result->data); size_t width = schema->parameters[0]->width; for (size_t i = 0; i < data.size(); ++i) { toAnything(start + width * i, cursor, schema->parameters[0], data[i]); } return dest; } // Shared helper for iterable containers (list, forward_list, deque) template void* toAnything_seq(void* dest, void** cursor, const Schema* schema, const Container& data, size_t size) { Array* result = static_cast(dest); result->size = size; if(size == 0){ result->data = RELNULL; return dest; } // align cursor for element data placement *cursor = reinterpret_cast(ALIGN_UP(reinterpret_cast(*cursor), schema_alignment_cpp(schema->parameters[0]))); result->data = abs2rel_cpp(static_cast(*cursor)); *cursor = static_cast(*cursor) + size * schema->parameters[0]->width; char* start = (char*)cpp_rel2abs(result->data); size_t width = schema->parameters[0]->width; size_t i = 0; for (const auto& item : data) { toAnything(start + width * i, cursor, schema->parameters[0], item); ++i; } return dest; } template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::list& data) { return toAnything_seq(dest, cursor, schema, data, data.size()); } template void* toAnything(void* dest, 
void** cursor, const Schema* schema, const std::forward_list& data) { return toAnything_seq(dest, cursor, schema, data, std::distance(data.begin(), data.end())); } template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::deque& data) { return toAnything_seq(dest, cursor, schema, data, data.size()); } // Stack and queue: convert to vector and delegate template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::queue& data) { return toAnything(dest, cursor, schema, to_vector(data)); } template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::stack& data) { return toAnything(dest, cursor, schema, to_vector(data)); } // String and C string void* toAnything(void* dest, void** cursor, const Schema* schema, const std::string& data) { return binarytoAnything(dest, cursor, schema, (const uint8_t*)data.c_str(), data.size()); } void* toAnything(void* dest, void** cursor, const Schema* schema, const char* data) { return binarytoAnything(dest, cursor, schema, (const uint8_t*)data, strlen(data)); } // Tuple template void* createTupleAnythingHelper(void* dest, const Schema* schema, void** cursor, const Tuple& data, std::index_sequence) { (void)std::initializer_list{( toAnything((char*)dest + schema->offsets[Is], cursor, schema->parameters[Is], std::get(data)), 0 )...}; return dest; } template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::tuple& data) { return createTupleAnythingHelper(dest, schema, cursor, data, std::index_sequence_for{}); } // Pair (reuses tuple helper since std::pair supports std::get) template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::pair& data) { return createTupleAnythingHelper(dest, schema, cursor, data, std::index_sequence<0, 1>{}); } // Optional template void* toAnything(void* dest, void** cursor, const Schema* schema, const std::optional& data) { if (!data.has_value()) { *((uint8_t*)dest) = 0; 
memset((char*)dest + schema->offsets[0], 0, schema->parameters[0]->width); } else { *((uint8_t*)dest) = 1; toAnything((char*)dest + schema->offsets[0], cursor, schema->parameters[0], *data); } return dest; } // Tensor: write Tensor header + shape array + contiguous data template void* toAnything(void* dest, void** cursor, const Schema* schema, const mlc::Tensor& data) { Tensor* result = static_cast(dest); result->total_elements = data.size(); result->device_type = 0; result->device_id = 0; if (data.size() == 0) { result->shape = RELNULL; result->data = RELNULL; return dest; } // Write shape array *cursor = reinterpret_cast(ALIGN_UP(reinterpret_cast(*cursor), alignof(int64_t))); result->shape = abs2rel_cpp(static_cast(*cursor)); int64_t* shape_dst = (int64_t*)*cursor; for (int i = 0; i < NDim; i++) shape_dst[i] = data.shape()[i]; *cursor = (char*)*cursor + NDim * sizeof(int64_t); // Write data buffer (contiguous row-major) using S = mlc::tensor_storage_t; size_t elem_align = schema_alignment_cpp(schema->parameters[0]); *cursor = reinterpret_cast(ALIGN_UP(reinterpret_cast(*cursor), elem_align)); result->data = abs2rel_cpp(static_cast(*cursor)); size_t data_bytes = data.size() * sizeof(S); memcpy(*cursor, data.data(), data_bytes); *cursor = (char*)*cursor + data_bytes; return dest; } // ============================================================ // fromAnything - single template with if constexpr dispatch // ============================================================ // Forward declaration for recursive calls template T fromAnything(const Schema* schema, const void* data, T* = nullptr, const void* base_ptr = nullptr); // Tuple helper (needs forward declaration of fromAnything) template Tuple fromTupleAnythingHelper( const Schema* schema, const void* anything, std::index_sequence, Tuple* = nullptr, const void* base_ptr = nullptr ) { return Tuple(fromAnything(schema->parameters[Is], (char*)anything + schema->offsets[Is], static_cast*>(nullptr), base_ptr)...); } 
template T fromAnything(const Schema* schema, const void* data, T*, const void* base_ptr) { if(data == NULL){ throw std::runtime_error("Void error in fromAnything"); } if constexpr (std::is_same_v) { // NOTE: do NOT use bool here since its width is often not 1 byte return *(uint8_t*)data == 1; } else if constexpr (std::is_same_v) { Array* array = (Array*)data; if(array->size > 0){ return std::string((char*)resolve_relptr_cpp(array->data, base_ptr), array->size); } return std::string(""); } else if constexpr (is_std_vector::value) { using ElemT = typename T::value_type; std::vector result; Array* array = (Array*)data; if(array->size == 0) return result; // Fast path for primitive arrays switch(schema->parameters[0]->type){ case MORLOC_NIL: case MORLOC_BOOL: case MORLOC_SINT8: case MORLOC_SINT16: case MORLOC_SINT32: case MORLOC_SINT64: case MORLOC_UINT8: case MORLOC_UINT16: case MORLOC_UINT32: case MORLOC_UINT64: case MORLOC_FLOAT32: case MORLOC_FLOAT64: { ElemT* arr_start = (ElemT*)resolve_relptr_cpp(array->data, base_ptr); std::vector pv(arr_start, arr_start + array->size); return pv; } } // Complex element types result.reserve(array->size); const Schema* elem_schema = schema->parameters[0]; char* start = (char*)resolve_relptr_cpp(array->data, base_ptr); for(size_t i = 0; i < array->size; i++){ result.push_back(fromAnything(elem_schema, (void*)(start + i * elem_schema->width), static_cast(nullptr), base_ptr)); } return result; } else if constexpr (is_non_vector_container_v) { using ElemT = typename T::value_type; Array* array = (Array*)data; T result; if(array->size == 0) return result; const Schema* elem_schema = schema->parameters[0]; char* start = (char*)resolve_relptr_cpp(array->data, base_ptr); constexpr bool reverse = is_std_stack::value || is_std_forward_list::value; if constexpr (reverse) { for (size_t i = array->size; i > 0; --i) { auto elem = fromAnything(elem_schema, (void*)(start + (i-1) * elem_schema->width), static_cast(nullptr), base_ptr); if 
constexpr (is_std_stack::value) result.push(std::move(elem)); else result.push_front(std::move(elem)); } } else { for (size_t i = 0; i < array->size; ++i) { auto elem = fromAnything(elem_schema, (void*)(start + i * elem_schema->width), static_cast(nullptr), base_ptr); if constexpr (is_std_queue::value) result.push(std::move(elem)); else result.push_back(std::move(elem)); } } return result; } else if constexpr (is_std_tuple::value) { return fromTupleAnythingHelper( schema, data, std::make_index_sequence>{}, static_cast(nullptr), base_ptr ); } else if constexpr (is_std_pair::value) { return fromTupleAnythingHelper( schema, data, std::index_sequence<0, 1>{}, static_cast(nullptr), base_ptr ); } else if constexpr (is_std_optional::value) { using InnerT = typename T::value_type; uint8_t tag = *(const uint8_t*)data; if (tag == 0) { return std::nullopt; } return std::optional(fromAnything(schema->parameters[0], (const char*)data + schema->offsets[0], static_cast(nullptr), base_ptr)); } else if constexpr (mlc::is_mlc_tensor_v) { using ElemT = mlc::tensor_element_t; using StorageT = mlc::tensor_storage_t; constexpr int NDim = mlc::tensor_ndim_v; const Tensor* tensor = (const Tensor*)data; if (tensor->total_elements == 0) { int64_t zero_shape[NDim] = {}; return T(zero_shape); } const int64_t* shape = (const int64_t*)resolve_relptr_cpp(tensor->shape, base_ptr); StorageT* tdata = (StorageT*)resolve_relptr_cpp(tensor->data, base_ptr); return T(tdata, shape, tensor->total_elements); } else { // Primitives (int, double, float, etc.) // Record types are handled by generated overloads which are preferred // by overload resolution over this template. 
return *(T*)data; } } // ============================================================ // mpk_pack / mpk_unpack // ============================================================ template std::vector mpk_pack(const T& data, const std::string& schema_str) { const char* schema_ptr = schema_str.c_str(); Schema* schema = parse_schema_cpp(schema_ptr); void* voidstar = nullptr; char* msgpack_data = NULL; size_t msg_size = 0; try { voidstar = toAnything(schema, data); pack_with_schema_cpp(voidstar, schema, &msgpack_data, &msg_size); } catch (...) { if (voidstar) shfree_cpp(voidstar); free(msgpack_data); free_schema(schema); throw; } shfree_cpp(voidstar); std::vector result(msgpack_data, msgpack_data + msg_size); free(msgpack_data); free_schema(schema); return result; } template T mpk_unpack(const std::vector& packed_data, const std::string& schema_str) { const char* schema_ptr = schema_str.c_str(); Schema* schema = parse_schema_cpp(schema_ptr); void* voidstar = nullptr; int unpack_result = unpack_with_schema_cpp(packed_data.data(), packed_data.size(), schema, &voidstar); if (unpack_result != 0) { free_schema(schema); throw std::runtime_error("Unpacking failed"); } T x; try { x = fromAnything(schema, voidstar, static_cast(nullptr)); } catch (...) { free_schema(schema); shfree_cpp(voidstar); throw; } free_schema(schema); shfree_cpp(voidstar); return x; } #endif ================================================ FILE: data/lang/cpp/init.sh ================================================ #!/bin/bash set -e MORLOC_HOME="$1" BUILD_DIR="$2" SANITIZE_FLAGS="$3" INCLUDE_DIR="$MORLOC_HOME/include" LIB_DIR="$MORLOC_HOME/lib" # Install mlccpptypes if not present if [ ! 
-d "$INCLUDE_DIR/mlccpptypes" ]; then git clone https://github.com/morloclib/mlccpptypes "$INCLUDE_DIR/mlccpptypes" fi # Install headers cp "$BUILD_DIR/cppmorloc.hpp" "$INCLUDE_DIR/" cp "$BUILD_DIR/mlc_arrow.hpp" "$INCLUDE_DIR/" cp "$BUILD_DIR/mlc_tensor.hpp" "$INCLUDE_DIR/" # Install nanoarrow headers mkdir -p "$INCLUDE_DIR/nanoarrow" cp "$BUILD_DIR/nanoarrow.h" "$INCLUDE_DIR/nanoarrow/" # Compile nanoarrow.c gcc -c -O2 -fPIC $SANITIZE_FLAGS -I"$INCLUDE_DIR" -o "$BUILD_DIR/nanoarrow.o" "$BUILD_DIR/nanoarrow.c" # Compile cppmorloc.cpp g++ -c --std=c++17 -O2 $SANITIZE_FLAGS -I"$INCLUDE_DIR" -o "$BUILD_DIR/cppmorloc.o" "$BUILD_DIR/cppmorloc.cpp" # Archive into libcppmorloc.a ar rcs "$LIB_DIR/libcppmorloc.a" "$BUILD_DIR/cppmorloc.o" "$BUILD_DIR/nanoarrow.o" # Compile precompiled header cp "$BUILD_DIR/morloc_pch.hpp" "$INCLUDE_DIR/" g++ --std=c++17 -O2 $SANITIZE_FLAGS -I"$INCLUDE_DIR" -x c++-header "$INCLUDE_DIR/morloc_pch.hpp" -o "$INCLUDE_DIR/morloc_pch.hpp.gch" ================================================ FILE: data/lang/cpp/lang.yaml ================================================ # C++ language metadata for morloc compiler name: cpp extension: cpp aliases: ["c++"] is_compiled: true run_command: [] serial_type: "uint8_t*" cost: 0 ================================================ FILE: data/lang/cpp/mlc_arrow.hpp ================================================ #ifndef MLC_ARROW_HPP #define MLC_ARROW_HPP // mlc_arrow.hpp -- thin RAII wrapper around Arrow C Data Interface structs // for use in morloc C++ pools. Holds ArrowSchema + ArrowArray as a single // move-only value. The pool template dispatches arrow-hinted schemas to // arrow_to_shm / arrow_from_shm (in libmorloc.so) via this type. // // User code should include to build and read columns. #include "morloc.h" #include #include #include namespace mlc { class ArrowTable { public: // Construct from moved-in C Data Interface structs. // Takes ownership of release callbacks. 
ArrowTable(struct ArrowSchema schema, struct ArrowArray array) : schema_(schema), array_(array) { // Zero the source structs so the caller does not double-release memset(&schema, 0, sizeof(schema)); memset(&array, 0, sizeof(array)); } ~ArrowTable() { if (array_.release) array_.release(&array_); if (schema_.release) schema_.release(&schema_); } // Move-only ArrowTable(ArrowTable&& other) noexcept : schema_(other.schema_), array_(other.array_) { memset(&other.schema_, 0, sizeof(other.schema_)); memset(&other.array_, 0, sizeof(other.array_)); } ArrowTable& operator=(ArrowTable&& other) noexcept { if (this != &other) { if (array_.release) array_.release(&array_); if (schema_.release) schema_.release(&schema_); schema_ = other.schema_; array_ = other.array_; memset(&other.schema_, 0, sizeof(other.schema_)); memset(&other.array_, 0, sizeof(other.array_)); } return *this; } ArrowTable(const ArrowTable&) = delete; ArrowTable& operator=(const ArrowTable&) = delete; // Accessors (const -- arrow data is immutable) const struct ArrowSchema* schema() const { return &schema_; } const struct ArrowArray* array() const { return &array_; } int64_t n_columns() const { return schema_.n_children; } int64_t n_rows() const { return array_.length; } // Build from shared memory header (zero-copy import) static ArrowTable from_shm(const arrow_shm_header_t* hdr) { struct ArrowSchema as; struct ArrowArray aa; char* err = nullptr; arrow_from_shm(hdr, &as, &aa, &err); if (err) { std::string msg(err); free(err); throw std::runtime_error(msg); } return ArrowTable(std::move(as), std::move(aa)); } // Move table data to shared memory: copies buffers into a contiguous SHM // block, frees the original heap buffers, then repoints this table's // internal ArrowSchema/ArrowArray into the SHM block (zero-copy). // After this call the table is still usable but backed by SHM. // Returns relptr to the SHM block for use in packets. 
relptr_t move_to_shm() { // Step 1: copy all column data into contiguous SHM char* copy_err = nullptr; relptr_t rp = arrow_to_shm(&array_, &schema_, ©_err); if (copy_err) { std::string msg(copy_err); free(copy_err); throw std::runtime_error(msg); } // Step 2: release heap-backed structs (frees all original buffers) if (array_.release) array_.release(&array_); if (schema_.release) schema_.release(&schema_); memset(&schema_, 0, sizeof(schema_)); memset(&array_, 0, sizeof(array_)); // Step 3: resolve SHM pointer and rebuild structs pointing into it char* resolve_err = nullptr; void* abs = rel2abs(rp, &resolve_err); if (resolve_err) { std::string msg(resolve_err); free(resolve_err); throw std::runtime_error(msg); } char* shm_err = nullptr; arrow_from_shm((const arrow_shm_header_t*)abs, &schema_, &array_, &shm_err); if (shm_err) { std::string msg(shm_err); free(shm_err); throw std::runtime_error(msg); } return rp; } private: struct ArrowSchema schema_; struct ArrowArray array_; }; } // namespace mlc #endif // MLC_ARROW_HPP ================================================ FILE: data/lang/cpp/mlc_tensor.hpp ================================================ #ifndef MLC_TENSOR_HPP #define MLC_TENSOR_HPP // mlc_tensor.hpp -- dense N-dimensional tensor for morloc C++ pools. // Data is always contiguous row-major (C order). The Tensor struct in // schema.h defines the voidstar layout; this header provides the C++ // user-facing type that maps to it. #include "morloc.h" #include #include #include #include namespace mlc { // Storage type trait: maps bool to uint8_t so that tensor memory layout // matches the voidstar format (MORLOC_BOOL = 1 byte) regardless of // sizeof(bool) on the target platform. 
template struct tensor_storage { using type = T; }; template<> struct tensor_storage { using type = uint8_t; }; template using tensor_storage_t = typename tensor_storage::type; template class Tensor { using S = tensor_storage_t; public: // Construct with given shape, allocating data on the heap Tensor(const int64_t (&dims)[NDim]) : owns_data_(true) { for (int i = 0; i < NDim; i++) shape_[i] = dims[i]; total_ = 1; for (int i = 0; i < NDim; i++) total_ *= (size_t)shape_[i]; data_ = new S[total_](); } // Construct from initializer list of dims Tensor(std::initializer_list dims) : owns_data_(true) { if ((int)dims.size() != NDim) { throw std::runtime_error("Tensor dimension mismatch"); } int i = 0; for (auto d : dims) shape_[i++] = d; total_ = 1; for (i = 0; i < NDim; i++) total_ *= (size_t)shape_[i]; data_ = new S[total_](); } // Construct as a view over existing data (does not own) Tensor(S* data, const int64_t* shape, size_t total) : data_(data), total_(total), owns_data_(false) { for (int i = 0; i < NDim; i++) shape_[i] = shape[i]; } ~Tensor() { if (owns_data_ && data_) delete[] data_; } // Move-only Tensor(Tensor&& other) noexcept : data_(other.data_), total_(other.total_), owns_data_(other.owns_data_) { for (int i = 0; i < NDim; i++) shape_[i] = other.shape_[i]; other.data_ = nullptr; other.owns_data_ = false; } Tensor& operator=(Tensor&& other) noexcept { if (this != &other) { if (owns_data_ && data_) delete[] data_; data_ = other.data_; total_ = other.total_; owns_data_ = other.owns_data_; for (int i = 0; i < NDim; i++) shape_[i] = other.shape_[i]; other.data_ = nullptr; other.owns_data_ = false; } return *this; } Tensor(const Tensor&) = delete; Tensor& operator=(const Tensor&) = delete; // Accessors (S* for raw access; S == T for all types except bool) const S* data() const { return data_; } S* data() { return data_; } constexpr int ndim() const { return NDim; } const int64_t* shape() const { return shape_; } int64_t shape(int d) const { return shape_[d]; } 
size_t size() const { return total_; } // Linear access (returns S& which is uint8_t& for bool tensors; // implicit conversion to/from bool handles the difference) const S& operator[](size_t i) const { return data_[i]; } S& operator[](size_t i) { return data_[i]; } // 1D access template> const S& operator()(int64_t i) const { return data_[i]; } template> S& operator()(int64_t i) { return data_[i]; } // 2D access (row-major) template> const S& operator()(int64_t i, int64_t j) const { return data_[i * shape_[1] + j]; } template> S& operator()(int64_t i, int64_t j) { return data_[i * shape_[1] + j]; } // 3D access (row-major) template> const S& operator()(int64_t i, int64_t j, int64_t k) const { return data_[(i * shape_[1] + j) * shape_[2] + k]; } template> S& operator()(int64_t i, int64_t j, int64_t k) { return data_[(i * shape_[1] + j) * shape_[2] + k]; } private: S* data_ = nullptr; int64_t shape_[NDim] = {}; size_t total_ = 0; bool owns_data_ = false; }; // Convenience aliases template using Tensor1 = Tensor; template using Tensor2 = Tensor; template using Tensor3 = Tensor; template using Tensor4 = Tensor; template using Tensor5 = Tensor; // Type trait for detecting mlc::Tensor template struct is_mlc_tensor : std::false_type {}; template struct is_mlc_tensor> : std::true_type {}; template inline constexpr bool is_mlc_tensor_v = is_mlc_tensor::value; // Extract element type from Tensor template struct tensor_element; template struct tensor_element> { using type = T; }; template using tensor_element_t = typename tensor_element::type; // Extract ndim from Tensor template struct tensor_ndim; template struct tensor_ndim> { static constexpr int value = N; }; template inline constexpr int tensor_ndim_v = tensor_ndim::value; } // namespace mlc #endif // MLC_TENSOR_HPP ================================================ FILE: data/lang/cpp/morloc_pch.hpp ================================================ // Precompiled header for morloc C++ pools. 
// Compiled once during 'morloc init', reused for every pool compilation. #ifndef MORLOC_PCH_HPP #define MORLOC_PCH_HPP // STL containers #include #include #include #include #include #include #include // STL algorithms and utilities #include #include #include #include #include #include // Strings and I/O #include #include #include #include // C standard library #include #include #include #include #include #include // POSIX headers #include #include #include #include #include // Morloc runtime #include "morloc.h" #endif ================================================ FILE: data/lang/cpp/nanoarrow/nanoarrow.c ================================================ // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include #include #include #include #include "nanoarrow/nanoarrow.h" const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; } int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; } ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) 
{ if (error == NULL) { return NANOARROW_OK; } memset(error->message, 0, sizeof(error->message)); va_list args; va_start(args, fmt); int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args); va_end(args); if (chars_needed < 0) { return EINVAL; } else if (((size_t)chars_needed) >= sizeof(error->message)) { return ERANGE; } else { return NANOARROW_OK; } } void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY; layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL; layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[1] = storage_type; layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE; layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED; layout->element_size_bits[0] = 1; layout->element_size_bits[1] = 0; layout->element_size_bits[2] = 0; layout->child_size_elements = 0; switch (storage_type) { case NANOARROW_TYPE_UNINITIALIZED: case NANOARROW_TYPE_NA: case NANOARROW_TYPE_RUN_END_ENCODED: layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE; layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED; layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; layout->element_size_bits[0] = 0; break; case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_MAP: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; layout->element_size_bits[1] = 32; break; case NANOARROW_TYPE_LARGE_LIST: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; layout->element_size_bits[1] = 64; break; case NANOARROW_TYPE_STRUCT: case NANOARROW_TYPE_FIXED_SIZE_LIST: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; break; case NANOARROW_TYPE_BOOL: layout->element_size_bits[1] = 1; break; case NANOARROW_TYPE_UINT8: case NANOARROW_TYPE_INT8: 
layout->element_size_bits[1] = 8; break; case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_INT16: case NANOARROW_TYPE_HALF_FLOAT: layout->element_size_bits[1] = 16; break; case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DECIMAL32: layout->element_size_bits[1] = 32; break; case NANOARROW_TYPE_INTERVAL_MONTHS: layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; layout->element_size_bits[1] = 32; break; case NANOARROW_TYPE_UINT64: case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_DOUBLE: case NANOARROW_TYPE_INTERVAL_DAY_TIME: case NANOARROW_TYPE_DECIMAL64: layout->element_size_bits[1] = 64; break; case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: layout->element_size_bits[1] = 128; break; case NANOARROW_TYPE_DECIMAL256: layout->element_size_bits[1] = 256; break; case NANOARROW_TYPE_FIXED_SIZE_BINARY: layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY; break; case NANOARROW_TYPE_DENSE_UNION: layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; layout->element_size_bits[0] = 8; layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; layout->element_size_bits[1] = 32; break; case NANOARROW_TYPE_SPARSE_UNION: layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; layout->element_size_bits[0] = 8; layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; break; case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; layout->element_size_bits[1] = 32; layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[2] = storage_type; break; case NANOARROW_TYPE_LARGE_STRING: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; layout->buffer_data_type[1] = 
NANOARROW_TYPE_INT64; layout->element_size_bits[1] = 64; layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[2] = NANOARROW_TYPE_STRING; break; case NANOARROW_TYPE_LARGE_BINARY: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; layout->element_size_bits[1] = 64; layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY; break; case NANOARROW_TYPE_BINARY_VIEW: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW; layout->element_size_bits[1] = 128; break; case NANOARROW_TYPE_STRING_VIEW: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW; layout->element_size_bits[1] = 128; break; case NANOARROW_TYPE_LIST_VIEW: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_VIEW_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; layout->element_size_bits[1] = 32; layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE; layout->buffer_data_type[2] = NANOARROW_TYPE_INT32; layout->element_size_bits[2] = 32; break; case NANOARROW_TYPE_LARGE_LIST_VIEW: layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_VIEW_OFFSET; layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; layout->element_size_bits[1] = 64; layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE; layout->buffer_data_type[2] = NANOARROW_TYPE_INT64; layout->element_size_bits[2] = 64; break; default: break; } } void* ArrowMalloc(int64_t size) { return malloc(size); } void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); } void ArrowFree(void* ptr) { free(ptr); } static uint8_t* ArrowBufferAllocatorMallocReallocate( struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, int64_t new_size) { NANOARROW_UNUSED(allocator); NANOARROW_UNUSED(old_size); return (uint8_t*)ArrowRealloc(ptr, new_size); } static void ArrowBufferAllocatorMallocFree(struct 
ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) { NANOARROW_UNUSED(allocator); NANOARROW_UNUSED(size); if (ptr != NULL) { ArrowFree(ptr); } } static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = { &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL}; struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) { return ArrowBufferAllocatorMalloc; } static uint8_t* ArrowBufferDeallocatorReallocate(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, int64_t new_size) { NANOARROW_UNUSED(new_size); // Attempting to reallocate a buffer with a custom deallocator is // a programming error. In debug mode, crash here. #if defined(NANOARROW_DEBUG) NANOARROW_PRINT_AND_DIE(ENOMEM, "It is an error to reallocate a buffer whose allocator is " "ArrowBufferDeallocator()"); #endif // In release mode, ensure the the deallocator is called exactly // once using the pointer it was given and return NULL, which // will trigger the caller to return ENOMEM. 
allocator->free(allocator, ptr, old_size); *allocator = ArrowBufferAllocatorDefault(); return NULL; } struct ArrowBufferAllocator ArrowBufferDeallocator( void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size), void* private_data) { struct ArrowBufferAllocator allocator; allocator.reallocate = &ArrowBufferDeallocatorReallocate; allocator.free = custom_free; allocator.private_data = private_data; return allocator; } static const int kInt32DecimalDigits = 9; static const uint64_t kUInt32PowersOfTen[] = { 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL}; // Adapted from Arrow C++ to use 32-bit words for better C portability // https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L524-L544 static void ShiftAndAdd(struct ArrowStringView value, uint32_t* out, int64_t out_size) { // We use strtoll for parsing, which needs input that is null-terminated char chunk_string[16]; for (int64_t posn = 0; posn < value.size_bytes;) { int64_t remaining = value.size_bytes - posn; int64_t group_size; if (remaining > kInt32DecimalDigits) { group_size = kInt32DecimalDigits; } else { group_size = remaining; } const uint64_t multiple = kUInt32PowersOfTen[group_size]; memcpy(chunk_string, value.data + posn, group_size); chunk_string[group_size] = '\0'; uint32_t chunk = (uint32_t)strtoll(chunk_string, NULL, 10); for (int64_t i = 0; i < out_size; i++) { uint64_t tmp = out[i]; tmp *= multiple; tmp += chunk; out[i] = (uint32_t)(tmp & 0xFFFFFFFFULL); chunk = (uint32_t)(tmp >> 32); } posn += group_size; } } ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, struct ArrowStringView value) { // Check for sign int is_negative = value.data[0] == '-'; int has_sign = is_negative || value.data[0] == '+'; value.data += has_sign; value.size_bytes -= has_sign; // Check all characters are digits that are not the negative sign for (int64_t i = 0; i < 
value.size_bytes; i++) { char c = value.data[i]; if (c < '0' || c > '9') { return EINVAL; } } // Skip over leading 0s int64_t n_leading_zeroes = 0; for (int64_t i = 0; i < value.size_bytes; i++) { if (value.data[i] == '0') { n_leading_zeroes++; } else { break; } } value.data += n_leading_zeroes; value.size_bytes -= n_leading_zeroes; // Use 32-bit words for portability uint32_t words32[8]; memset(words32, 0, sizeof(words32)); int n_words32 = decimal->n_words > 0 ? decimal->n_words * 2 : 1; NANOARROW_DCHECK(n_words32 <= 8); memset(words32, 0, sizeof(words32)); ShiftAndAdd(value, words32, n_words32); if (_ArrowIsLittleEndian() || n_words32 == 1) { memcpy(decimal->words, words32, sizeof(uint32_t) * n_words32); } else { uint64_t lo; uint64_t hi; for (int i = 0; i < decimal->n_words; i++) { lo = (uint64_t)words32[i * 2]; hi = (uint64_t)words32[i * 2 + 1] << 32; decimal->words[decimal->n_words - i - 1] = lo | hi; } } if (is_negative) { ArrowDecimalNegate(decimal); } return NANOARROW_OK; } // Adapted from Arrow C++ for C // https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365 ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer) { NANOARROW_DCHECK(decimal->n_words == 0 || decimal->n_words == 1 || decimal->n_words == 2 || decimal->n_words == 4); // For the 32-bit case, just use snprintf() if (decimal->n_words == 0) { int32_t value; memcpy(&value, decimal->words, sizeof(int32_t)); NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, 16)); int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, (buffer->capacity_bytes - buffer->size_bytes), "%d", value); if (n_chars <= 0) { return EINVAL; } buffer->size_bytes += n_chars; return NANOARROW_OK; } int is_negative = ArrowDecimalSign(decimal) < 0; uint64_t words_little_endian[4]; if (decimal->n_words == 0) { words_little_endian[0] = 0; memcpy(words_little_endian, decimal->words, sizeof(uint32_t)); } else if 
(decimal->low_word_index == 0) { memcpy(words_little_endian, decimal->words, decimal->n_words * sizeof(uint64_t)); } else { for (int i = 0; i < decimal->n_words; i++) { words_little_endian[i] = decimal->words[decimal->n_words - i - 1]; } } // We've already made a copy, so negate that if needed if (is_negative) { if (decimal->n_words == 0) { uint32_t elem = (uint32_t)words_little_endian[0]; elem = ~elem + 1; words_little_endian[0] = (int32_t)elem; } else { uint64_t carry = 1; for (int i = 0; i < decimal->n_words; i++) { uint64_t elem = words_little_endian[i]; elem = ~elem + carry; carry &= (elem == 0); words_little_endian[i] = elem; } } } // Find the most significant word that is non-zero int most_significant_elem_idx = -1; if (decimal->n_words == 0) { if (words_little_endian[0] != 0) { most_significant_elem_idx = 0; } } else { for (int i = decimal->n_words - 1; i >= 0; i--) { if (words_little_endian[i] != 0) { most_significant_elem_idx = i; break; } } } // If they are all zero, the output is just '0' if (most_significant_elem_idx == -1) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(buffer, '0')); return NANOARROW_OK; } // Define segments such that each segment represents 9 digits with the // least significant group of 9 digits first. For example, if the input represents // 9876543210123456789, then segments will be [123456789, 876543210, 9]. // We handle at most a signed 256 bit integer, whose maximum value occupies 77 // characters. Thus, we need at most 9 segments. const uint32_t k1e9 = 1000000000U; int num_segments = 0; uint32_t segments[9]; memset(segments, 0, sizeof(segments)); uint64_t* most_significant_elem = words_little_endian + most_significant_elem_idx; do { // Compute remainder = words_little_endian % 1e9 and words_little_endian = // words_little_endian / 1e9. 
uint32_t remainder = 0; uint64_t* elem = most_significant_elem; do { // Compute dividend = (remainder << 32) | *elem (a virtual 96-bit integer); // *elem = dividend / 1e9; // remainder = dividend % 1e9. uint32_t hi = (uint32_t)(*elem >> 32); uint32_t lo = (uint32_t)(*elem & 0xFFFFFFFFULL); uint64_t dividend_hi = ((uint64_t)(remainder) << 32) | hi; uint64_t quotient_hi = dividend_hi / k1e9; remainder = (uint32_t)(dividend_hi % k1e9); uint64_t dividend_lo = ((uint64_t)(remainder) << 32) | lo; uint64_t quotient_lo = dividend_lo / k1e9; remainder = (uint32_t)(dividend_lo % k1e9); *elem = (quotient_hi << 32) | quotient_lo; } while (elem-- != words_little_endian); segments[num_segments++] = remainder; } while (*most_significant_elem != 0 || most_significant_elem-- != words_little_endian); // We know our output has no more than 9 digits per segment, plus a negative sign, // plus any further digits between our output of 9 digits plus enough // extra characters to ensure that snprintf() with n = 21 (maximum length of %lu // including a the null terminator) is bounded properly. NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, num_segments * 9 + 1 + 21 - 9)); if (is_negative) { buffer->data[buffer->size_bytes++] = '-'; } // The most significant segment should have no leading zeroes int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%lu", (unsigned long)segments[num_segments - 1]); // Ensure that an encoding error from snprintf() does not result // in an out-of-bounds access. if (n_chars < 0) { return ERANGE; } buffer->size_bytes += n_chars; // Subsequent output needs to be left-padded with zeroes such that each segment // takes up exactly 9 digits. 
for (int i = num_segments - 2; i >= 0; i--) { int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%09lu", (unsigned long)segments[i]); buffer->size_bytes += n_chars; NANOARROW_DCHECK(buffer->size_bytes <= buffer->capacity_bytes); } return NANOARROW_OK; } ArrowErrorCode ArrowDecimalAppendStringToBuffer(const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer) { int64_t buffer_size = buffer->size_bytes; NANOARROW_RETURN_NOT_OK(ArrowDecimalAppendDigitsToBuffer(decimal, buffer)); int64_t digits_size = buffer->size_bytes - buffer_size; if (decimal->scale <= 0) { // e.g., digits are -12345 and scale is -2 -> -1234500 // Just add zeros to the end for (int i = decimal->scale; i < 0; i++) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(buffer, '0')); } return NANOARROW_OK; } int is_negative = buffer->data[0] == '-'; int64_t num_digits = digits_size - is_negative; if (num_digits <= decimal->scale) { // e.g., digits are -12345 and scale is 6 -> -0.012345 // Insert "0." between the (maybe) negative sign and the digits int64_t num_zeros_after_decimal = decimal->scale - num_digits; NANOARROW_RETURN_NOT_OK( ArrowBufferResize(buffer, buffer->size_bytes + num_zeros_after_decimal + 2, 0)); uint8_t* digits_start = buffer->data + is_negative; memmove(digits_start + num_zeros_after_decimal + 2, digits_start, num_digits); *digits_start++ = '0'; *digits_start++ = '.'; for (int i = 0; i < num_zeros_after_decimal; i++) { *digits_start++ = '0'; } } else { // e.g., digits are -12345 and scale is 4 -> -1.2345 // Insert a decimal point before scale digits of output NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes + 1, 0)); uint8_t* decimal_point_to_be = buffer->data + buffer->size_bytes - 1 - decimal->scale; memmove(decimal_point_to_be + 1, decimal_point_to_be, decimal->scale); *decimal_point_to_be = '.'; } return NANOARROW_OK; } // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include #include #include #include "nanoarrow/nanoarrow.h" static void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { if (schema->format != NULL) ArrowFree((void*)schema->format); if (schema->name != NULL) ArrowFree((void*)schema->name); if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); // This object owns the memory for all the children, but those // children may have been generated elsewhere and might have // their own release() callback. if (schema->children != NULL) { for (int64_t i = 0; i < schema->n_children; i++) { if (schema->children[i] != NULL) { if (schema->children[i]->release != NULL) { ArrowSchemaRelease(schema->children[i]); } ArrowFree(schema->children[i]); } } ArrowFree(schema->children); } // This object owns the memory for the dictionary but it // may have been generated somewhere else and have its own // release() callback. 
if (schema->dictionary != NULL) { if (schema->dictionary->release != NULL) { ArrowSchemaRelease(schema->dictionary); } ArrowFree(schema->dictionary); } // private data not currently used if (schema->private_data != NULL) { ArrowFree(schema->private_data); } schema->release = NULL; } static const char* ArrowSchemaFormatTemplate(enum ArrowType type) { switch (type) { case NANOARROW_TYPE_UNINITIALIZED: return NULL; case NANOARROW_TYPE_NA: return "n"; case NANOARROW_TYPE_BOOL: return "b"; case NANOARROW_TYPE_UINT8: return "C"; case NANOARROW_TYPE_INT8: return "c"; case NANOARROW_TYPE_UINT16: return "S"; case NANOARROW_TYPE_INT16: return "s"; case NANOARROW_TYPE_UINT32: return "I"; case NANOARROW_TYPE_INT32: return "i"; case NANOARROW_TYPE_UINT64: return "L"; case NANOARROW_TYPE_INT64: return "l"; case NANOARROW_TYPE_HALF_FLOAT: return "e"; case NANOARROW_TYPE_FLOAT: return "f"; case NANOARROW_TYPE_DOUBLE: return "g"; case NANOARROW_TYPE_STRING: return "u"; case NANOARROW_TYPE_LARGE_STRING: return "U"; case NANOARROW_TYPE_STRING_VIEW: return "vu"; case NANOARROW_TYPE_BINARY: return "z"; case NANOARROW_TYPE_BINARY_VIEW: return "vz"; case NANOARROW_TYPE_LARGE_BINARY: return "Z"; case NANOARROW_TYPE_DATE32: return "tdD"; case NANOARROW_TYPE_DATE64: return "tdm"; case NANOARROW_TYPE_INTERVAL_MONTHS: return "tiM"; case NANOARROW_TYPE_INTERVAL_DAY_TIME: return "tiD"; case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: return "tin"; case NANOARROW_TYPE_LIST: return "+l"; case NANOARROW_TYPE_LARGE_LIST: return "+L"; case NANOARROW_TYPE_LIST_VIEW: return "+vl"; case NANOARROW_TYPE_LARGE_LIST_VIEW: return "+vL"; case NANOARROW_TYPE_STRUCT: return "+s"; case NANOARROW_TYPE_MAP: return "+m"; case NANOARROW_TYPE_RUN_END_ENCODED: return "+r"; default: return NULL; } } static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema, enum ArrowType type) { switch (type) { case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: case 
NANOARROW_TYPE_LIST_VIEW: case NANOARROW_TYPE_LARGE_LIST_VIEW: NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); ArrowSchemaInit(schema->children[0]); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item")); break; case NANOARROW_TYPE_MAP: NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); NANOARROW_RETURN_NOT_OK( ArrowSchemaInitFromType(schema->children[0], NANOARROW_TYPE_STRUCT)); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "entries")); schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema->children[0], 2)); ArrowSchemaInit(schema->children[0]->children[0]); ArrowSchemaInit(schema->children[0]->children[1]); NANOARROW_RETURN_NOT_OK( ArrowSchemaSetName(schema->children[0]->children[0], "key")); schema->children[0]->children[0]->flags &= ~ARROW_FLAG_NULLABLE; NANOARROW_RETURN_NOT_OK( ArrowSchemaSetName(schema->children[0]->children[1], "value")); break; case NANOARROW_TYPE_RUN_END_ENCODED: NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 2)); ArrowSchemaInit(schema->children[0]); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "run_ends")); schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; ArrowSchemaInit(schema->children[1]); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1], "values")); default: break; } return NANOARROW_OK; } void ArrowSchemaInit(struct ArrowSchema* schema) { schema->format = NULL; schema->name = NULL; schema->metadata = NULL; schema->flags = ARROW_FLAG_NULLABLE; schema->n_children = 0; schema->children = NULL; schema->dictionary = NULL; schema->private_data = NULL; schema->release = &ArrowSchemaReleaseInternal; } ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type) { // We don't allocate the dictionary because it has to be nullptr // for non-dictionary-encoded arrays. 
// Set the format to a valid format string for type const char* template_format = ArrowSchemaFormatTemplate(type); // If type isn't recognized and not explicitly unset if (template_format == NULL && type != NANOARROW_TYPE_UNINITIALIZED) { return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, template_format)); // For types with an umabiguous child structure, allocate children return ArrowSchemaInitChildrenIfNeeded(schema, type); } ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children) { NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRUCT)); NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); for (int64_t i = 0; i < n_children; i++) { ArrowSchemaInit(schema->children[i]); } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type) { ArrowSchemaInit(schema); int result = ArrowSchemaSetType(schema, type); if (result != NANOARROW_OK) { ArrowSchemaRelease(schema); return result; } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, enum ArrowType type, int32_t fixed_size) { if (fixed_size <= 0) { return EINVAL; } char buffer[64]; int n_chars; switch (type) { case NANOARROW_TYPE_FIXED_SIZE_BINARY: n_chars = snprintf(buffer, sizeof(buffer), "w:%" PRId32, fixed_size); break; case NANOARROW_TYPE_FIXED_SIZE_LIST: n_chars = snprintf(buffer, sizeof(buffer), "+w:%" PRId32, fixed_size); break; default: return EINVAL; } if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) { return ERANGE; } buffer[n_chars] = '\0'; NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, buffer)); if (type == NANOARROW_TYPE_FIXED_SIZE_LIST) { NANOARROW_RETURN_NOT_OK(ArrowSchemaInitChildrenIfNeeded(schema, type)); } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, int32_t decimal_precision, int32_t decimal_scale) { if (decimal_precision <= 0) { return 
EINVAL; } char buffer[64]; int n_chars; switch (type) { case NANOARROW_TYPE_DECIMAL32: if (decimal_precision > 9) { return EINVAL; } n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,32", decimal_precision, decimal_scale); break; case NANOARROW_TYPE_DECIMAL64: if (decimal_precision > 18) { return EINVAL; } n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,64", decimal_precision, decimal_scale); break; case NANOARROW_TYPE_DECIMAL128: if (decimal_precision > 38) { return EINVAL; } n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale); break; case NANOARROW_TYPE_DECIMAL256: if (decimal_precision > 76) { return EINVAL; } n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision, decimal_scale); break; default: return EINVAL; } if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) { return ERANGE; } buffer[n_chars] = '\0'; return ArrowSchemaSetFormat(schema, buffer); } ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, enum ArrowType run_end_type) { switch (run_end_type) { case NANOARROW_TYPE_INT16: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_INT64: break; default: return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat( schema, ArrowSchemaFormatTemplate(NANOARROW_TYPE_RUN_END_ENCODED))); NANOARROW_RETURN_NOT_OK( ArrowSchemaInitChildrenIfNeeded(schema, NANOARROW_TYPE_RUN_END_ENCODED)); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0], run_end_type)); NANOARROW_RETURN_NOT_OK( ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_UNINITIALIZED)); return NANOARROW_OK; } static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) { switch (time_unit) { case NANOARROW_TIME_UNIT_SECOND: return "s"; case NANOARROW_TIME_UNIT_MILLI: return "m"; case NANOARROW_TIME_UNIT_MICRO: return "u"; case NANOARROW_TIME_UNIT_NANO: return "n"; default: return NULL; } } ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, enum 
ArrowTimeUnit time_unit, const char* timezone) { const char* time_unit_str = ArrowTimeUnitFormatString(time_unit); if (time_unit_str == NULL) { return EINVAL; } char buffer[128]; int n_chars; switch (type) { case NANOARROW_TYPE_TIME32: if (timezone != NULL) { return EINVAL; } switch (time_unit) { case NANOARROW_TIME_UNIT_MICRO: case NANOARROW_TIME_UNIT_NANO: return EINVAL; default: break; } n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); break; case NANOARROW_TYPE_TIME64: if (timezone != NULL) { return EINVAL; } switch (time_unit) { case NANOARROW_TIME_UNIT_SECOND: case NANOARROW_TIME_UNIT_MILLI: return EINVAL; default: break; } n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); break; case NANOARROW_TYPE_TIMESTAMP: if (timezone == NULL) { timezone = ""; } n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone); break; case NANOARROW_TYPE_DURATION: if (timezone != NULL) { return EINVAL; } n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str); break; default: return EINVAL; } if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) { return ERANGE; } buffer[n_chars] = '\0'; return ArrowSchemaSetFormat(schema, buffer); } ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, int64_t n_children) { if (n_children < 0 || n_children > 127) { return EINVAL; } // Max valid size would be +ud:0,1,...126 = 401 characters + null terminator char format_out[512]; int64_t format_out_size = 512; memset(format_out, 0, format_out_size); int n_chars; char* format_cursor = format_out; switch (type) { case NANOARROW_TYPE_SPARSE_UNION: n_chars = snprintf(format_cursor, format_out_size, "+us:"); format_cursor += n_chars; format_out_size -= n_chars; break; case NANOARROW_TYPE_DENSE_UNION: n_chars = snprintf(format_cursor, format_out_size, "+ud:"); format_cursor += n_chars; format_out_size -= n_chars; break; default: return EINVAL; } // Ensure that an encoding error from snprintf() does 
not result // in an out-of-bounds access. if (n_chars < 0) { return ERANGE; } if (n_children > 0) { n_chars = snprintf(format_cursor, format_out_size, "0"); format_cursor += n_chars; format_out_size -= n_chars; for (int64_t i = 1; i < n_children; i++) { n_chars = snprintf(format_cursor, format_out_size, ",%" PRId64, i); format_cursor += n_chars; format_out_size -= n_chars; } } // Ensure that an encoding error from snprintf() does not result // in an out-of-bounds access. if (n_chars < 0) { return ERANGE; } NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, format_out)); NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); for (int64_t i = 0; i < n_children; i++) { ArrowSchemaInit(schema->children[i]); } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) { if (schema->format != NULL) { ArrowFree((void*)schema->format); } if (format != NULL) { size_t format_size = strlen(format) + 1; schema->format = (const char*)ArrowMalloc(format_size); if (schema->format == NULL) { return ENOMEM; } memcpy((void*)schema->format, format, format_size); } else { schema->format = NULL; } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) { if (schema->name != NULL) { ArrowFree((void*)schema->name); } if (name != NULL) { size_t name_size = strlen(name) + 1; schema->name = (const char*)ArrowMalloc(name_size); if (schema->name == NULL) { return ENOMEM; } memcpy((void*)schema->name, name, name_size); } else { schema->name = NULL; } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata) { if (schema->metadata != NULL) { ArrowFree((void*)schema->metadata); } if (metadata != NULL) { size_t metadata_size = ArrowMetadataSizeOf(metadata); schema->metadata = (const char*)ArrowMalloc(metadata_size); if (schema->metadata == NULL) { return ENOMEM; } memcpy((void*)schema->metadata, metadata, metadata_size); } else { 
schema->metadata = NULL; } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, int64_t n_children) { if (schema->children != NULL) { return EEXIST; } if (n_children > 0) { schema->children = (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*)); if (schema->children == NULL) { return ENOMEM; } schema->n_children = n_children; memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*)); for (int64_t i = 0; i < n_children; i++) { schema->children[i] = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); if (schema->children[i] == NULL) { return ENOMEM; } schema->children[i]->release = NULL; } } return NANOARROW_OK; } ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) { if (schema->dictionary != NULL) { return EEXIST; } schema->dictionary = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); if (schema->dictionary == NULL) { return ENOMEM; } schema->dictionary->release = NULL; return NANOARROW_OK; } ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, struct ArrowSchema* schema_out) { ArrowSchemaInit(schema_out); int result = ArrowSchemaSetFormat(schema_out, schema->format); if (result != NANOARROW_OK) { ArrowSchemaRelease(schema_out); return result; } schema_out->flags = schema->flags; result = ArrowSchemaSetName(schema_out, schema->name); if (result != NANOARROW_OK) { ArrowSchemaRelease(schema_out); return result; } result = ArrowSchemaSetMetadata(schema_out, schema->metadata); if (result != NANOARROW_OK) { ArrowSchemaRelease(schema_out); return result; } result = ArrowSchemaAllocateChildren(schema_out, schema->n_children); if (result != NANOARROW_OK) { ArrowSchemaRelease(schema_out); return result; } for (int64_t i = 0; i < schema->n_children; i++) { result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]); if (result != NANOARROW_OK) { ArrowSchemaRelease(schema_out); return result; } } if (schema->dictionary != NULL) 
  /* Continuation of ArrowSchemaDeepCopy: copy the dictionary, if present. */
  {
    result = ArrowSchemaAllocateDictionary(schema_out);
    if (result != NANOARROW_OK) {
      ArrowSchemaRelease(schema_out);
      return result;
    }

    result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary);
    if (result != NANOARROW_OK) {
      ArrowSchemaRelease(schema_out);
      return result;
    }
  }

  return NANOARROW_OK;
}

/* Set both the logical type and the storage type of the view to the same
 * primitive type. */
static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view,
                                        enum ArrowType type) {
  schema_view->type = type;
  schema_view->storage_type = type;
}

/* Parse an Arrow C data interface format string into schema_view, writing a
 * pointer one past the last consumed character to *format_end_out. Returns
 * EINVAL (with error populated) on malformed input. */
static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
                                           const char* format,
                                           const char** format_end_out,
                                           struct ArrowError* error) {
  *format_end_out = format;

  // needed for decimal parsing
  const char* parse_start;
  char* parse_end;

  switch (format[0]) {
    case 'n':
      schema_view->type = NANOARROW_TYPE_NA;
      schema_view->storage_type = NANOARROW_TYPE_NA;
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'b':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'c':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'C':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 's':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'S':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'i':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'I':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'l':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'L':
      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64);
      *format_end_out = format +
1; return NANOARROW_OK; case 'e': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT); *format_end_out = format + 1; return NANOARROW_OK; case 'f': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT); *format_end_out = format + 1; return NANOARROW_OK; case 'g': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE); *format_end_out = format + 1; return NANOARROW_OK; // decimal case 'd': if (format[1] != ':' || format[2] == '\0') { ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'"); return EINVAL; } parse_start = format + 2; schema_view->decimal_precision = (int32_t)strtol(parse_start, &parse_end, 10); if (parse_end == parse_start || parse_end[0] != ',') { ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'"); return EINVAL; } parse_start = parse_end + 1; schema_view->decimal_scale = (int32_t)strtol(parse_start, &parse_end, 10); if (parse_end == parse_start) { ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'"); return EINVAL; } else if (parse_end[0] != ',') { schema_view->decimal_bitwidth = 128; } else { parse_start = parse_end + 1; schema_view->decimal_bitwidth = (int32_t)strtol(parse_start, &parse_end, 10); if (parse_start == parse_end) { ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'"); return EINVAL; } } *format_end_out = parse_end; switch (schema_view->decimal_bitwidth) { case 32: ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL32); return NANOARROW_OK; case 64: ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL64); return NANOARROW_OK; case 128: ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128); return NANOARROW_OK; case 256: ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256); return NANOARROW_OK; default: ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but found %" PRId32, schema_view->decimal_bitwidth); return EINVAL; } // validity + data case 'w': 
      /* Fixed-size binary: "w:size". */
      schema_view->type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
      schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
      if (format[1] != ':' || format[2] == '\0') {
        ArrowErrorSet(error, "Expected ':' following 'w'");
        return EINVAL;
      }

      schema_view->fixed_size = (int32_t)strtol(format + 2, (char**)format_end_out, 10);
      return NANOARROW_OK;

    // validity + offset + data
    case 'z':
      schema_view->type = NANOARROW_TYPE_BINARY;
      schema_view->storage_type = NANOARROW_TYPE_BINARY;
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'u':
      schema_view->type = NANOARROW_TYPE_STRING;
      schema_view->storage_type = NANOARROW_TYPE_STRING;
      *format_end_out = format + 1;
      return NANOARROW_OK;

    // validity + large_offset + data
    case 'Z':
      schema_view->type = NANOARROW_TYPE_LARGE_BINARY;
      schema_view->storage_type = NANOARROW_TYPE_LARGE_BINARY;
      *format_end_out = format + 1;
      return NANOARROW_OK;
    case 'U':
      schema_view->type = NANOARROW_TYPE_LARGE_STRING;
      schema_view->storage_type = NANOARROW_TYPE_LARGE_STRING;
      *format_end_out = format + 1;
      return NANOARROW_OK;

    // nested types
    case '+':
      switch (format[1]) {
        // list has validity + offset or offset
        case 'l':
          schema_view->storage_type = NANOARROW_TYPE_LIST;
          schema_view->type = NANOARROW_TYPE_LIST;
          *format_end_out = format + 2;
          return NANOARROW_OK;
        // large list has validity + large_offset or large_offset
        case 'L':
          schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST;
          schema_view->type = NANOARROW_TYPE_LARGE_LIST;
          *format_end_out = format + 2;
          return NANOARROW_OK;
        // run end encoded has no buffer at all
        case 'r':
          schema_view->storage_type = NANOARROW_TYPE_RUN_END_ENCODED;
          schema_view->type = NANOARROW_TYPE_RUN_END_ENCODED;
          *format_end_out = format + 2;
          return NANOARROW_OK;
        // just validity buffer
        case 'w':
          /* Fixed-size list: "+w:size". */
          if (format[2] != ':' || format[3] == '\0') {
            ArrowErrorSet(error, "Expected ':' following '+w'");
            return EINVAL;
          }

          schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
          schema_view->type = NANOARROW_TYPE_FIXED_SIZE_LIST;
          schema_view->fixed_size =
              (int32_t)strtol(format + 3, (char**)format_end_out, 10);
          return NANOARROW_OK;
        case 's':
          schema_view->storage_type = NANOARROW_TYPE_STRUCT;
          schema_view->type = NANOARROW_TYPE_STRUCT;
          *format_end_out = format + 2;
          return NANOARROW_OK;
        case 'm':
          schema_view->storage_type = NANOARROW_TYPE_MAP;
          schema_view->type = NANOARROW_TYPE_MAP;
          *format_end_out = format + 2;
          return NANOARROW_OK;
        // unions: "+ud:ids" (dense) or "+us:ids" (sparse)
        case 'u':
          switch (format[2]) {
            case 'd':
              schema_view->storage_type = NANOARROW_TYPE_DENSE_UNION;
              schema_view->type = NANOARROW_TYPE_DENSE_UNION;
              break;
            case 's':
              schema_view->storage_type = NANOARROW_TYPE_SPARSE_UNION;
              schema_view->type = NANOARROW_TYPE_SPARSE_UNION;
              break;
            default:
              ArrowErrorSet(error,
                            "Expected union format string +us: or "
                            "+ud: but found '%s'",
                            format);
              return EINVAL;
          }

          if (format[3] == ':') {
            schema_view->union_type_ids = format + 4;
            /* The number of declared type ids must match the child count. */
            int64_t n_type_ids =
                _ArrowParseUnionTypeIds(schema_view->union_type_ids, NULL);
            if (n_type_ids != schema_view->schema->n_children) {
              ArrowErrorSet(error,
                            "Expected union type_ids parameter to be a comma-separated "
                            "list of %" PRId64 " values between 0 and 127 but found '%s'",
                            schema_view->schema->n_children,
                            schema_view->union_type_ids);
              return EINVAL;
            }
            *format_end_out = format + strlen(format);
            return NANOARROW_OK;
          } else {
            ArrowErrorSet(error,
                          "Expected union format string +us: or +ud: "
                          "but found '%s'",
                          format);
            return EINVAL;
          }

        // views
        case 'v':
          switch (format[2]) {
            case 'l':
              schema_view->storage_type = NANOARROW_TYPE_LIST_VIEW;
              schema_view->type = NANOARROW_TYPE_LIST_VIEW;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'L':
              schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST_VIEW;
              schema_view->type = NANOARROW_TYPE_LARGE_LIST_VIEW;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(
                  error, "Expected view format string +vl or +vL but found '%s'", format);
              return EINVAL;
          }

        default:
          ArrowErrorSet(error, "Expected nested type format string but found '%s'", format);
          return EINVAL;
      }
    // date/time types: "t" followed by a subtype and unit character
    case 't':
      switch (format[1]) {
        // date
        case 'd':
          switch (format[2]) {
            case 'D':
              /* date32: days stored as int32. */
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
              schema_view->type = NANOARROW_TYPE_DATE32;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'm':
              /* date64: milliseconds stored as int64. */
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_DATE64;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'",
                            format + 2);
              return EINVAL;
          }

        // time of day
        case 't':
          switch (format[2]) {
            case 's':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
              schema_view->type = NANOARROW_TYPE_TIME32;
              schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'm':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
              schema_view->type = NANOARROW_TYPE_TIME32;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'u':
              /* Micro/nano resolutions need 64-bit storage. */
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIME64;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'n':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIME64;
              schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(
                  error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'",
                  format + 2);
              return EINVAL;
          }

        // timestamp
        case 's':
          switch (format[2]) {
            case 's':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIMESTAMP;
              schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
              break;
            case 'm':
              ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
              schema_view->type = NANOARROW_TYPE_TIMESTAMP;
              schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
              break;
            case 'u':
ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); schema_view->type = NANOARROW_TYPE_TIMESTAMP; schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; break; case 'n': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); schema_view->type = NANOARROW_TYPE_TIMESTAMP; schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; break; default: ArrowErrorSet( error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'", format + 2); return EINVAL; } if (format[3] != ':') { ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format, format + 3); return EINVAL; } schema_view->timezone = format + 4; *format_end_out = format + strlen(format); return NANOARROW_OK; // duration case 'D': switch (format[2]) { case 's': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); schema_view->type = NANOARROW_TYPE_DURATION; schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; *format_end_out = format + 3; return NANOARROW_OK; case 'm': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); schema_view->type = NANOARROW_TYPE_DURATION; schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; *format_end_out = format + 3; return NANOARROW_OK; case 'u': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); schema_view->type = NANOARROW_TYPE_DURATION; schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; *format_end_out = format + 3; return NANOARROW_OK; case 'n': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); schema_view->type = NANOARROW_TYPE_DURATION; schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; *format_end_out = format + 3; return NANOARROW_OK; default: ArrowErrorSet(error, "Expected 's', 'm', u', or 'n' following 'tD' but found '%s'", format + 2); return EINVAL; } // interval case 'i': switch (format[2]) { case 'M': ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS); *format_end_out = format + 3; return NANOARROW_OK; case 'D': ArrowSchemaViewSetPrimitive(schema_view, 
                                          NANOARROW_TYPE_INTERVAL_DAY_TIME);
              *format_end_out = format + 3;
              return NANOARROW_OK;
            case 'n':
              ArrowSchemaViewSetPrimitive(schema_view,
                                          NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO);
              *format_end_out = format + 3;
              return NANOARROW_OK;
            default:
              ArrowErrorSet(error,
                            "Expected 'M', 'D', or 'n' following 'ti' but found '%s'",
                            format + 2);
              return EINVAL;
          }

        default:
          ArrowErrorSet(
              error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'",
              format + 1);
          return EINVAL;
      }

    // view types
    case 'v': {
      switch (format[1]) {
        case 'u':
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_STRING_VIEW);
          *format_end_out = format + 2;
          return NANOARROW_OK;
        case 'z':
          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BINARY_VIEW);
          *format_end_out = format + 2;
          return NANOARROW_OK;
        default:
          ArrowErrorSet(error, "Expected 'u', or 'z' following 'v' but found '%s'",
                        format + 1);
          return EINVAL;
      }
    }

    default:
      ArrowErrorSet(error, "Unknown format: '%s'", format);
      return EINVAL;
  }
}

/* Check that the viewed schema has exactly n_children children (-1 means any
 * count is acceptable) and that each child pointer is non-NULL and not
 * released, so that later inspection cannot segfault. */
static ArrowErrorCode ArrowSchemaViewValidateNChildren(
    struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) {
  if (n_children != -1 && schema_view->schema->n_children != n_children) {
    ArrowErrorSet(
        error, "Expected schema with %" PRId64 " children but found %" PRId64 " children",
        n_children, schema_view->schema->n_children);
    return EINVAL;
  }

  // Don't do a full validation of children but do check that they won't
  // segfault if inspected
  struct ArrowSchema* child;
  for (int64_t i = 0; i < schema_view->schema->n_children; i++) {
    child = schema_view->schema->children[i];
    if (child == NULL) {
      ArrowErrorSet(
          error, "Expected valid schema at schema->children[%" PRId64 "] but found NULL",
          i);
      return EINVAL;
    } else if (child->release == NULL) {
      ArrowErrorSet(error,
                    "Expected valid schema at schema->children[%" PRId64
                    "] but found a released schema",
                    i);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

/* Validate a union schema: children are checked for basic sanity only. */
static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view,
                                                   struct ArrowError* error) {
  return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
}

/* Validate the fixed structure of a map schema: one non-nullable "+s" child
 * ("entries") with exactly two children, the first of which (the key) must be
 * non-nullable. */
static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view,
                                                 struct ArrowError* error) {
  NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error));

  if (schema_view->schema->children[0]->n_children != 2) {
    ArrowErrorSet(error, "Expected child of map type to have 2 children but found %" PRId64,
                  schema_view->schema->children[0]->n_children);
    return EINVAL;
  }

  if (strcmp(schema_view->schema->children[0]->format, "+s") != 0) {
    ArrowErrorSet(error, "Expected format of child of map type to be '+s' but found '%s'",
                  schema_view->schema->children[0]->format);
    return EINVAL;
  }

  if (schema_view->schema->children[0]->flags & ARROW_FLAG_NULLABLE) {
    ArrowErrorSet(error,
                  "Expected child of map type to be non-nullable but was nullable");
    return EINVAL;
  }

  if (schema_view->schema->children[0]->children[0]->flags & ARROW_FLAG_NULLABLE) {
    ArrowErrorSet(error, "Expected key of map type to be non-nullable but was nullable");
    return EINVAL;
  }

  return NANOARROW_OK;
}

/* Validate a dictionary schema: the storage type must be an integral index
 * type, and the dictionary schema itself must parse as a valid view. */
static ArrowErrorCode ArrowSchemaViewValidateDictionary(
    struct ArrowSchemaView* schema_view, struct ArrowError* error) {
  // check for valid index type
  switch (schema_view->storage_type) {
    case NANOARROW_TYPE_UINT8:
    case NANOARROW_TYPE_INT8:
    case NANOARROW_TYPE_UINT16:
    case NANOARROW_TYPE_INT16:
    case NANOARROW_TYPE_UINT32:
    case NANOARROW_TYPE_INT32:
    case NANOARROW_TYPE_UINT64:
    case NANOARROW_TYPE_INT64:
      break;
    default:
      ArrowErrorSet(
          error,
          "Expected dictionary schema index type to be an integral type but found '%s'",
          schema_view->schema->format);
      return EINVAL;
  }

  struct ArrowSchemaView dictionary_schema_view;
  return ArrowSchemaViewInit(&dictionary_schema_view, schema_view->schema->dictionary,
                             error);
}

/* Dispatch type-specific validation (child counts, fixed sizes, union/map/
 * dictionary structure) for the given logical or storage type. */
static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view,
                                              enum ArrowType type,
                                              struct ArrowError* error) {
  switch (type) {
    case NANOARROW_TYPE_NA:
    case
        /* Primitive, decimal, variable-size binary/string, date/time, and
         * view types carry no children. */
        NANOARROW_TYPE_BOOL:
    case NANOARROW_TYPE_UINT8:
    case NANOARROW_TYPE_INT8:
    case NANOARROW_TYPE_UINT16:
    case NANOARROW_TYPE_INT16:
    case NANOARROW_TYPE_UINT32:
    case NANOARROW_TYPE_INT32:
    case NANOARROW_TYPE_UINT64:
    case NANOARROW_TYPE_INT64:
    case NANOARROW_TYPE_HALF_FLOAT:
    case NANOARROW_TYPE_FLOAT:
    case NANOARROW_TYPE_DOUBLE:
    case NANOARROW_TYPE_DECIMAL32:
    case NANOARROW_TYPE_DECIMAL64:
    case NANOARROW_TYPE_DECIMAL128:
    case NANOARROW_TYPE_DECIMAL256:
    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_LARGE_STRING:
    case NANOARROW_TYPE_BINARY:
    case NANOARROW_TYPE_LARGE_BINARY:
    case NANOARROW_TYPE_DATE32:
    case NANOARROW_TYPE_DATE64:
    case NANOARROW_TYPE_INTERVAL_MONTHS:
    case NANOARROW_TYPE_INTERVAL_DAY_TIME:
    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
    case NANOARROW_TYPE_TIMESTAMP:
    case NANOARROW_TYPE_TIME32:
    case NANOARROW_TYPE_TIME64:
    case NANOARROW_TYPE_DURATION:
    case NANOARROW_TYPE_BINARY_VIEW:
    case NANOARROW_TYPE_STRING_VIEW:
      return ArrowSchemaViewValidateNChildren(schema_view, 0, error);

    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
      if (schema_view->fixed_size <= 0) {
        ArrowErrorSet(error, "Expected size > 0 for fixed size binary but found size %d",
                      schema_view->fixed_size);
        return EINVAL;
      }
      return ArrowSchemaViewValidateNChildren(schema_view, 0, error);

    /* All list flavors carry exactly one child (the element type). */
    case NANOARROW_TYPE_LIST:
    case NANOARROW_TYPE_LIST_VIEW:
    case NANOARROW_TYPE_LARGE_LIST:
    case NANOARROW_TYPE_LARGE_LIST_VIEW:
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      return ArrowSchemaViewValidateNChildren(schema_view, 1, error);

    /* Run-end encoded carries run_ends + values. */
    case NANOARROW_TYPE_RUN_END_ENCODED:
      return ArrowSchemaViewValidateNChildren(schema_view, 2, error);

    case NANOARROW_TYPE_STRUCT:
      return ArrowSchemaViewValidateNChildren(schema_view, -1, error);

    case NANOARROW_TYPE_SPARSE_UNION:
    case NANOARROW_TYPE_DENSE_UNION:
      return ArrowSchemaViewValidateUnion(schema_view, error);

    case NANOARROW_TYPE_MAP:
      return ArrowSchemaViewValidateMap(schema_view, error);

    case NANOARROW_TYPE_DICTIONARY:
      return ArrowSchemaViewValidateDictionary(schema_view, error);

    default:
      ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d",
                    schema_view->type);
      return EINVAL;
  }

  return NANOARROW_OK;
}

/* Populate schema_view by parsing and validating schema: checks for NULL and
 * released schemas, parses the format string, validates structure and flags,
 * initializes the buffer layout, and extracts extension metadata. */
ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
                                   const struct ArrowSchema* schema,
                                   struct ArrowError* error) {
  if (schema == NULL) {
    ArrowErrorSet(error, "Expected non-NULL schema");
    return EINVAL;
  }

  if (schema->release == NULL) {
    ArrowErrorSet(error, "Expected non-released schema");
    return EINVAL;
  }

  schema_view->schema = schema;

  const char* format = schema->format;
  if (format == NULL) {
    ArrowErrorSet(
        error,
        "Error parsing schema->format: Expected a null-terminated string but found NULL");
    return EINVAL;
  }

  size_t format_len = strlen(format);
  if (format_len == 0) {
    ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0");
    return EINVAL;
  }

  const char* format_end_out;
  int result = ArrowSchemaViewParse(schema_view, format, &format_end_out, error);

  if (result != NANOARROW_OK) {
    if (error != NULL) {
      /* Copy the parse error aside so it can be wrapped with context. */
      char child_error[1024];
      memcpy(child_error, ArrowErrorMessage(error), 1024);
      ArrowErrorSet(error, "Error parsing schema->format: %s", child_error);
    }

    return result;
  }

  /* The whole format string must have been consumed. */
  if ((format + format_len) != format_end_out) {
    ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%zu characters",
                  format, (int)(format_end_out - format), format_len);
    return EINVAL;
  }

  /* A dictionary member overrides the parsed logical type. */
  if (schema->dictionary != NULL) {
    schema_view->type = NANOARROW_TYPE_DICTIONARY;
  }

  NANOARROW_RETURN_NOT_OK(
      ArrowSchemaViewValidate(schema_view, schema_view->storage_type, error));

  if (schema_view->storage_type != schema_view->type) {
    NANOARROW_RETURN_NOT_OK(
        ArrowSchemaViewValidate(schema_view, schema_view->type, error));
  }

  int64_t unknown_flags = schema->flags & ~NANOARROW_FLAG_ALL_SUPPORTED;
  if (unknown_flags != 0) {
    ArrowErrorSet(error, "Unknown ArrowSchema flag");
    return EINVAL;
  }

  if (schema->flags & ARROW_FLAG_DICTIONARY_ORDERED &&
      schema_view->type != NANOARROW_TYPE_DICTIONARY) {
    ArrowErrorSet(error,
                  "ARROW_FLAG_DICTIONARY_ORDERED is only relevant for dictionaries");
    return EINVAL;
  }

  if (schema->flags & ARROW_FLAG_MAP_KEYS_SORTED &&
      schema_view->type != NANOARROW_TYPE_MAP) {
    ArrowErrorSet(error, "ARROW_FLAG_MAP_KEYS_SORTED is only relevant for a map type");
    return EINVAL;
  }

  ArrowLayoutInit(&schema_view->layout, schema_view->storage_type);
  if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) {
    /* Data buffer element width in bits is the fixed byte size * 8. */
    schema_view->layout.element_size_bits[1] = (int64_t)schema_view->fixed_size * 8;
  } else if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_LIST) {
    schema_view->layout.child_size_elements = schema_view->fixed_size;
  }

  /* Extension name/metadata default to a NULL view when absent. */
  schema_view->extension_name = ArrowCharView(NULL);
  schema_view->extension_metadata = ArrowCharView(NULL);
  NANOARROW_RETURN_NOT_OK(ArrowMetadataGetValue(schema->metadata,
                                                ArrowCharView("ARROW:extension:name"),
                                                &schema_view->extension_name));
  NANOARROW_RETURN_NOT_OK(ArrowMetadataGetValue(schema->metadata,
                                                ArrowCharView("ARROW:extension:metadata"),
                                                &schema_view->extension_metadata));

  return NANOARROW_OK;
}

/* snprintf a human-readable rendering of the view's type (with parameters for
 * decimal, timestamp, time, fixed-size, and union types) into out, returning
 * the snprintf result. */
static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view,
                                               char* out, int64_t n) {
  const char* type_string = ArrowTypeString(schema_view->type);
  switch (schema_view->type) {
    case NANOARROW_TYPE_DECIMAL32:
    case NANOARROW_TYPE_DECIMAL64:
    case NANOARROW_TYPE_DECIMAL128:
    case NANOARROW_TYPE_DECIMAL256:
      return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string,
                      schema_view->decimal_precision, schema_view->decimal_scale);
    case NANOARROW_TYPE_TIMESTAMP:
      return snprintf(out, n, "%s('%s', '%s')", type_string,
                      ArrowTimeUnitString(schema_view->time_unit), schema_view->timezone);
    case NANOARROW_TYPE_TIME32:
    case NANOARROW_TYPE_TIME64:
    case NANOARROW_TYPE_DURATION:
      return snprintf(out, n, "%s('%s')", type_string,
                      ArrowTimeUnitString(schema_view->time_unit));
    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      return snprintf(out, n, "%s(%" PRId32 ")", type_string, schema_view->fixed_size);
    case
        NANOARROW_TYPE_SPARSE_UNION:
    case NANOARROW_TYPE_DENSE_UNION:
      return snprintf(out, n, "%s([%s])", type_string, schema_view->union_type_ids);
    default:
      return snprintf(out, n, "%s", type_string);
  }
}

// Helper for bookkeeping to emulate sprintf()-like behaviour spread
// among multiple sprintf calls.
static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
                                         int64_t* n_remaining, int64_t* n_chars) {
  // In the unlikely snprintf() returning a negative value (encoding error),
  // ensure the result won't cause an out-of-bounds access.
  if (n_chars_last < 0) {
    n_chars_last = 0;
  }

  *n_chars += n_chars_last;
  *n_remaining -= n_chars_last;

  // n_remaining is never less than 0
  if (*n_remaining < 0) {
    *n_remaining = 0;
  }

  // Can't do math on a NULL pointer
  if (*out != NULL) {
    *out += n_chars_last;
  }
}

/* Render schema into out (at most n bytes), optionally recursing into
 * children. Returns the number of characters that would have been written,
 * snprintf-style; invalid schemas render as "[invalid: ...]". */
int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n,
                            char recursive) {
  if (schema == NULL) {
    return snprintf(out, n, "[invalid: pointer is null]");
  }

  if (schema->release == NULL) {
    return snprintf(out, n, "[invalid: schema is released]");
  }

  struct ArrowSchemaView schema_view;
  struct ArrowError error;

  if (ArrowSchemaViewInit(&schema_view, schema, &error) != NANOARROW_OK) {
    return snprintf(out, n, "[invalid: %s]", ArrowErrorMessage(&error));
  }

  // Extension type and dictionary should include both the top-level type
  // and the storage type.
  int is_extension = schema_view.extension_name.size_bytes > 0;
  int is_dictionary = schema->dictionary != NULL;
  int64_t n_chars = 0;
  int64_t n_chars_last = 0;

  // Uncommon but not technically impossible that both are true
  if (is_extension && is_dictionary) {
    n_chars_last = snprintf(
        out, n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.size_bytes,
        schema_view.extension_name.data, ArrowTypeString(schema_view.storage_type));
  } else if (is_extension) {
    n_chars_last = snprintf(out, n, "%.*s{", (int)schema_view.extension_name.size_bytes,
                            schema_view.extension_name.data);
  } else if (is_dictionary) {
    n_chars_last =
        snprintf(out, n, "dictionary(%s)<", ArrowTypeString(schema_view.storage_type));
  }

  ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);

  /* Dictionaries render their value schema recursively; everything else
   * renders the (possibly parameterized) type name. */
  if (!is_dictionary) {
    n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n);
  } else {
    n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive);
  }

  ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);

  /* Nested types (format starts with '+') list their children in <...>. */
  if (recursive && schema->format[0] == '+') {
    n_chars_last = snprintf(out, n, "<");
    ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);

    for (int64_t i = 0; i < schema->n_children; i++) {
      if (i > 0) {
        n_chars_last = snprintf(out, n, ", ");
        ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
      }

      // ArrowSchemaToStringInternal() will validate the child and print the error,
      // but we need the name first
      if (schema->children[i] != NULL && schema->children[i]->release != NULL &&
          schema->children[i]->name != NULL) {
        n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name);
        ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
      }

      n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive);
      ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
    }

    n_chars_last = snprintf(out, n, ">");
    ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
  }

  /* Close the wrappers opened above. */
  if (is_extension && is_dictionary) {
    n_chars += snprintf(out, n, ">}");
  } else if (is_extension) {
    n_chars +=
        snprintf(out, n, "}");
  } else if (is_dictionary) {
    n_chars += snprintf(out, n, ">");
  }

  // Ensure that we always return a positive result
  if (n_chars > 0) {
    return n_chars;
  } else {
    return 0;
  }
}

/* Initialize a reader over the Arrow KV-metadata encoding: an int32 key
 * count followed by (int32 key_len, key, int32 value_len, value) records.
 * NULL metadata yields a reader with zero remaining keys. */
ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
                                       const char* metadata) {
  reader->metadata = metadata;

  if (reader->metadata == NULL) {
    reader->offset = 0;
    reader->remaining_keys = 0;
  } else {
    /* memcpy avoids unaligned reads on the serialized int32. */
    memcpy(&reader->remaining_keys, reader->metadata, sizeof(int32_t));
    reader->offset = sizeof(int32_t);
  }

  return NANOARROW_OK;
}

/* Read the next key/value pair as views into the metadata buffer; returns
 * EINVAL when no keys remain. */
ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader,
                                       struct ArrowStringView* key_out,
                                       struct ArrowStringView* value_out) {
  if (reader->remaining_keys <= 0) {
    return EINVAL;
  }

  int64_t pos = 0;

  int32_t key_size;
  memcpy(&key_size, reader->metadata + reader->offset + pos, sizeof(int32_t));
  pos += sizeof(int32_t);

  key_out->data = reader->metadata + reader->offset + pos;
  key_out->size_bytes = key_size;
  pos += key_size;

  int32_t value_size;
  memcpy(&value_size, reader->metadata + reader->offset + pos, sizeof(int32_t));
  pos += sizeof(int32_t);

  value_out->data = reader->metadata + reader->offset + pos;
  value_out->size_bytes = value_size;
  pos += value_size;

  reader->offset += pos;
  reader->remaining_keys--;
  return NANOARROW_OK;
}

/* Total serialized size in bytes of a metadata buffer (0 for NULL). */
int64_t ArrowMetadataSizeOf(const char* metadata) {
  if (metadata == NULL) {
    return 0;
  }

  struct ArrowMetadataReader reader;
  struct ArrowStringView key;
  struct ArrowStringView value;
  if (ArrowMetadataReaderInit(&reader, metadata) != NANOARROW_OK) {
    return 0;
  }

  int64_t size = sizeof(int32_t);
  while (ArrowMetadataReaderRead(&reader, &key, &value) == NANOARROW_OK) {
    size += sizeof(int32_t) + key.size_bytes + sizeof(int32_t) + value.size_bytes;
  }

  return size;
}

/* Scan metadata for key, writing the first matching value view to value_out;
 * value_out is left untouched if the key is absent. */
static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata,
                                                    struct ArrowStringView* key,
                                                    struct ArrowStringView* value_out) {
  struct ArrowMetadataReader reader;
  struct ArrowStringView existing_key;
  struct ArrowStringView existing_value;
  NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, metadata));

  while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) ==
         NANOARROW_OK) {
    int key_equal = key->size_bytes == existing_key.size_bytes &&
                    strncmp(key->data, existing_key.data, existing_key.size_bytes) == 0;
    if (key_equal) {
      value_out->data = existing_value.data;
      value_out->size_bytes = existing_value.size_bytes;
      break;
    }
  }

  return NANOARROW_OK;
}

/* Public wrapper around ArrowMetadataGetValueInternal with a NULL check. */
ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key,
                                     struct ArrowStringView* value_out) {
  if (value_out == NULL) {
    return EINVAL;
  }

  return ArrowMetadataGetValueInternal(metadata, &key, value_out);
}

/* Return nonzero if key is present in metadata. */
char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) {
  struct ArrowStringView value = ArrowCharView(NULL);
  if (ArrowMetadataGetValue(metadata, key, &value) != NANOARROW_OK) {
    return 0;
  }

  return value.data != NULL;
}

/* Initialize buffer as a metadata builder seeded with an existing serialized
 * metadata blob (or empty for NULL). */
ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer,
                                        const char* metadata) {
  ArrowBufferInit(buffer);
  return ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata));
}

/* Append one key/value record to a builder buffer and bump the leading key
 * count. A NULL value is a no-op (used by the set/remove paths). */
static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer,
                                                         struct ArrowStringView* key,
                                                         struct ArrowStringView* value) {
  if (value == NULL) {
    return NANOARROW_OK;
  }

  /* An empty buffer gets its int32 key-count header first. */
  if (buffer->capacity_bytes == 0) {
    NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0));
  }

  if (((size_t)buffer->capacity_bytes) < sizeof(int32_t)) {
    return EINVAL;
  }

  int32_t n_keys;
  memcpy(&n_keys, buffer->data, sizeof(int32_t));

  int32_t key_size = (int32_t)key->size_bytes;
  int32_t value_size = (int32_t)value->size_bytes;
  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(
      buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size));

  ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t));
  ArrowBufferAppendUnsafe(buffer, key->data, key_size);
  ArrowBufferAppendUnsafe(buffer, &value_size, sizeof(int32_t));
  ArrowBufferAppendUnsafe(buffer, value->data, value_size);

  n_keys++;
  memcpy(buffer->data,
&n_keys, sizeof(int32_t)); return NANOARROW_OK; } static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer, struct ArrowStringView* key, struct ArrowStringView* value) { // Inspect the current value to see if we can avoid copying the buffer struct ArrowStringView current_value = ArrowCharView(NULL); NANOARROW_RETURN_NOT_OK( ArrowMetadataGetValueInternal((const char*)buffer->data, key, ¤t_value)); // The key should be removed but no key exists if (value == NULL && current_value.data == NULL) { return NANOARROW_OK; } // The key/value can be appended because no key exists if (value != NULL && current_value.data == NULL) { return ArrowMetadataBuilderAppendInternal(buffer, key, value); } struct ArrowMetadataReader reader; struct ArrowStringView existing_key; struct ArrowStringView existing_value; NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data)); struct ArrowBuffer new_buffer; NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL)); while (reader.remaining_keys > 0) { int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value); if (result != NANOARROW_OK) { ArrowBufferReset(&new_buffer); return result; } if (key->size_bytes == existing_key.size_bytes && strncmp((const char*)key->data, (const char*)existing_key.data, existing_key.size_bytes) == 0) { result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value); value = NULL; } else { result = ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value); } if (result != NANOARROW_OK) { ArrowBufferReset(&new_buffer); return result; } } ArrowBufferReset(buffer); ArrowBufferMove(&new_buffer, buffer); return NANOARROW_OK; } ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, struct ArrowStringView key, struct ArrowStringView value) { return ArrowMetadataBuilderAppendInternal(buffer, &key, &value); } ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, struct ArrowStringView key, 
struct ArrowStringView value) { return ArrowMetadataBuilderSetInternal(buffer, &key, &value); } ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, struct ArrowStringView key) { return ArrowMetadataBuilderSetInternal(buffer, &key, NULL); } // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include #include #include #include #include #include "nanoarrow/nanoarrow.h" static void ArrowArrayReleaseInternal(struct ArrowArray* array) { // Release buffers held by this array struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; if (private_data != NULL) { ArrowBitmapReset(&private_data->bitmap); ArrowBufferReset(&private_data->buffers[0]); ArrowBufferReset(&private_data->buffers[1]); ArrowFree(private_data->buffer_data); for (int32_t i = 0; i < private_data->n_variadic_buffers; ++i) { ArrowBufferReset(&private_data->variadic_buffers[i]); } ArrowFree(private_data->variadic_buffers); ArrowFree(private_data); } // This object owns the memory for all the children, but those // children may have been generated elsewhere and might have // their own release() callback. 
if (array->children != NULL) { for (int64_t i = 0; i < array->n_children; i++) { if (array->children[i] != NULL) { if (array->children[i]->release != NULL) { ArrowArrayRelease(array->children[i]); } ArrowFree(array->children[i]); } } ArrowFree(array->children); } // This object owns the memory for the dictionary but it // may have been generated somewhere else and have its own // release() callback. if (array->dictionary != NULL) { if (array->dictionary->release != NULL) { ArrowArrayRelease(array->dictionary); } ArrowFree(array->dictionary); } // Mark released array->release = NULL; } static int ArrowArrayIsInternal(struct ArrowArray* array) { return array->release == &ArrowArrayReleaseInternal; } static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, enum ArrowType storage_type) { switch (storage_type) { case NANOARROW_TYPE_UNINITIALIZED: case NANOARROW_TYPE_NA: case NANOARROW_TYPE_RUN_END_ENCODED: array->n_buffers = 0; break; case NANOARROW_TYPE_FIXED_SIZE_LIST: case NANOARROW_TYPE_STRUCT: case NANOARROW_TYPE_SPARSE_UNION: array->n_buffers = 1; break; case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_MAP: case NANOARROW_TYPE_BOOL: case NANOARROW_TYPE_UINT8: case NANOARROW_TYPE_INT8: case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_INT16: case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_UINT64: case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DOUBLE: case NANOARROW_TYPE_DECIMAL32: case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: case NANOARROW_TYPE_INTERVAL_MONTHS: case NANOARROW_TYPE_INTERVAL_DAY_TIME: case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: case NANOARROW_TYPE_FIXED_SIZE_BINARY: case NANOARROW_TYPE_DENSE_UNION: array->n_buffers = 2; break; case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1; break; case 
NANOARROW_TYPE_STRING: case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_LARGE_BINARY: case NANOARROW_TYPE_LIST_VIEW: case NANOARROW_TYPE_LARGE_LIST_VIEW: array->n_buffers = 3; break; default: return EINVAL; return NANOARROW_OK; } struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; private_data->storage_type = storage_type; return NANOARROW_OK; } ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, enum ArrowType storage_type) { array->length = 0; array->null_count = 0; array->offset = 0; array->n_buffers = 0; array->n_children = 0; array->buffers = NULL; array->children = NULL; array->dictionary = NULL; array->release = &ArrowArrayReleaseInternal; array->private_data = NULL; struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData)); if (private_data == NULL) { array->release = NULL; return ENOMEM; } ArrowBitmapInit(&private_data->bitmap); ArrowBufferInit(&private_data->buffers[0]); ArrowBufferInit(&private_data->buffers[1]); private_data->buffer_data = (const void**)ArrowMalloc(sizeof(void*) * NANOARROW_MAX_FIXED_BUFFERS); for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; ++i) { private_data->buffer_data[i] = NULL; } private_data->n_variadic_buffers = 0; private_data->variadic_buffers = NULL; private_data->list_view_offset = 0; array->private_data = private_data; array->buffers = (const void**)(private_data->buffer_data); // These are not technically "storage" in the sense that they do not appear // in the ArrowSchemaView's storage_type member; however, allowing them here // is helpful to maximize the number of types that can avoid going through // ArrowArrayInitFromSchema(). 
switch (storage_type) { case NANOARROW_TYPE_DURATION: case NANOARROW_TYPE_TIMESTAMP: case NANOARROW_TYPE_TIME64: case NANOARROW_TYPE_DATE64: storage_type = NANOARROW_TYPE_INT64; break; case NANOARROW_TYPE_TIME32: case NANOARROW_TYPE_DATE32: storage_type = NANOARROW_TYPE_INT32; break; default: break; } int result = ArrowArraySetStorageType(array, storage_type); if (result != NANOARROW_OK) { ArrowArrayRelease(array); return result; } ArrowLayoutInit(&private_data->layout, storage_type); // We can only know this not to be true when initializing based on a schema // so assume this to be true. private_data->union_type_id_is_child_index = 1; return NANOARROW_OK; } ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, const struct ArrowArrayView* array_view, struct ArrowError* error) { NANOARROW_RETURN_NOT_OK_WITH_ERROR( ArrowArrayInitFromType(array, array_view->storage_type), error); int result; struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; private_data->layout = array_view->layout; if (array_view->n_children > 0) { result = ArrowArrayAllocateChildren(array, array_view->n_children); if (result != NANOARROW_OK) { ArrowArrayRelease(array); return result; } for (int64_t i = 0; i < array_view->n_children; i++) { result = ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error); if (result != NANOARROW_OK) { ArrowArrayRelease(array); return result; } } } if (array_view->dictionary != NULL) { result = ArrowArrayAllocateDictionary(array); if (result != NANOARROW_OK) { ArrowArrayRelease(array); return result; } result = ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error); if (result != NANOARROW_OK) { ArrowArrayRelease(array); return result; } } return NANOARROW_OK; } ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, const struct ArrowSchema* schema, struct ArrowError* error) { struct ArrowArrayView array_view; 
NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error)); NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromArrayView(array, &array_view, error)); if (array_view.storage_type == NANOARROW_TYPE_DENSE_UNION || array_view.storage_type == NANOARROW_TYPE_SPARSE_UNION) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; // We can still build arrays if this isn't true; however, the append // functions won't work. Instead, we store this value and error only // when StartAppending is called. private_data->union_type_id_is_child_index = _ArrowUnionTypeIdsWillEqualChildIndices(schema->format + 4, schema->n_children); } ArrowArrayViewReset(&array_view); return NANOARROW_OK; } ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children) { if (array->children != NULL) { return EINVAL; } if (n_children == 0) { return NANOARROW_OK; } array->children = (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct ArrowArray*)); if (array->children == NULL) { return ENOMEM; } memset(array->children, 0, n_children * sizeof(struct ArrowArray*)); for (int64_t i = 0; i < n_children; i++) { array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); if (array->children[i] == NULL) { return ENOMEM; } array->children[i]->release = NULL; } array->n_children = n_children; return NANOARROW_OK; } ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) { if (array->dictionary != NULL) { return EINVAL; } array->dictionary = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); if (array->dictionary == NULL) { return ENOMEM; } array->dictionary->release = NULL; return NANOARROW_OK; } void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer); private_data->bitmap.size_bits = 
bitmap->size_bits; bitmap->size_bits = 0; private_data->buffer_data[0] = private_data->bitmap.buffer.data; array->null_count = -1; } ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, struct ArrowBuffer* buffer) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; if (i >= array->n_buffers || i < 0) { return EINVAL; } // Find the `i`th buffer, release what is currently there, and move the // supplied buffer into that slot. struct ArrowBuffer* dst = ArrowArrayBuffer(array, i); ArrowBufferReset(dst); ArrowBufferMove(buffer, dst); // Flush the pointer into array->buffers. In theory clients should call // ArrowArrayFinishBuilding() to flush the pointer values before passing // this array elsewhere; however, in early nanoarrow versions this was not // needed and some code may depend on this being true. private_data->buffer_data[i] = dst->data; array->buffers = private_data->buffer_data; return NANOARROW_OK; } static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view, struct ArrowArray* array, struct ArrowError* error) { if (!ArrowArrayIsInternal(array)) { ArrowErrorSet(error, "Can't initialize internal ArrowArrayView from external ArrowArray"); return EINVAL; } struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; ArrowArrayViewInitFromType(array_view, private_data->storage_type); array_view->layout = private_data->layout; array_view->array = array; array_view->length = array->length; array_view->offset = array->offset; array_view->null_count = array->null_count; array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data; array_view->buffer_views[0].size_bytes = private_data->bitmap.buffer.size_bytes; array_view->buffer_views[1].data.as_uint8 = private_data->buffers[0].data; array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes; array_view->buffer_views[2].data.as_uint8 = 
private_data->buffers[1].data; array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes; int result = ArrowArrayViewAllocateChildren(array_view, array->n_children); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } for (int64_t i = 0; i < array->n_children; i++) { result = ArrowArrayViewInitFromArray(array_view->children[i], array->children[i], error); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } } if (array->dictionary != NULL) { result = ArrowArrayViewAllocateDictionary(array_view); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary, error); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } } return NANOARROW_OK; } static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array, struct ArrowArrayView* array_view) { // Loop through buffers and reserve the extra space that we know about for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { // Don't reserve on a validity buffer that hasn't been allocated yet if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY && ArrowArrayBuffer(array, i)->data == NULL) { continue; } int64_t additional_size_bytes = array_view->buffer_views[i].size_bytes - ArrowArrayBuffer(array, i)->size_bytes; if (additional_size_bytes > 0) { NANOARROW_RETURN_NOT_OK( ArrowBufferReserve(ArrowArrayBuffer(array, i), additional_size_bytes)); } } // Recursively reserve children for (int64_t i = 0; i < array->n_children; i++) { NANOARROW_RETURN_NOT_OK( ArrowArrayReserveInternal(array->children[i], array_view->children[i])); } return NANOARROW_OK; } ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, int64_t additional_size_elements) { struct ArrowArrayView array_view; NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array, NULL)); // Calculate theoretical buffer sizes 
(recursively) ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements); // Walk the structure (recursively) int result = ArrowArrayReserveInternal(array, &array_view); ArrowArrayViewReset(&array_view); if (result != NANOARROW_OK) { return result; } return NANOARROW_OK; } static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY || private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { continue; } struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); if (buffer->data == NULL) { NANOARROW_RETURN_NOT_OK((ArrowBufferReserve(buffer, 1))); } } for (int64_t i = 0; i < array->n_children; i++) { if (ArrowArrayIsInternal(array->children[i])) { NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i])); } } if (array->dictionary != NULL && ArrowArrayIsInternal(array->dictionary)) { NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary)); } return NANOARROW_OK; } static ArrowErrorCode ArrowArrayFlushInternalPointers(struct ArrowArray* array) { NANOARROW_DCHECK(ArrowArrayIsInternal(array)); struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; if (array->n_buffers > NANOARROW_MAX_FIXED_BUFFERS) { // If the variadic sizes buffer was not set and there is at least one variadic // buffer, populate it now (if there are no variadic buffers there will be exactly // three total buffers and we don't need to do anything special here). Notably, this // will occur when building a BinaryView/StringView array by element using the // appender. 
struct ArrowBuffer* sizes_buffer = ArrowArrayBuffer(array, array->n_buffers - 1); if (sizes_buffer->data == NULL && sizes_buffer->size_bytes == 0) { NANOARROW_RETURN_NOT_OK( ArrowBufferReserve(sizes_buffer, private_data->n_variadic_buffers)); for (int64_t i = 0; i < private_data->n_variadic_buffers; i++) { struct ArrowBuffer* variadic_buffer = ArrowArrayBuffer(array, i + NANOARROW_BINARY_VIEW_FIXED_BUFFERS); NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt64(sizes_buffer, variadic_buffer->size_bytes)); } } } for (int32_t i = 0; i < array->n_buffers; i++) { private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data; } array->buffers = (const void**)(private_data->buffer_data); // Flush internal pointers for child/dictionary arrays if we allocated them. Clients // building arrays by buffer might have moved arrays from some other source (e.g., // to create a record batch) and calling this function in that case will cause a crash. for (int64_t i = 0; i < array->n_children; i++) { if (ArrowArrayIsInternal(array->children[i])) { NANOARROW_RETURN_NOT_OK(ArrowArrayFlushInternalPointers(array->children[i])); } } if (array->dictionary != NULL && ArrowArrayIsInternal(array->dictionary)) { NANOARROW_RETURN_NOT_OK(ArrowArrayFlushInternalPointers(array->dictionary)); } return NANOARROW_OK; } ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, enum ArrowValidationLevel validation_level, struct ArrowError* error) { // Even if the data buffer is size zero, the pointer value needed to be non-null // in some implementations (at least one version of Arrow C++ at the time this // was added and C# as later discovered). Only do this fix if we can assume // CPU data access. 
if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) { NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array), error); } // Make sure the value we get with array->buffers[i] is set to the actual // pointer (which may have changed from the original due to reallocation) NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFlushInternalPointers(array), error); if (validation_level == NANOARROW_VALIDATION_LEVEL_NONE) { return NANOARROW_OK; } // For validation, initialize an ArrowArrayView with our known buffer sizes struct ArrowArrayView array_view; NANOARROW_RETURN_NOT_OK_WITH_ERROR( ArrowArrayViewInitFromArray(&array_view, array, error), error); int result = ArrowArrayViewValidate(&array_view, validation_level, error); ArrowArrayViewReset(&array_view); return result; } ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, struct ArrowError* error) { return ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_DEFAULT, error); } void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, enum ArrowType storage_type) { memset(array_view, 0, sizeof(struct ArrowArrayView)); array_view->storage_type = storage_type; ArrowLayoutInit(&array_view->layout, storage_type); } ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, int64_t n_children) { if (array_view->children != NULL) { return EINVAL; } if (n_children == 0) { array_view->n_children = 0; return NANOARROW_OK; } array_view->children = (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*)); if (array_view->children == NULL) { return ENOMEM; } for (int64_t i = 0; i < n_children; i++) { array_view->children[i] = NULL; } array_view->n_children = n_children; for (int64_t i = 0; i < n_children; i++) { array_view->children[i] = (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView)); if (array_view->children[i] == NULL) { return ENOMEM; } ArrowArrayViewInitFromType(array_view->children[i], 
NANOARROW_TYPE_UNINITIALIZED); } return NANOARROW_OK; } ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) { if (array_view->dictionary != NULL) { return EINVAL; } array_view->dictionary = (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView)); if (array_view->dictionary == NULL) { return ENOMEM; } ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED); return NANOARROW_OK; } ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, const struct ArrowSchema* schema, struct ArrowError* error) { struct ArrowSchemaView schema_view; int result = ArrowSchemaViewInit(&schema_view, schema, error); if (result != NANOARROW_OK) { return result; } ArrowArrayViewInitFromType(array_view, schema_view.storage_type); array_view->layout = schema_view.layout; result = ArrowArrayViewAllocateChildren(array_view, schema->n_children); if (result != NANOARROW_OK) { ArrowErrorSet(error, "ArrowArrayViewAllocateChildren() failed"); ArrowArrayViewReset(array_view); return result; } for (int64_t i = 0; i < schema->n_children; i++) { result = ArrowArrayViewInitFromSchema(array_view->children[i], schema->children[i], error); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } } if (schema->dictionary != NULL) { result = ArrowArrayViewAllocateDictionary(array_view); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } result = ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error); if (result != NANOARROW_OK) { ArrowArrayViewReset(array_view); return result; } } if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION || array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) { array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t)); if (array_view->union_type_id_map == NULL) { return ENOMEM; } memset(array_view->union_type_id_map, -1, 256); int32_t n_type_ids = 
_ArrowParseUnionTypeIds(schema_view.union_type_ids, array_view->union_type_id_map + 128); for (int8_t child_index = 0; child_index < n_type_ids; child_index++) { int8_t type_id = array_view->union_type_id_map[128 + child_index]; array_view->union_type_id_map[type_id] = child_index; } } return NANOARROW_OK; } void ArrowArrayViewReset(struct ArrowArrayView* array_view) { if (array_view->children != NULL) { for (int64_t i = 0; i < array_view->n_children; i++) { if (array_view->children[i] != NULL) { ArrowArrayViewReset(array_view->children[i]); ArrowFree(array_view->children[i]); } } ArrowFree(array_view->children); } if (array_view->dictionary != NULL) { ArrowArrayViewReset(array_view->dictionary); ArrowFree(array_view->dictionary); } if (array_view->union_type_id_map != NULL) { ArrowFree(array_view->union_type_id_map); } ArrowArrayViewInitFromType(array_view, NANOARROW_TYPE_UNINITIALIZED); } void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) { for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; switch (array_view->layout.buffer_type[i]) { case NANOARROW_BUFFER_TYPE_VALIDITY: array_view->buffer_views[i].size_bytes = _ArrowBytesForBits(length); continue; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: // Probably don't want/need to rely on the producer to have allocated an // offsets buffer of length 1 for a zero-size array array_view->buffer_views[i].size_bytes = (length != 0) * element_size_bytes * (length + 1); continue; case NANOARROW_BUFFER_TYPE_DATA: array_view->buffer_views[i].size_bytes = _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) / 8; continue; case NANOARROW_BUFFER_TYPE_TYPE_ID: case NANOARROW_BUFFER_TYPE_UNION_OFFSET: case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: case NANOARROW_BUFFER_TYPE_SIZE: array_view->buffer_views[i].size_bytes = element_size_bytes * length; continue; case NANOARROW_BUFFER_TYPE_VARIADIC_DATA: case 
NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: case NANOARROW_BUFFER_TYPE_NONE: array_view->buffer_views[i].size_bytes = 0; continue; } } switch (array_view->storage_type) { case NANOARROW_TYPE_STRUCT: case NANOARROW_TYPE_SPARSE_UNION: for (int64_t i = 0; i < array_view->n_children; i++) { ArrowArrayViewSetLength(array_view->children[i], length); } break; case NANOARROW_TYPE_FIXED_SIZE_LIST: if (array_view->n_children >= 1) { ArrowArrayViewSetLength(array_view->children[0], length * array_view->layout.child_size_elements); } default: break; } } // This version recursively extracts information from the array and stores it // in the array view, performing any checks that require the original array. static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view, const struct ArrowArray* array, struct ArrowError* error) { array_view->array = array; array_view->offset = array->offset; array_view->length = array->length; array_view->null_count = array->null_count; array_view->variadic_buffer_sizes = NULL; array_view->variadic_buffers = NULL; array_view->n_variadic_buffers = 0; int64_t buffers_required = 0; const int nfixed_buf = array_view->storage_type == NANOARROW_TYPE_STRING_VIEW || array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ? NANOARROW_BINARY_VIEW_FIXED_BUFFERS : NANOARROW_MAX_FIXED_BUFFERS; for (int i = 0; i < nfixed_buf; i++) { if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { break; } buffers_required++; // Set buffer pointer array_view->buffer_views[i].data.data = array->buffers[i]; // If non-null, set buffer size to unknown. 
if (array->buffers[i] == NULL) { array_view->buffer_views[i].size_bytes = 0; } else { array_view->buffer_views[i].size_bytes = -1; } } if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW || array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) { const int64_t n_buffers = array->n_buffers; const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS; const int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1); array_view->n_variadic_buffers = nvariadic_buf; buffers_required += nvariadic_buf + 1; array_view->variadic_buffers = array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS; array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers - 1]; } if (buffers_required != array->n_buffers) { ArrowErrorSet(error, "Expected array with %" PRId64 " buffer(s) but found %" PRId64 " buffer(s)", buffers_required, array->n_buffers); return EINVAL; } // Check number of children if (array_view->n_children != array->n_children) { ArrowErrorSet(error, "Expected %" PRId64 " children but found %" PRId64 " children", array_view->n_children, array->n_children); return EINVAL; } // Recurse for children for (int64_t i = 0; i < array_view->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i], array->children[i], error)); } // Check dictionary if (array->dictionary == NULL && array_view->dictionary != NULL) { ArrowErrorSet(error, "Expected dictionary but found NULL"); return EINVAL; } if (array->dictionary != NULL && array_view->dictionary == NULL) { ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member"); return EINVAL; } if (array->dictionary != NULL) { NANOARROW_RETURN_NOT_OK( ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error)); } return NANOARROW_OK; } static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, struct ArrowError* error) { if (array_view->length < 0) { ArrowErrorSet(error, "Expected length >= 0 but found length %" PRId64, 
array_view->length); return EINVAL; } if (array_view->offset < 0) { ArrowErrorSet(error, "Expected offset >= 0 but found offset %" PRId64, array_view->offset); return EINVAL; } // Ensure that offset + length fits within an int64 before a possible overflow if ((uint64_t)array_view->offset + (uint64_t)array_view->length > (uint64_t)INT64_MAX) { ArrowErrorSet(error, "Offset + length is > INT64_MAX"); return EINVAL; } // Calculate buffer sizes that do not require buffer access. If marked as // unknown, assign the buffer size; otherwise, validate it. int64_t offset_plus_length = array_view->offset + array_view->length; // Only loop over the first two buffers because the size of the third buffer // is always data dependent for all current Arrow types. for (int i = 0; i < 2; i++) { int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; // Initialize with a value that will cause an error if accidentally used uninitialized // Need to suppress the clang-tidy warning because gcc warns for possible use int64_t min_buffer_size_bytes = // NOLINT(clang-analyzer-deadcode.DeadStores) array_view->buffer_views[i].size_bytes + 1; switch (array_view->layout.buffer_type[i]) { case NANOARROW_BUFFER_TYPE_VALIDITY: if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) { continue; } min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length); break; case NANOARROW_BUFFER_TYPE_SIZE: min_buffer_size_bytes = element_size_bytes * offset_plus_length; break; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: // Probably don't want/need to rely on the producer to have allocated an // offsets buffer of length 1 for a zero-size array min_buffer_size_bytes = (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1); break; case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: min_buffer_size_bytes = (offset_plus_length != 0) * element_size_bytes * offset_plus_length; break; case NANOARROW_BUFFER_TYPE_DATA: min_buffer_size_bytes = 
_ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * offset_plus_length) / 8; break; case NANOARROW_BUFFER_TYPE_TYPE_ID: case NANOARROW_BUFFER_TYPE_UNION_OFFSET: min_buffer_size_bytes = element_size_bytes * offset_plus_length; break; case NANOARROW_BUFFER_TYPE_VARIADIC_DATA: case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: case NANOARROW_BUFFER_TYPE_NONE: continue; } // Assign or validate buffer size if (array_view->buffer_views[i].size_bytes == -1) { array_view->buffer_views[i].size_bytes = min_buffer_size_bytes; } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) { ArrowErrorSet(error, "Expected %s array buffer %d to have size >= %" PRId64 " bytes but found " "buffer with %" PRId64 " bytes", ArrowTypeString(array_view->storage_type), i, min_buffer_size_bytes, array_view->buffer_views[i].size_bytes); return EINVAL; } } // For list, fixed-size list and map views, we can validate the number of children switch (array_view->storage_type) { case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: case NANOARROW_TYPE_MAP: case NANOARROW_TYPE_LIST_VIEW: case NANOARROW_TYPE_LARGE_LIST_VIEW: if (array_view->n_children != 1) { ArrowErrorSet(error, "Expected 1 child of %s array but found %" PRId64 " child arrays", ArrowTypeString(array_view->storage_type), array_view->n_children); return EINVAL; } break; case NANOARROW_TYPE_RUN_END_ENCODED: if (array_view->n_children != 2) { ArrowErrorSet( error, "Expected 2 children for %s array but found %" PRId64 " child arrays", ArrowTypeString(array_view->storage_type), array_view->n_children); return EINVAL; } break; default: break; } // For struct, the sparse union, and the fixed-size list views, we can validate child // lengths. 
int64_t child_min_length; switch (array_view->storage_type) { case NANOARROW_TYPE_SPARSE_UNION: case NANOARROW_TYPE_STRUCT: child_min_length = (array_view->offset + array_view->length); for (int64_t i = 0; i < array_view->n_children; i++) { if (array_view->children[i]->length < child_min_length) { ArrowErrorSet(error, "Expected struct child %" PRId64 " to have length >= %" PRId64 " but found child with " "length %" PRId64, i + 1, child_min_length, array_view->children[i]->length); return EINVAL; } } break; case NANOARROW_TYPE_FIXED_SIZE_LIST: child_min_length = (array_view->offset + array_view->length) * array_view->layout.child_size_elements; if (array_view->children[0]->length < child_min_length) { ArrowErrorSet(error, "Expected child of fixed_size_list array to have length >= %" PRId64 " but " "found array with length %" PRId64, child_min_length, array_view->children[0]->length); return EINVAL; } break; case NANOARROW_TYPE_RUN_END_ENCODED: { if (array_view->n_children != 2) { ArrowErrorSet(error, "Expected 2 children for run-end encoded array but found %" PRId64, array_view->n_children); return EINVAL; } struct ArrowArrayView* run_ends_view = array_view->children[0]; struct ArrowArrayView* values_view = array_view->children[1]; int64_t max_length; switch (run_ends_view->storage_type) { case NANOARROW_TYPE_INT16: max_length = INT16_MAX; break; case NANOARROW_TYPE_INT32: max_length = INT32_MAX; break; case NANOARROW_TYPE_INT64: max_length = INT64_MAX; break; default: ArrowErrorSet( error, "Run-end encoded array only supports INT16, INT32 or INT64 run-ends " "but found run-ends type %s", ArrowTypeString(run_ends_view->storage_type)); return EINVAL; } // There is already a check above that offset_plus_length < INT64_MAX if (offset_plus_length > max_length) { ArrowErrorSet(error, "Offset + length of a run-end encoded array must fit in a value" " of the run end type %s but is %" PRId64 " + %" PRId64, ArrowTypeString(run_ends_view->storage_type), array_view->offset, 
array_view->length);
        return EINVAL;
      }

      if (run_ends_view->length > values_view->length) {
        ArrowErrorSet(error,
                      "Length of run_ends is greater than the length of values: %" PRId64
                      " > %" PRId64,
                      run_ends_view->length, values_view->length);
        return EINVAL;
      }

      if (run_ends_view->length == 0 && values_view->length != 0) {
        ArrowErrorSet(error,
                      "Run-end encoded array has zero length %" PRId64
                      ", but values array has "
                      "non-zero length",
                      values_view->length);
        return EINVAL;
      }

      // Run ends must be non-nullable
      if (run_ends_view->null_count != 0) {
        ArrowErrorSet(error, "Null count must be 0 for run ends array, but is %" PRId64,
                      run_ends_view->null_count);
        return EINVAL;
      }
      break;
    }
    default:
      break;
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(
        ArrowArrayViewValidateMinimal(array_view->children[i], error));
  }

  // Recurse for dictionary
  if (array_view->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
  }

  return NANOARROW_OK;
}

// "Default"-level validation: everything ArrowArrayViewValidateMinimal() checks,
// plus checks that require reading the offsets buffer (offset sign checks,
// derived data-buffer sizes, and child lengths implied by the last offset).
static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
                                         struct ArrowError* error) {
  // Perform minimal validation. This will validate or assign
  // buffer sizes as long as buffer access is not required.
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));

  // Calculate buffer sizes or child lengths that require accessing the offsets
  // buffer. Where appropriate, validate that the first offset is >= 0.
  // If a buffer size is marked as unknown, assign it; otherwise, validate it.
  int64_t offset_plus_length = array_view->offset + array_view->length;
  int64_t first_offset;
  int64_t last_offset;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_BINARY:
      // 32-bit offsets: the data buffer size is implied by the last offset
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int32[array_view->offset];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
                        first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
        if (last_offset < 0) {
          ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
                        last_offset);
          return EINVAL;
        }

        // If the data buffer size is unknown, assign it; otherwise, check it
        if (array_view->buffer_views[2].size_bytes == -1) {
          array_view->buffer_views[2].size_bytes = last_offset;
        } else if (array_view->buffer_views[2].size_bytes < last_offset) {
          ArrowErrorSet(error,
                        "Expected %s array buffer 2 to have size >= %" PRId64
                        " bytes but found "
                        "buffer with %" PRId64 " bytes",
                        ArrowTypeString(array_view->storage_type), last_offset,
                        array_view->buffer_views[2].size_bytes);
          return EINVAL;
        }
      } else if (array_view->buffer_views[2].size_bytes == -1) {
        // If the data buffer size is unknown and there are no bytes in the offset buffer,
        // set the data buffer size to 0.
        array_view->buffer_views[2].size_bytes = 0;
      }
      break;

    case NANOARROW_TYPE_LARGE_STRING:
    case NANOARROW_TYPE_LARGE_BINARY:
      // Same as the STRING/BINARY case but with 64-bit offsets
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int64[array_view->offset];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
                        first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
        if (last_offset < 0) {
          ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
                        last_offset);
          return EINVAL;
        }

        // If the data buffer size is unknown, assign it; otherwise, check it
        if (array_view->buffer_views[2].size_bytes == -1) {
          array_view->buffer_views[2].size_bytes = last_offset;
        } else if (array_view->buffer_views[2].size_bytes < last_offset) {
          ArrowErrorSet(error,
                        "Expected %s array buffer 2 to have size >= %" PRId64
                        " bytes but found "
                        "buffer with %" PRId64 " bytes",
                        ArrowTypeString(array_view->storage_type), last_offset,
                        array_view->buffer_views[2].size_bytes);
          return EINVAL;
        }
      } else if (array_view->buffer_views[2].size_bytes == -1) {
        // If the data buffer size is unknown and there are no bytes in the offset
        // buffer, set the data buffer size to 0.
        array_view->buffer_views[2].size_bytes = 0;
      }
      break;

    case NANOARROW_TYPE_STRUCT:
      // Every child must cover the parent's logical element range
      for (int64_t i = 0; i < array_view->n_children; i++) {
        if (array_view->children[i]->length < offset_plus_length) {
          ArrowErrorSet(error,
                        "Expected struct child %" PRId64 " to have length >= %" PRId64
                        " but found child with "
                        "length %" PRId64,
                        i + 1, offset_plus_length, array_view->children[i]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_LIST:
    case NANOARROW_TYPE_MAP:
      // The child must be long enough to cover the last 32-bit offset
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int32[array_view->offset];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
                        first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
        if (last_offset < 0) {
          ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
                        last_offset);
          return EINVAL;
        }

        if (array_view->children[0]->length < last_offset) {
          ArrowErrorSet(error,
                        "Expected child of %s array to have length >= %" PRId64
                        " but found array with "
                        "length %" PRId64,
                        ArrowTypeString(array_view->storage_type), last_offset,
                        array_view->children[0]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_LARGE_LIST:
      // The child must be long enough to cover the last 64-bit offset
      if (array_view->buffer_views[1].size_bytes != 0) {
        first_offset = array_view->buffer_views[1].data.as_int64[array_view->offset];
        if (first_offset < 0) {
          ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
                        first_offset);
          return EINVAL;
        }

        last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
        if (last_offset < 0) {
          ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
                        last_offset);
          return EINVAL;
        }

        if (array_view->children[0]->length < last_offset) {
          ArrowErrorSet(error,
                        "Expected child of %s array to have length >= %" PRId64
                        " but found array "
                        "with length %" PRId64,
                        ArrowTypeString(array_view->storage_type), last_offset,
                        array_view->children[0]->length);
          return EINVAL;
        }
      }
      break;

    case NANOARROW_TYPE_RUN_END_ENCODED: {
      struct ArrowArrayView* run_ends_view = array_view->children[0];
      if (run_ends_view->length == 0) {
        break;
      }

      // Run ends must be positive, and the last one must cover offset + length
      int64_t first_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
      if (first_run_end < 1) {
        ArrowErrorSet(
            error,
            "All run ends must be greater than 0 but the first run end is %" PRId64,
            first_run_end);
        return EINVAL;
      }

      // offset + length < INT64_MAX is checked in ArrowArrayViewValidateMinimal()
      int64_t last_run_end =
          ArrowArrayViewGetIntUnsafe(run_ends_view, run_ends_view->length - 1);
      if (last_run_end < offset_plus_length) {
        ArrowErrorSet(error,
                      "Last run end is %" PRId64 " but it should be >= (%" PRId64
                      " + %" PRId64 ")",
                      last_run_end, array_view->offset, array_view->length);
        return EINVAL;
      }
      break;
    }
    default:
      break;
  }

  // Recurse for children
  for (int64_t i = 0; i < array_view->n_children; i++) {
    NANOARROW_RETURN_NOT_OK(
        ArrowArrayViewValidateDefault(array_view->children[i], error));
  }

  // Recurse for dictionary
  if (array_view->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error));
  }

  return NANOARROW_OK;
}

// Populate array_view from array, then run default-level validation
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
                                      const struct ArrowArray* array,
                                      struct ArrowError* error) {
  // Extract information from the array into the array view
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error));

  // Run default validation. Because we've marked all non-NULL buffers as having unknown
  // size, validation will also update the buffer sizes as it goes.
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));

  return NANOARROW_OK;
}

// Populate array_view from array, then run minimal-level validation only
ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
                                             const struct ArrowArray* array,
                                             struct ArrowError* error) {
  // Extract information from the array into the array view
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error));

  // Run minimal validation. Because we've marked all non-NULL buffers as having unknown
  // size, validation will also update the buffer sizes as it goes.
  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));

  return NANOARROW_OK;
}

// Checks that the int32 elements of view are non-decreasing (offsets buffer).
// NOTE(review): the error message mentions "element size" because a decreasing
// offset pair implies a negative element size.
static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
                                      struct ArrowError* error) {
  if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
    return NANOARROW_OK;
  }

  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) {
    if (view.data.as_int32[i] < view.data.as_int32[i - 1]) {
      ArrowErrorSet(error, "[%" PRId64 "] Expected element size >= 0", i);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// 64-bit variant of ArrowAssertIncreasingInt32()
static int ArrowAssertIncreasingInt64(struct ArrowBufferView view,
                                      struct ArrowError* error) {
  if (view.size_bytes <= (int64_t)sizeof(int64_t)) {
    return NANOARROW_OK;
  }

  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) {
    if (view.data.as_int64[i] < view.data.as_int64[i - 1]) {
      ArrowErrorSet(error, "[%" PRId64 "] Expected element size >= 0", i);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// Checks that every int8 element of view lies in [min_value, max_value]
static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value,
                                int8_t max_value, struct ArrowError* error) {
  for (int64_t i = 0; i < view.size_bytes; i++) {
    if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) {
      ArrowErrorSet(error,
                    "[%" PRId64 "] Expected buffer value between %" PRId8 " and %" PRId8
                    " but found value %" PRId8,
                    i, min_value, max_value, view.data.as_int8[i]);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

// Checks that every int8 element of view is one of the n_values entries in
// values (linear scan per element; n_values is small: the union child count)
static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
                             int64_t n_values, struct ArrowError* error) {
  for (int64_t i = 0; i < view.size_bytes; i++) {
    int item_found = 0;
    for (int64_t j = 0; j < n_values; j++) {
      if (view.data.as_int8[i] == values[j]) {
        item_found = 1;
        break;
      }
    }

    if (!item_found) {
      ArrowErrorSet(error, "[%" PRId64 "] Unexpected buffer value %" PRId8, i,
                    view.data.as_int8[i]);
      return EINVAL;
    }
  }

  return NANOARROW_OK;
}

static int
ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, struct ArrowError* error) { for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { switch (array_view->layout.buffer_type[i]) { // Only validate the portion of the buffer that is strictly required, // which includes not validating the offset buffer of a zero-length array. case NANOARROW_BUFFER_TYPE_DATA_OFFSET: if (array_view->length == 0) { continue; } if (array_view->layout.element_size_bits[i] == 32) { struct ArrowBufferView sliced_offsets; sliced_offsets.data.as_int32 = array_view->buffer_views[i].data.as_int32 + array_view->offset; sliced_offsets.size_bytes = (array_view->length + 1) * sizeof(int32_t); NANOARROW_RETURN_NOT_OK(ArrowAssertIncreasingInt32(sliced_offsets, error)); } else { struct ArrowBufferView sliced_offsets; sliced_offsets.data.as_int64 = array_view->buffer_views[i].data.as_int64 + array_view->offset; sliced_offsets.size_bytes = (array_view->length + 1) * sizeof(int64_t); NANOARROW_RETURN_NOT_OK(ArrowAssertIncreasingInt64(sliced_offsets, error)); } break; default: break; } } if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION || array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) { struct ArrowBufferView sliced_type_ids; sliced_type_ids.size_bytes = array_view->length * sizeof(int8_t); if (array_view->length > 0) { sliced_type_ids.data.as_int8 = array_view->buffer_views[0].data.as_int8 + array_view->offset; } else { sliced_type_ids.data.as_int8 = NULL; } if (array_view->union_type_id_map == NULL) { // If the union_type_id map is NULL (e.g., when using ArrowArrayInitFromType() + // ArrowArrayAllocateChildren() + ArrowArrayFinishBuilding()), we don't have enough // information to validate this buffer. 
ArrowErrorSet(error, "Insufficient information provided for validation of union array"); return EINVAL; } else if (_ArrowParsedUnionTypeIdsWillEqualChildIndices( array_view->union_type_id_map, array_view->n_children, array_view->n_children)) { NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8( sliced_type_ids, 0, (int8_t)(array_view->n_children - 1), error)); } else { NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(sliced_type_ids, array_view->union_type_id_map + 128, array_view->n_children, error)); } } if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION && array_view->union_type_id_map != NULL) { // Check that offsets refer to child elements that actually exist for (int64_t i = 0; i < array_view->length; i++) { int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i); int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i); int64_t child_length = array_view->children[child_id]->length; if (offset < 0 || offset > child_length) { ArrowErrorSet(error, "[%" PRId64 "] Expected union offset for child id %" PRId8 " to be between 0 and %" PRId64 " but " "found offset value %" PRId64, i, child_id, child_length, offset); return EINVAL; } } } if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) { struct ArrowArrayView* run_ends_view = array_view->children[0]; if (run_ends_view->length > 0) { int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0); for (int64_t i = 1; i < run_ends_view->length; i++) { const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i); if (run_end <= last_run_end) { ArrowErrorSet( error, "Every run end must be strictly greater than the previous run end, " "but run_ends[%" PRId64 " is %" PRId64 " and run_ends[%" PRId64 "] is %" PRId64, i, run_end, i - 1, last_run_end); return EINVAL; } last_run_end = run_end; } } } if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW || array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) { int64_t child_len = array_view->children[0]->length; struct ArrowBufferView 
offsets, sizes; offsets.data.data = array_view->buffer_views[1].data.data; sizes.data.data = array_view->buffer_views[2].data.data; for (int64_t i = array_view->offset; i < array_view->length + array_view->offset; i++) { int64_t offset, size; if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW) { offset = offsets.data.as_int32[i]; size = sizes.data.as_int32[i]; } else { offset = offsets.data.as_int64[i]; size = sizes.data.as_int64[i]; } if (offset < 0) { ArrowErrorSet(error, "Invalid negative offset %" PRId64 " at index %" PRId64, offset, i); return EINVAL; } if (size < 0) { ArrowErrorSet(error, "Invalid negative size %" PRId64 " at index %" PRId64, size, i); return EINVAL; } if ((offset + size) > child_len) { ArrowErrorSet(error, "Offset: %" PRId64 " + size: %" PRId64 " at index: %" PRId64 " exceeds length of child view: %" PRId64, offset, size, i, child_len); return EINVAL; } } } // Recurse for children for (int64_t i = 0; i < array_view->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error)); } // Dictionary validation not implemented if (array_view->dictionary != NULL) { NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->dictionary, error)); // TODO: validate the indices } return NANOARROW_OK; } ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, enum ArrowValidationLevel validation_level, struct ArrowError* error) { switch (validation_level) { case NANOARROW_VALIDATION_LEVEL_NONE: return NANOARROW_OK; case NANOARROW_VALIDATION_LEVEL_MINIMAL: return ArrowArrayViewValidateMinimal(array_view, error); case NANOARROW_VALIDATION_LEVEL_DEFAULT: return ArrowArrayViewValidateDefault(array_view, error); case NANOARROW_VALIDATION_LEVEL_FULL: NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error)); return ArrowArrayViewValidateFull(array_view, error); } ArrowErrorSet(error, "validation_level not recognized"); return EINVAL; } struct ArrowComparisonInternalState { enum 
ArrowCompareLevel level;
  int is_equal;
  struct ArrowError* reason;
};

// Prepend a formatted path segment (e.g. ".children[3]") to the message
// already stored in out, shifting the existing message to the right.
NANOARROW_CHECK_PRINTF_ATTRIBUTE static void ArrowComparePrependPath(
    struct ArrowError* out, const char* fmt, ...) {
  if (out == NULL) {
    return;
  }

  char prefix[128];
  prefix[0] = '\0';
  va_list args;
  va_start(args, fmt);
  int prefix_len = vsnprintf(prefix, sizeof(prefix), fmt, args);
  va_end(args);

  if (prefix_len <= 0) {
    return;
  }

  size_t out_len = strlen(out->message);

  // Clamp the number of bytes shifted so the prefix + moved text fit the buffer
  size_t out_len_to_move = sizeof(struct ArrowError) - prefix_len - 1;
  if (out_len_to_move > out_len) {
    out_len_to_move = out_len;
  }

  memmove(out->message + prefix_len, out->message, out_len_to_move);
  memcpy(out->message, prefix, prefix_len);
  // NOTE(review): when the existing message was truncated above
  // (out_len_to_move < out_len), out_len + prefix_len can index one past the
  // end of out->message; it looks like this should terminate at
  // prefix_len + out_len_to_move instead — confirm against upstream.
  out->message[out_len + prefix_len] = '\0';
}

// Record inequality (using the failing condition's text as the reason) and
// return from the enclosing void helper
#define SET_NOT_EQUAL_AND_RETURN_IF_IMPL(cond_, state_, reason_) \
  do {                                                           \
    if (cond_) {                                                 \
      ArrowErrorSet(state_->reason, ": %s", reason_);            \
      state_->is_equal = 0;                                      \
      return;                                                    \
    }                                                            \
  } while (0)

#define SET_NOT_EQUAL_AND_RETURN_IF(condition_, state_) \
  SET_NOT_EQUAL_AND_RETURN_IF_IMPL(condition_, state_, #condition_)

// Byte-wise comparison of buffer i of two array views (size, then content)
static void ArrowArrayViewCompareBuffer(const struct ArrowArrayView* actual,
                                        const struct ArrowArrayView* expected, int i,
                                        struct ArrowComparisonInternalState* state) {
  SET_NOT_EQUAL_AND_RETURN_IF(
      actual->buffer_views[i].size_bytes != expected->buffer_views[i].size_bytes, state);

  int64_t buffer_size = actual->buffer_views[i].size_bytes;
  if (buffer_size > 0) {
    SET_NOT_EQUAL_AND_RETURN_IF(
        memcmp(actual->buffer_views[i].data.data, expected->buffer_views[i].data.data,
               buffer_size) != 0,
        state);
  }
}

// Recursive structural + byte-wise equality; on the first mismatch, records
// the failing condition and prepends the path component on the way out.
static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual,
                                           const struct ArrowArrayView* expected,
                                           struct ArrowComparisonInternalState* state) {
  SET_NOT_EQUAL_AND_RETURN_IF(actual->storage_type != expected->storage_type, state);
  SET_NOT_EQUAL_AND_RETURN_IF(actual->n_children != expected->n_children, state);
  SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary == NULL && expected->dictionary != NULL,
                              state);
  SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary != NULL && expected->dictionary == NULL,
                              state);

  SET_NOT_EQUAL_AND_RETURN_IF(actual->length != expected->length, state);
  SET_NOT_EQUAL_AND_RETURN_IF(actual->offset != expected->offset, state);
  SET_NOT_EQUAL_AND_RETURN_IF(actual->null_count != expected->null_count, state);

  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
    ArrowArrayViewCompareBuffer(actual, expected, i, state);
    if (!state->is_equal) {
      ArrowComparePrependPath(state->reason, ".buffers[%d]", i);
      return;
    }
  }

  for (int64_t i = 0; i < actual->n_children; i++) {
    ArrowArrayViewCompareIdentical(actual->children[i], expected->children[i], state);
    if (!state->is_equal) {
      ArrowComparePrependPath(state->reason, ".children[%" PRId64 "]", i);
      return;
    }
  }

  if (actual->dictionary != NULL) {
    ArrowArrayViewCompareIdentical(actual->dictionary, expected->dictionary, state);
    if (!state->is_equal) {
      ArrowComparePrependPath(state->reason, ".dictionary");
      return;
    }
  }
}

// Top-level entry point to take care of creating, cleaning up, and
// propagating the ArrowComparisonInternalState to the caller
ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual,
                                     const struct ArrowArrayView* expected,
                                     enum ArrowCompareLevel level, int* out,
                                     struct ArrowError* reason) {
  struct ArrowComparisonInternalState state;
  state.level = level;
  state.is_equal = 1;
  state.reason = reason;

  switch (level) {
    case NANOARROW_COMPARE_IDENTICAL:
      ArrowArrayViewCompareIdentical(actual, expected, &state);
      break;
    default:
      // Unknown comparison level
      return EINVAL;
  }

  *out = state.is_equal;
  if (!state.is_equal) {
    ArrowComparePrependPath(state.reason, "root");
  }

  return NANOARROW_OK;
}

#undef SET_NOT_EQUAL_AND_RETURN_IF
#undef SET_NOT_EQUAL_AND_RETURN_IF_IMPL
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include "nanoarrow/nanoarrow.h" struct BasicArrayStreamPrivate { struct ArrowSchema schema; int64_t n_arrays; struct ArrowArray* arrays; int64_t arrays_i; }; static int ArrowBasicArrayStreamGetSchema(struct ArrowArrayStream* array_stream, struct ArrowSchema* schema) { if (array_stream == NULL || array_stream->release == NULL) { return EINVAL; } struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)array_stream->private_data; return ArrowSchemaDeepCopy(&private_data->schema, schema); } static int ArrowBasicArrayStreamGetNext(struct ArrowArrayStream* array_stream, struct ArrowArray* array) { if (array_stream == NULL || array_stream->release == NULL) { return EINVAL; } struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)array_stream->private_data; if (private_data->arrays_i == private_data->n_arrays) { array->release = NULL; return NANOARROW_OK; } ArrowArrayMove(&private_data->arrays[private_data->arrays_i++], array); return NANOARROW_OK; } static const char* ArrowBasicArrayStreamGetLastError( struct ArrowArrayStream* array_stream) { NANOARROW_UNUSED(array_stream); return NULL; } static void ArrowBasicArrayStreamRelease(struct ArrowArrayStream* array_stream) { if (array_stream == NULL || array_stream->release == NULL) { return; } struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)array_stream->private_data; 
if (private_data->schema.release != NULL) { ArrowSchemaRelease(&private_data->schema); } for (int64_t i = 0; i < private_data->n_arrays; i++) { if (private_data->arrays[i].release != NULL) { ArrowArrayRelease(&private_data->arrays[i]); } } if (private_data->arrays != NULL) { ArrowFree(private_data->arrays); } ArrowFree(private_data); array_stream->release = NULL; } ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, struct ArrowSchema* schema, int64_t n_arrays) { struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)ArrowMalloc( sizeof(struct BasicArrayStreamPrivate)); if (private_data == NULL) { return ENOMEM; } ArrowSchemaMove(schema, &private_data->schema); private_data->n_arrays = n_arrays; private_data->arrays = NULL; private_data->arrays_i = 0; if (n_arrays > 0) { private_data->arrays = (struct ArrowArray*)ArrowMalloc(n_arrays * sizeof(struct ArrowArray)); if (private_data->arrays == NULL) { ArrowBasicArrayStreamRelease(array_stream); ArrowFree(private_data); return ENOMEM; } } for (int64_t i = 0; i < private_data->n_arrays; i++) { private_data->arrays[i].release = NULL; } array_stream->get_schema = &ArrowBasicArrayStreamGetSchema; array_stream->get_next = &ArrowBasicArrayStreamGetNext; array_stream->get_last_error = ArrowBasicArrayStreamGetLastError; array_stream->release = ArrowBasicArrayStreamRelease; array_stream->private_data = private_data; return NANOARROW_OK; } void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, struct ArrowArray* array) { struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)array_stream->private_data; ArrowArrayMove(array, &private_data->arrays[i]); } ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, struct ArrowError* error) { struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)array_stream->private_data; struct ArrowArrayView array_view; 
NANOARROW_RETURN_NOT_OK( ArrowArrayViewInitFromSchema(&array_view, &private_data->schema, error)); for (int64_t i = 0; i < private_data->n_arrays; i++) { if (private_data->arrays[i].release != NULL) { int result = ArrowArrayViewSetArray(&array_view, &private_data->arrays[i], error); if (result != NANOARROW_OK) { ArrowArrayViewReset(&array_view); return result; } } } ArrowArrayViewReset(&array_view); return NANOARROW_OK; } ================================================ FILE: data/lang/cpp/nanoarrow/nanoarrow.h ================================================ // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
#ifndef NANOARROW_CONFIG_H_INCLUDED #define NANOARROW_CONFIG_H_INCLUDED #define NANOARROW_VERSION_MAJOR 0 #define NANOARROW_VERSION_MINOR 8 #define NANOARROW_VERSION_PATCH 0 #define NANOARROW_VERSION "0.8.0" #define NANOARROW_VERSION_INT \ (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ NANOARROW_VERSION_PATCH) // #define NANOARROW_NAMESPACE YourNamespaceHere #if !defined(NANOARROW_CXX_NAMESPACE) #define NANOARROW_CXX_NAMESPACE nanoarrow #endif #define NANOARROW_CXX_NAMESPACE_BEGIN namespace NANOARROW_CXX_NAMESPACE { #define NANOARROW_CXX_NAMESPACE_END } #endif // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
#ifndef NANOARROW_NANOARROW_TYPES_H_INCLUDED
#define NANOARROW_NANOARROW_TYPES_H_INCLUDED

// FIX: the header names were lost from these includes during extraction
// (they read "#include #include"); restored from upstream nanoarrow:
// stdint.h for fixed-width integer types, string.h for memcpy/strlen used by
// the inline helpers later in this header.
#include <stdint.h>
#include <string.h>

#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
// fprintf()/abort() for the debug-mode NANOARROW_PRINT_AND_DIE default
#include <stdio.h>
#include <stdlib.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

// Extra guard for versions of Arrow without the canonical guard
#ifndef ARROW_FLAG_DICTIONARY_ORDERED

/// \defgroup nanoarrow-arrow-cdata Arrow C Data interface
///
/// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html)
/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html)
/// interfaces are part of the
/// Arrow Columnar Format specification
/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
/// documentation of these structures.
///
/// @{

#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE

#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4

struct ArrowSchema {
  // Array type description
  const char* format;
  const char* name;
  const char* metadata;
  int64_t flags;
  int64_t n_children;
  struct ArrowSchema** children;
  struct ArrowSchema* dictionary;

  // Release callback
  void (*release)(struct ArrowSchema*);
  // Opaque producer-specific data
  void* private_data;
};

struct ArrowArray {
  // Array data description
  int64_t length;
  int64_t null_count;
  int64_t offset;
  int64_t n_buffers;
  int64_t n_children;
  const void** buffers;
  struct ArrowArray** children;
  struct ArrowArray* dictionary;

  // Release callback
  void (*release)(struct ArrowArray*);
  // Opaque producer-specific data
  void* private_data;
};

#endif  // ARROW_C_DATA_INTERFACE

#ifndef ARROW_C_STREAM_INTERFACE
#define ARROW_C_STREAM_INTERFACE

struct ArrowArrayStream {
  // Callback to get the stream type
  // (will be the same for all arrays in the stream).
  //
  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
  //
  // If successful, the ArrowSchema must be released independently from the stream.
  int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);

  // Callback to get the next array
  // (if no error and the array is released, the stream has ended)
  //
  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
  //
  // If successful, the ArrowArray must be released independently from the stream.
  int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);

  // Callback to get optional detailed error information.
  // This must only be called if the last stream operation failed
  // with a non-0 return code.
  //
  // Return value: pointer to a null-terminated character array describing
  // the last error, or NULL if no description is available.
  //
  // The returned pointer is only valid until the next operation on this stream
  // (including release).
  const char* (*get_last_error)(struct ArrowArrayStream*);

  // Release callback: release the stream's own resources.
  // Note that arrays returned by `get_next` must be individually released.
  void (*release)(struct ArrowArrayStream*);

  // Opaque producer-specific data
  void* private_data;
};

#endif  // ARROW_C_STREAM_INTERFACE
#endif  // ARROW_FLAG_DICTIONARY_ORDERED

/// @}

// Utility macros
#define _NANOARROW_CONCAT(x, y) x##y
#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)

#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
  do {                                            \
    const int NAME = (EXPR);                      \
    if (NAME) return NAME;                        \
  } while (0)

#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
  NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL)

#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \
  NANOARROW_RETURN_NOT_OK((x_ <= max_) ?
NANOARROW_OK : EINVAL)

#if defined(NANOARROW_DEBUG)
// Debug build: include source file/line in the auto-generated error message
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
  do {                                                                                 \
    const int NAME = (EXPR);                                                           \
    if (NAME) {                                                                        \
      ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR,    \
                    NAME, __FILE__, __LINE__);                                         \
      return NAME;                                                                     \
    }                                                                                  \
  } while (0)
#else
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
  do {                                                                                 \
    const int NAME = (EXPR);                                                           \
    if (NAME) {                                                                        \
      ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME);      \
      return NAME;                                                                     \
    }                                                                                  \
  } while (0)
#endif

#if defined(NANOARROW_DEBUG)

// For checking ArrowErrorSet() calls for valid printf format strings/arguments
// If using mingw's c99-compliant printf, we need a different format-checking attribute
#if defined(__USE_MINGW_ANSI_STDIO) && defined(__MINGW_PRINTF_FORMAT)
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE \
  __attribute__((format(__MINGW_PRINTF_FORMAT, 2, 3)))
#elif defined(__GNUC__)
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE __attribute__((format(printf, 2, 3)))
#else
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE
#endif

// For checking calls to functions that return ArrowErrorCode
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define NANOARROW_CHECK_RETURN_ATTRIBUTE __attribute__((warn_unused_result))
#elif defined(_MSC_VER) && (_MSC_VER >= 1700)
#define NANOARROW_CHECK_RETURN_ATTRIBUTE _Check_return_
#else
#define NANOARROW_CHECK_RETURN_ATTRIBUTE
#endif

#else
// Release build: the checking attributes expand to nothing
#define NANOARROW_CHECK_RETURN_ATTRIBUTE
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE
#endif

#define NANOARROW_UNUSED(x) (void)(x)

/// \brief Return code for success.
/// \ingroup nanoarrow-errors
#define NANOARROW_OK 0

/// \brief Represents an errno-compatible error code
/// \ingroup nanoarrow-errors
typedef int ArrowErrorCode;

#if defined(NANOARROW_DEBUG)
// In debug builds, tag every ArrowErrorCode-returning function so the compiler
// warns when a caller discards the result
#define ArrowErrorCode NANOARROW_CHECK_RETURN_ATTRIBUTE ArrowErrorCode
#endif

/// \brief Flags supported by ArrowSchemaViewInit()
/// \ingroup nanoarrow-schema-view
#define NANOARROW_FLAG_ALL_SUPPORTED \
  (ARROW_FLAG_DICTIONARY_ORDERED | ARROW_FLAG_NULLABLE | ARROW_FLAG_MAP_KEYS_SORTED)

/// \brief Error type containing a UTF-8 encoded message.
/// \ingroup nanoarrow-errors
struct ArrowError {
  /// \brief A character buffer with space for an error message.
  char message[1024];
};

/// \brief Ensure an ArrowError is null-terminated by zeroing the first character.
/// \ingroup nanoarrow-errors
///
/// If error is NULL, this function does nothing.
static inline void ArrowErrorInit(struct ArrowError* error) {
  if (error != NULL) {
    error->message[0] = '\0';
  }
}

/// \brief Get the contents of an error
/// \ingroup nanoarrow-errors
///
/// If error is NULL, returns "", or returns the contents of the error message
/// otherwise.
static inline const char* ArrowErrorMessage(struct ArrowError* error) {
  if (error == NULL) {
    return "";
  } else {
    return error->message;
  }
}

/// \brief Set the contents of an error from an existing null-terminated string
/// \ingroup nanoarrow-errors
///
/// If error is NULL, this function does nothing.
static inline void ArrowErrorSetString(struct ArrowError* error, const char* src) { if (error == NULL) { return; } int64_t src_len = strlen(src); if (src_len >= ((int64_t)sizeof(error->message))) { memcpy(error->message, src, sizeof(error->message) - 1); error->message[sizeof(error->message) - 1] = '\0'; } else { memcpy(error->message, src, src_len); error->message[src_len] = '\0'; } } /// \brief Check the result of an expression and return it if not NANOARROW_OK /// \ingroup nanoarrow-errors #define NANOARROW_RETURN_NOT_OK(EXPR) \ _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR) /// \brief Check the result of an expression and return it if not NANOARROW_OK, /// adding an auto-generated message to an ArrowError. /// \ingroup nanoarrow-errors /// /// This macro is used to ensure that functions that accept an ArrowError /// as input always set its message when returning an error code (e.g., when calling /// a nanoarrow function that does *not* accept ArrowError). #define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \ _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \ _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR) #if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) #define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \ do { \ fprintf(stderr, "%s failed with code %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \ __FILE__, (int)__LINE__); \ abort(); \ } while (0) #endif #if defined(NANOARROW_DEBUG) #define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \ do { \ const int NAME = (EXPR); \ if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR); \ } while (0) /// \brief Assert that an expression's value is NANOARROW_OK /// \ingroup nanoarrow-errors /// /// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true), /// print a message to stderr and abort. If nanoarrow was built in release mode, /// this statement has no effect. 
/// You can customize fatal error behaviour by defining the NANOARROW_PRINT_AND_DIE
/// macro before including nanoarrow.h. This macro is provided as a convenience for
/// users and is not used internally.
#define NANOARROW_ASSERT_OK(EXPR) \
  _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR)

// Abort with a message when EXPR is false (debug builds only).
#define _NANOARROW_DCHECK_IMPL(EXPR, EXPR_STR) \
  do { \
    if (!(EXPR)) NANOARROW_PRINT_AND_DIE(-1, EXPR_STR); \
  } while (0)

#define NANOARROW_DCHECK(EXPR) _NANOARROW_DCHECK_IMPL(EXPR, #EXPR)
#else
// Release builds: NANOARROW_ASSERT_OK still evaluates EXPR (for side effects);
// NANOARROW_DCHECK expands to nothing.
#define NANOARROW_ASSERT_OK(EXPR) (void)(EXPR)
#define NANOARROW_DCHECK(EXPR)
#endif

// Transfer ownership of src into dst by bitwise copy, then mark src as released
// (release == NULL) per the Arrow C data interface "moved" convention.
static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) {
  NANOARROW_DCHECK(src != NULL);
  NANOARROW_DCHECK(dst != NULL);
  memcpy(dst, src, sizeof(struct ArrowSchema));
  src->release = NULL;
}

// Invoke the schema's release callback. The callback is expected to set
// schema->release to NULL (checked in debug builds).
static inline void ArrowSchemaRelease(struct ArrowSchema* schema) {
  NANOARROW_DCHECK(schema != NULL);
  schema->release(schema);
  NANOARROW_DCHECK(schema->release == NULL);
}

// Same move semantics as ArrowSchemaMove, for ArrowArray.
static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) {
  NANOARROW_DCHECK(src != NULL);
  NANOARROW_DCHECK(dst != NULL);
  memcpy(dst, src, sizeof(struct ArrowArray));
  src->release = NULL;
}

// Same release semantics as ArrowSchemaRelease, for ArrowArray.
static inline void ArrowArrayRelease(struct ArrowArray* array) {
  NANOARROW_DCHECK(array != NULL);
  array->release(array);
  NANOARROW_DCHECK(array->release == NULL);
}

// Same move semantics as ArrowSchemaMove, for ArrowArrayStream.
static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
                                        struct ArrowArrayStream* dst) {
  NANOARROW_DCHECK(src != NULL);
  NANOARROW_DCHECK(dst != NULL);
  memcpy(dst, src, sizeof(struct ArrowArrayStream));
  src->release = NULL;
}

// Fetch the stream's last error message, mapping a NULL result to "" so callers
// never have to NULL-check the returned pointer.
static inline const char* ArrowArrayStreamGetLastError(
    struct ArrowArrayStream* array_stream) {
  NANOARROW_DCHECK(array_stream != NULL);

  const char* value = array_stream->get_last_error(array_stream);
  if (value == NULL) {
    return "";
  } else {
    return value;
  }
}

// Call the stream's get_schema callback; on failure, copy the stream's last
// error message into the (optional) ArrowError.
static inline ArrowErrorCode ArrowArrayStreamGetSchema(
    struct ArrowArrayStream* array_stream, struct ArrowSchema* out,
    struct ArrowError* error) {
  NANOARROW_DCHECK(array_stream != NULL);

  int result = array_stream->get_schema(array_stream, out);
  if (result != NANOARROW_OK && error != NULL) {
    ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream));
  }

  return result;
}

// Call the stream's get_next callback; on failure, copy the stream's last
// error message into the (optional) ArrowError.
static inline ArrowErrorCode ArrowArrayStreamGetNext(
    struct ArrowArrayStream* array_stream, struct ArrowArray* out,
    struct ArrowError* error) {
  NANOARROW_DCHECK(array_stream != NULL);

  int result = array_stream->get_next(array_stream, out);
  if (result != NANOARROW_OK && error != NULL) {
    ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream));
  }

  return result;
}

// Invoke the stream's release callback (expected to NULL the release member).
static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream) {
  NANOARROW_DCHECK(array_stream != NULL);
  array_stream->release(array_stream);
  NANOARROW_DCHECK(array_stream->release == NULL);
}

// Runtime endianness probe: returns nonzero on little-endian platforms
// (inspects the first byte of a uint32_t equal to 1 via memcpy).
static char _ArrowIsLittleEndian(void) {
  uint32_t check = 1;
  char first_byte;
  memcpy(&first_byte, &check, sizeof(char));
  return first_byte;
}

/// \brief Arrow type enumerator
/// \ingroup nanoarrow-utils
///
/// These names are intended to map to the corresponding arrow::Type::type
/// enumerator; however, the numeric values are specifically not equal
/// (i.e., do not rely on numeric comparison).
enum ArrowType {
  NANOARROW_TYPE_UNINITIALIZED = 0,
  NANOARROW_TYPE_NA = 1,
  NANOARROW_TYPE_BOOL,
  NANOARROW_TYPE_UINT8,
  NANOARROW_TYPE_INT8,
  NANOARROW_TYPE_UINT16,
  NANOARROW_TYPE_INT16,
  NANOARROW_TYPE_UINT32,
  NANOARROW_TYPE_INT32,
  NANOARROW_TYPE_UINT64,
  NANOARROW_TYPE_INT64,
  NANOARROW_TYPE_HALF_FLOAT,
  NANOARROW_TYPE_FLOAT,
  NANOARROW_TYPE_DOUBLE,
  NANOARROW_TYPE_STRING,
  NANOARROW_TYPE_BINARY,
  NANOARROW_TYPE_FIXED_SIZE_BINARY,
  NANOARROW_TYPE_DATE32,
  NANOARROW_TYPE_DATE64,
  NANOARROW_TYPE_TIMESTAMP,
  NANOARROW_TYPE_TIME32,
  NANOARROW_TYPE_TIME64,
  NANOARROW_TYPE_INTERVAL_MONTHS,
  NANOARROW_TYPE_INTERVAL_DAY_TIME,
  NANOARROW_TYPE_DECIMAL128,
  NANOARROW_TYPE_DECIMAL256,
  NANOARROW_TYPE_LIST,
  NANOARROW_TYPE_STRUCT,
  NANOARROW_TYPE_SPARSE_UNION,
  NANOARROW_TYPE_DENSE_UNION,
  NANOARROW_TYPE_DICTIONARY,
  NANOARROW_TYPE_MAP,
  NANOARROW_TYPE_EXTENSION,
  NANOARROW_TYPE_FIXED_SIZE_LIST,
  NANOARROW_TYPE_DURATION,
  NANOARROW_TYPE_LARGE_STRING,
  NANOARROW_TYPE_LARGE_BINARY,
  NANOARROW_TYPE_LARGE_LIST,
  NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
  NANOARROW_TYPE_RUN_END_ENCODED,
  NANOARROW_TYPE_BINARY_VIEW,
  NANOARROW_TYPE_STRING_VIEW,
  NANOARROW_TYPE_DECIMAL32,
  NANOARROW_TYPE_DECIMAL64,
  NANOARROW_TYPE_LIST_VIEW,
  NANOARROW_TYPE_LARGE_LIST_VIEW,
};

/// \brief Get a string value of an enum ArrowType value
/// \ingroup nanoarrow-utils
///
/// Returns NULL for invalid values for type (including
/// NANOARROW_TYPE_UNINITIALIZED, which has no string form).
static inline const char* ArrowTypeString(enum ArrowType type);

static inline const char* ArrowTypeString(enum ArrowType type) {
  // Single exit point: the result is collected into one local and returned.
  const char* type_name = NULL;
  switch (type) {
    case NANOARROW_TYPE_NA: type_name = "na"; break;
    case NANOARROW_TYPE_BOOL: type_name = "bool"; break;
    case NANOARROW_TYPE_UINT8: type_name = "uint8"; break;
    case NANOARROW_TYPE_INT8: type_name = "int8"; break;
    case NANOARROW_TYPE_UINT16: type_name = "uint16"; break;
    case NANOARROW_TYPE_INT16: type_name = "int16"; break;
    case NANOARROW_TYPE_UINT32: type_name = "uint32"; break;
    case NANOARROW_TYPE_INT32: type_name = "int32"; break;
    case NANOARROW_TYPE_UINT64: type_name = "uint64"; break;
    case NANOARROW_TYPE_INT64: type_name = "int64"; break;
    case NANOARROW_TYPE_HALF_FLOAT: type_name = "half_float"; break;
    case NANOARROW_TYPE_FLOAT: type_name = "float"; break;
    case NANOARROW_TYPE_DOUBLE: type_name = "double"; break;
    case NANOARROW_TYPE_STRING: type_name = "string"; break;
    case NANOARROW_TYPE_BINARY: type_name = "binary"; break;
    case NANOARROW_TYPE_FIXED_SIZE_BINARY: type_name = "fixed_size_binary"; break;
    case NANOARROW_TYPE_DATE32: type_name = "date32"; break;
    case NANOARROW_TYPE_DATE64: type_name = "date64"; break;
    case NANOARROW_TYPE_TIMESTAMP: type_name = "timestamp"; break;
    case NANOARROW_TYPE_TIME32: type_name = "time32"; break;
    case NANOARROW_TYPE_TIME64: type_name = "time64"; break;
    case NANOARROW_TYPE_INTERVAL_MONTHS: type_name = "interval_months"; break;
    case NANOARROW_TYPE_INTERVAL_DAY_TIME: type_name = "interval_day_time"; break;
    case NANOARROW_TYPE_DECIMAL32: type_name = "decimal32"; break;
    case NANOARROW_TYPE_DECIMAL64: type_name = "decimal64"; break;
    case NANOARROW_TYPE_DECIMAL128: type_name = "decimal128"; break;
    case NANOARROW_TYPE_DECIMAL256: type_name = "decimal256"; break;
    case NANOARROW_TYPE_LIST: type_name = "list"; break;
    case NANOARROW_TYPE_STRUCT: type_name = "struct"; break;
    case NANOARROW_TYPE_SPARSE_UNION: type_name = "sparse_union"; break;
    case NANOARROW_TYPE_DENSE_UNION: type_name = "dense_union"; break;
    case NANOARROW_TYPE_DICTIONARY: type_name = "dictionary"; break;
    case NANOARROW_TYPE_MAP: type_name = "map"; break;
    case NANOARROW_TYPE_EXTENSION: type_name = "extension"; break;
    case NANOARROW_TYPE_FIXED_SIZE_LIST: type_name = "fixed_size_list"; break;
    case NANOARROW_TYPE_DURATION: type_name = "duration"; break;
    case NANOARROW_TYPE_LARGE_STRING: type_name = "large_string"; break;
    case NANOARROW_TYPE_LARGE_BINARY: type_name = "large_binary"; break;
    case NANOARROW_TYPE_LARGE_LIST: type_name = "large_list"; break;
    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
      type_name = "interval_month_day_nano";
      break;
    case NANOARROW_TYPE_RUN_END_ENCODED: type_name = "run_end_encoded"; break;
    case NANOARROW_TYPE_BINARY_VIEW: type_name = "binary_view"; break;
    case NANOARROW_TYPE_STRING_VIEW: type_name = "string_view"; break;
    case NANOARROW_TYPE_LIST_VIEW: type_name = "list_view"; break;
    case NANOARROW_TYPE_LARGE_LIST_VIEW: type_name = "large_list_view"; break;
    default: type_name = NULL; break;
  }
  return type_name;
}

/// \brief Arrow time unit enumerator
/// \ingroup nanoarrow-utils
///
/// These names and values map to the corresponding arrow::TimeUnit::type
/// enumerator.
enum ArrowTimeUnit {
  NANOARROW_TIME_UNIT_SECOND = 0,
  NANOARROW_TIME_UNIT_MILLI = 1,
  NANOARROW_TIME_UNIT_MICRO = 2,
  NANOARROW_TIME_UNIT_NANO = 3
};

/// \brief Validation level enumerator
/// \ingroup nanoarrow-array
enum ArrowValidationLevel {
  /// \brief Do not validate buffer sizes or content.
  NANOARROW_VALIDATION_LEVEL_NONE = 0,

  /// \brief Validate buffer sizes that depend on array length but do not validate buffer
  /// sizes that depend on buffer data access.
  NANOARROW_VALIDATION_LEVEL_MINIMAL = 1,

  /// \brief Validate all buffer sizes, including those that require buffer data access,
  /// but do not perform any checks that are O(1) along the length of the buffers.
  NANOARROW_VALIDATION_LEVEL_DEFAULT = 2,

  /// \brief Validate all buffer sizes and all buffer content. This is useful in the
  /// context of untrusted input or input that may have been corrupted in transit.
  NANOARROW_VALIDATION_LEVEL_FULL = 3
};

/// \brief Comparison level enumerator
/// \ingroup nanoarrow-utils
enum ArrowCompareLevel {
  /// \brief Consider arrays equal if buffers contain identical content
  /// and have identical offset, null count, and length. Note that this is
  /// a much stricter check than logical equality, which would take into
  /// account potentially different content of null slots, arrays with a
  /// non-zero offset, and other considerations.
  NANOARROW_COMPARE_IDENTICAL,
};

/// \brief Get a string value of an enum ArrowTimeUnit value
/// \ingroup nanoarrow-utils
///
/// Returns NULL for invalid values for time_unit
static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit);

static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
  switch (time_unit) {
    case NANOARROW_TIME_UNIT_SECOND:
      return "s";
    case NANOARROW_TIME_UNIT_MILLI:
      return "ms";
    case NANOARROW_TIME_UNIT_MICRO:
      return "us";
    case NANOARROW_TIME_UNIT_NANO:
      return "ns";
    default:
      return NULL;
  }
}

/// \brief Functional types of buffers as described in the Arrow Columnar Specification
/// \ingroup nanoarrow-array-view
enum ArrowBufferType {
  NANOARROW_BUFFER_TYPE_NONE,
  NANOARROW_BUFFER_TYPE_VALIDITY,
  NANOARROW_BUFFER_TYPE_TYPE_ID,
  NANOARROW_BUFFER_TYPE_UNION_OFFSET,
  NANOARROW_BUFFER_TYPE_DATA_OFFSET,
  NANOARROW_BUFFER_TYPE_DATA,
  NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
  NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
  NANOARROW_BUFFER_TYPE_VIEW_OFFSET,
  NANOARROW_BUFFER_TYPE_SIZE,
};

/// \brief The maximum number of fixed buffers in an ArrowArrayView or ArrowLayout
/// \ingroup nanoarrow-array-view
#define NANOARROW_MAX_FIXED_BUFFERS 3

/// \brief A non-owning view of a string
/// \ingroup nanoarrow-utils
struct ArrowStringView {
  /// \brief A pointer to the start of the string
  ///
  /// If size_bytes is 0, this value may be NULL.
  const char* data;

  /// \brief The size of the string in bytes,
  ///
  /// (Not including the null terminator.)
  int64_t size_bytes;
};

/// \brief Return a view of a const C string
/// \ingroup nanoarrow-utils
///
/// A NULL value yields a view with data == NULL and size_bytes == 0.
static inline struct ArrowStringView ArrowCharView(const char* value);

static inline struct ArrowStringView ArrowCharView(const char* value) {
  struct ArrowStringView view;
  view.data = value;
  view.size_bytes = (value != NULL) ? (int64_t)strlen(value) : 0;
  return view;
}

// Typed aliases over the same raw buffer pointer.
union ArrowBufferViewData {
  const void* data;
  const int8_t* as_int8;
  const uint8_t* as_uint8;
  const int16_t* as_int16;
  const uint16_t* as_uint16;
  const int32_t* as_int32;
  const uint32_t* as_uint32;
  const int64_t* as_int64;
  const uint64_t* as_uint64;
  const double* as_double;
  const float* as_float;
  const char* as_char;
  const union ArrowBinaryView* as_binary_view;
};

/// \brief A non-owning view of a buffer
/// \ingroup nanoarrow-utils
struct ArrowBufferView {
  /// \brief A pointer to the start of the buffer
  ///
  /// If size_bytes is 0, this value may be NULL.
  union ArrowBufferViewData data;

  /// \brief The size of the buffer in bytes
  int64_t size_bytes;
};

/// \brief Array buffer allocation and deallocation
/// \ingroup nanoarrow-buffer
///
/// Container for allocate, reallocate, and free methods that can be used
/// to customize allocation and deallocation of buffers when constructing
/// an ArrowArray.
struct ArrowBufferAllocator {
  /// \brief Reallocate a buffer or return NULL if it cannot be reallocated
  uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
                         int64_t old_size, int64_t new_size);

  /// \brief Deallocate a buffer allocated by this allocator
  void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size);

  /// \brief Opaque data specific to the allocator
  void* private_data;
};

// Signature of a custom free callback compatible with ArrowBufferAllocator::free.
typedef void (*ArrowBufferDeallocatorCallback)(struct ArrowBufferAllocator* allocator,
                                               uint8_t* ptr, int64_t size);

/// \brief An owning mutable view of a buffer
/// \ingroup nanoarrow-buffer
struct ArrowBuffer {
  /// \brief A pointer to the start of the buffer
  ///
  /// If capacity_bytes is 0, this value may be NULL.
  uint8_t* data;

  /// \brief The size of the buffer in bytes
  int64_t size_bytes;

  /// \brief The capacity of the buffer in bytes
  int64_t capacity_bytes;

  /// \brief The allocator that will be used to reallocate and/or free the buffer
  struct ArrowBufferAllocator allocator;
};

/// \brief An owning mutable view of a bitmap
/// \ingroup nanoarrow-bitmap
struct ArrowBitmap {
  /// \brief An ArrowBuffer to hold the allocated memory
  struct ArrowBuffer buffer;

  /// \brief The number of bits that have been appended to the bitmap
  int64_t size_bits;
};

/// \brief A description of an arrangement of buffers
/// \ingroup nanoarrow-utils
///
/// Contains the minimum amount of information required to
/// calculate the size of each buffer in an ArrowArray knowing only
/// the length and offset of the array.
struct ArrowLayout {
  /// \brief The function of each buffer
  enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS];

  /// \brief The data type of each buffer
  enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS];

  /// \brief The size of an element each buffer or 0 if this size is variable or unknown
  int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS];

  /// \brief The number of elements in the child array per element in this array for a
  /// fixed-size list
  int64_t child_size_elements;
};

/// \brief A non-owning view of an ArrowArray
/// \ingroup nanoarrow-array-view
///
/// This data structure provides access to the values contained within
/// an ArrowArray with fields provided in a more readily-extractible
/// form. You can re-use an ArrowArrayView for multiple ArrowArrays
/// with the same storage type, use it to represent a hypothetical
/// ArrowArray that does not exist yet, or use it to validate the buffers
/// of a future ArrowArray.
struct ArrowArrayView {
  /// \brief The underlying ArrowArray or NULL if it has not been set or
  /// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
  const struct ArrowArray* array;

  /// \brief The number of elements from the physical start of the buffers.
  int64_t offset;

  /// \brief The number of elements in this view.
  int64_t length;

  /// \brief A cached null count or -1 to indicate that this value is unknown.
  int64_t null_count;

  /// \brief The type used to store values in this array
  ///
  /// This type represents only the minimum required information to
  /// extract values from the array buffers (e.g., for a Date32 array,
  /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded
  /// arrays, this will be the index type.
  enum ArrowType storage_type;

  /// \brief The buffer types, strides, and sizes of this Array's buffers
  struct ArrowLayout layout;

  /// \brief This Array's buffers as ArrowBufferView objects
  struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS];

  /// \brief The number of children of this view
  int64_t n_children;

  /// \brief Pointers to views of this array's children
  struct ArrowArrayView** children;

  /// \brief Pointer to a view of this array's dictionary
  struct ArrowArrayView* dictionary;

  /// \brief Union type id to child index mapping
  ///
  /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
  /// such that child_index == union_type_id_map[type_id] and
  /// type_id == union_type_id_map[128 + child_index]. This value may be
  /// NULL in the case where child_id == type_id.
  int8_t* union_type_id_map;

  /// \brief Number of variadic buffers
  int32_t n_variadic_buffers;

  /// \brief Pointers to variadic buffers of binary/string_view arrays
  const void** variadic_buffers;

  /// \brief Size of each variadic buffer
  int64_t* variadic_buffer_sizes;
};

// Used as the private data member for ArrowArrays allocated here and accessed
// internally within inline ArrowArray* helpers.
struct ArrowArrayPrivateData {
  // Holder for the validity buffer (or first buffer for union types, which are
  // the only type whose first buffer is not a valdiity buffer)
  struct ArrowBitmap bitmap;

  // Holder for additional buffers as required
  struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1];

  // The array of pointers to buffers. This must be updated after a sequence
  // of appends to synchronize its values with the actual buffer addresses
  // (which may have been reallocated during that time)
  const void** buffer_data;

  // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
  enum ArrowType storage_type;

  // The buffer arrangement for the storage type
  struct ArrowLayout layout;

  // Flag to indicate if there are non-sequence union type ids.
  // In the future this could be replaced with a type id<->child mapping
  // to support constructing unions in append mode where type_id != child_index
  int8_t union_type_id_is_child_index;

  // Number of variadic buffers for binary view types
  int32_t n_variadic_buffers;

  // Variadic buffers for binary view types
  struct ArrowBuffer* variadic_buffers;

  // The current offset used to build list views
  int64_t list_view_offset;
};

/// \brief A representation of an interval.
/// \ingroup nanoarrow-utils
struct ArrowInterval {
  /// \brief The type of interval being used
  enum ArrowType type;
  /// \brief The number of months represented by the interval
  int32_t months;
  /// \brief The number of days represented by the interval
  int32_t days;
  /// \brief The number of ms represented by the interval
  int32_t ms;
  /// \brief The number of ns represented by the interval
  int64_t ns;
};

/// \brief Zero initialize an Interval with a given unit
/// \ingroup nanoarrow-utils
static inline void ArrowIntervalInit(struct ArrowInterval* interval,
                                     enum ArrowType type) {
  memset(interval, 0, sizeof(struct ArrowInterval));
  interval->type = type;
}

/// \brief A representation of a fixed-precision decimal number
/// \ingroup nanoarrow-utils
///
/// This structure should be initialized with ArrowDecimalInit() once and
/// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(),
/// or ArrowDecimalSetBytes256().
struct ArrowDecimal {
  /// \brief An array of 64-bit integers of n_words length defined in native-endian order.
  /// For a 32-bit decimal value, index 0 will be a 32-bit integer value.
  uint64_t words[4];

  /// \brief The number of significant digits this decimal number can represent
  int32_t precision;

  /// \brief The number of digits after the decimal point. This can be negative.
  int32_t scale;

  /// \brief The number of 64-bit words in the words array. For the special case of a
  /// 32-bit decimal value, this will be 0.
  int n_words;

  /// \brief Cached value used by the implementation
  int high_word_index;

  /// \brief Cached value used by the implementation
  int low_word_index;
};

/// \brief Initialize a decimal with a given set of type parameters
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth,
                                    int32_t precision, int32_t scale) {
  memset(decimal->words, 0, sizeof(decimal->words));
  decimal->precision = precision;
  decimal->scale = scale;
  // n_words will be 0 for bitwidth == 32
  decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t));

  // Cache which word holds the least/most significant bits for this platform's
  // endianness so that sign/int accessors need not branch on endianness again.
  if (_ArrowIsLittleEndian()) {
    decimal->low_word_index = 0;
    decimal->high_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0;
  } else {
    decimal->low_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0;
    decimal->high_word_index = 0;
  }
}

/// \brief Get a signed integer value of a sufficiently small ArrowDecimal
///
/// This does not check if the decimal's precision sufficiently small to fit
/// within the signed 64-bit integer range (A precision less than or equal
/// to 18 is sufficiently small).
static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) {
  // n_words == 0 marks the 32-bit decimal special case stored in words[0].
  if (decimal->n_words == 0) {
    int32_t value;
    memcpy(&value, decimal->words, sizeof(int32_t));
    return value;
  }

  return (int64_t)decimal->words[decimal->low_word_index];
}

/// \brief Copy the bytes of this decimal into a sufficiently large buffer
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal,
                                        uint8_t* out) {
  if (decimal->n_words == 0) {
    memcpy(out, decimal->words, sizeof(int32_t));
  } else {
    memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t));
  }
}

/// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise
/// \ingroup nanoarrow-utils
static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) {
  if (decimal->n_words == 0) {
    return ArrowDecimalGetIntUnsafe(decimal) >= 0 ? 1 : -1;
  } else {
    // Arithmetic right shift of the sign bit yields 0 or -1; OR with 1 maps
    // those to +1 and -1 respectively.
    return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63);
  }
}

/// \brief Sets the integer value of this decimal
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) {
  if (decimal->n_words == 0) {
    int32_t value32 = (int32_t)value;
    memcpy(decimal->words, &value32, sizeof(int32_t));
    return;
  }

  // Sign-extend across all words before storing the low word.
  if (value < 0) {
    memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t));
  } else {
    memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t));
  }

  decimal->words[decimal->low_word_index] = value;
}

/// \brief Negate the value of this decimal in place
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) {
  if (decimal->n_words == 0) {
    int32_t value;
    memcpy(&value, decimal->words, sizeof(int32_t));
    value = -value;
    memcpy(decimal->words, &value, sizeof(int32_t));
    return;
  }

  // Two's complement negation (~x + 1), propagating the carry from the
  // least-significant word toward the most-significant word.
  uint64_t carry = 1;

  if (decimal->low_word_index == 0) {
    for (int i = 0; i < decimal->n_words; i++) {
      uint64_t elem = decimal->words[i];
      elem = ~elem + carry;
      carry &= (elem == 0);
      decimal->words[i] = elem;
    }
  } else {
    for (int i = decimal->low_word_index; i >= 0; i--) {
      uint64_t elem = decimal->words[i];
      elem = ~elem + carry;
      carry &= (elem == 0);
      decimal->words[i] = elem;
    }
  }
}

/// \brief Copy bytes from a buffer into this decimal
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
                                        const uint8_t* value) {
  if (decimal->n_words == 0) {
    memcpy(decimal->words, value, sizeof(int32_t));
  } else {
    memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t));
  }
}

#ifdef __cplusplus
}
#endif

#endif  // end of the preceding (inline types) header section

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef NANOARROW_H_INCLUDED #define NANOARROW_H_INCLUDED #include #include #include // If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will set this // define in nanoarrow_config.h. If not, you can optionally #define NANOARROW_NAMESPACE // MyNamespace here. // This section remaps the non-prefixed symbols to the prefixed symbols so that // code written against this build can be used independent of the value of // NANOARROW_NAMESPACE. 
#ifdef NANOARROW_NAMESPACE

// Token-pasting helpers used to prefix every public symbol with
// NANOARROW_NAMESPACE so that multiple vendored copies can coexist.
#define NANOARROW_CAT(A, B) A##B
#define NANOARROW_SYMBOL(A, B) NANOARROW_CAT(A, B)

#define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion)
#define ArrowNanoarrowVersionInt \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt)
#define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc)
#define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc)
#define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree)
#define ArrowBufferAllocatorDefault \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault)
#define ArrowBufferDeallocator \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator)
#define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet)
#define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit)
#define ArrowDecimalSetDigits NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits)
#define ArrowDecimalAppendDigitsToBuffer \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendDigitsToBuffer)
#define ArrowDecimalAppendStringToBuffer \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendStringToBuffer)
#define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit)
#define ArrowSchemaInitFromType \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType)
#define ArrowSchemaSetType NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType)
#define ArrowSchemaSetTypeStruct \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct)
#define ArrowSchemaSetTypeFixedSize \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize)
#define ArrowSchemaSetTypeDecimal \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal)
#define ArrowSchemaSetTypeRunEndEncoded \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeRunEndEncoded)
#define ArrowSchemaSetTypeDateTime \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime)
#define ArrowSchemaSetTypeUnion \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion)
#define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy)
#define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat)
#define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName)
#define ArrowSchemaSetMetadata \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata)
#define ArrowSchemaAllocateChildren \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren)
#define ArrowSchemaAllocateDictionary \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary)
#define ArrowMetadataReaderInit \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit)
#define ArrowMetadataReaderRead \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead)
#define ArrowMetadataSizeOf NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf)
#define ArrowMetadataHasKey NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey)
#define ArrowMetadataGetValue NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue)
#define ArrowMetadataBuilderInit \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit)
#define ArrowMetadataBuilderAppend \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend)
#define ArrowMetadataBuilderSet \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet)
#define ArrowMetadataBuilderRemove \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove)
#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit)
#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString)
#define ArrowArrayInitFromType \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType)
#define ArrowArrayInitFromSchema \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema)
#define ArrowArrayInitFromArrayView \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView)
// NOTE(review): the define below is an exact duplicate of the one above. An
// identical macro redefinition is legal C (benign redefinition) so this is
// harmless, but it could be removed when next syncing with upstream.
#define ArrowArrayInitFromArrayView \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView)
#define ArrowArrayAllocateChildren \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren)
#define ArrowArrayAllocateDictionary \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary)
#define ArrowArraySetValidityBitmap \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap)
#define ArrowArraySetBuffer NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer)
#define ArrowArrayReserve NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve)
#define ArrowArrayFinishBuilding \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding)
#define ArrowArrayFinishBuildingDefault \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault)
#define ArrowArrayViewInitFromType \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType)
#define ArrowArrayViewInitFromSchema \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema)
#define ArrowArrayViewAllocateChildren \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren)
#define ArrowArrayViewAllocateDictionary \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary)
#define ArrowArrayViewSetLength \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
#define ArrowArrayViewSetArrayMinimal \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal)
#define ArrowArrayViewValidate \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate)
#define ArrowArrayViewCompare NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewCompare)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
#define ArrowBasicArrayStreamSetArray \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray)
#define ArrowBasicArrayStreamValidate \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate)

#endif

// Select the symbol visibility/linkage attribute: dllexport/dllimport when
// building or consuming a Windows DLL, default visibility on GCC >= 4,
// and nothing otherwise.
#if (defined _WIN32 || defined __CYGWIN__) && defined(NANOARROW_BUILD_DLL)
#if defined(NANOARROW_EXPORT_DLL)
#define NANOARROW_DLL __declspec(dllexport)
#else
#define NANOARROW_DLL __declspec(dllimport)
#endif  // defined(NANOARROW_EXPORT_DLL)
#elif !defined(NANOARROW_DLL)
#if defined(__GNUC__) && __GNUC__ >= 4
#define NANOARROW_DLL __attribute__((visibility("default")))
#else
#define NANOARROW_DLL
#endif  // __GNUC__ >= 4
#endif

#ifdef __cplusplus
extern "C" {
#endif

/// \defgroup nanoarrow Nanoarrow C library
///
/// Except where noted, objects are not thread-safe and clients should
/// take care to serialize accesses to methods.
///
/// Because this library is intended to be vendored, it provides full type
/// definitions and encourages clients to stack or statically allocate
/// where convenient.

/// \defgroup nanoarrow-malloc Memory management
///
/// Non-buffer members of a struct ArrowSchema and struct ArrowArray
/// must be allocated using ArrowMalloc() or ArrowRealloc() and freed
/// using ArrowFree() for schemas and arrays allocated here. Buffer members
/// are allocated using an ArrowBufferAllocator.
///
/// @{

/// \brief Allocate like malloc()
NANOARROW_DLL void* ArrowMalloc(int64_t size);

/// \brief Reallocate like realloc()
NANOARROW_DLL void* ArrowRealloc(void* ptr, int64_t size);

/// \brief Free a pointer allocated using ArrowMalloc() or ArrowRealloc().
NANOARROW_DLL void ArrowFree(void* ptr);

/// \brief Return the default allocator
///
/// The default allocator uses ArrowMalloc(), ArrowRealloc(), and
/// ArrowFree().
NANOARROW_DLL struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void);

/// \brief Create a custom deallocator
///
/// Creates a buffer allocator with only a free method that can be used to
/// attach a custom deallocator to an ArrowBuffer.
This may be used to /// avoid copying an existing buffer that was not allocated using the /// infrastructure provided here (e.g., by an R or Python object). NANOARROW_DLL struct ArrowBufferAllocator ArrowBufferDeallocator( ArrowBufferDeallocatorCallback, void* private_data); /// @} /// \brief Move the contents of an src ArrowSchema into dst and set src->release to NULL /// \ingroup nanoarrow-arrow-cdata static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst); /// \brief Call the release callback of an ArrowSchema /// \ingroup nanoarrow-arrow-cdata static inline void ArrowSchemaRelease(struct ArrowSchema* schema); /// \brief Move the contents of an src ArrowArray into dst and set src->release to NULL /// \ingroup nanoarrow-arrow-cdata static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst); /// \brief Call the release callback of an ArrowArray static inline void ArrowArrayRelease(struct ArrowArray* array); /// \brief Move the contents of an src ArrowArrayStream into dst and set src->release to /// NULL \ingroup nanoarrow-arrow-cdata static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, struct ArrowArrayStream* dst); /// \brief Call the get_schema callback of an ArrowArrayStream /// \ingroup nanoarrow-arrow-cdata /// /// Unlike the get_schema callback, this wrapper checks the return code /// and propagates the error reported by get_last_error into error. This /// makes it significantly less verbose to iterate over array streams /// using NANOARROW_RETURN_NOT_OK()-style error handling. static inline ArrowErrorCode ArrowArrayStreamGetSchema( struct ArrowArrayStream* array_stream, struct ArrowSchema* out, struct ArrowError* error); /// \brief Call the get_next callback of an ArrowArrayStream /// \ingroup nanoarrow-arrow-cdata /// /// Unlike the get_next callback, this wrapper checks the return code /// and propagates the error reported by get_last_error into error. 
This /// makes it significantly less verbose to iterate over array streams /// using NANOARROW_RETURN_NOT_OK()-style error handling. static inline ArrowErrorCode ArrowArrayStreamGetNext( struct ArrowArrayStream* array_stream, struct ArrowArray* out, struct ArrowError* error); /// \brief Call the get_last_error callback of an ArrowArrayStream /// \ingroup nanoarrow-arrow-cdata /// /// Unlike the get_last_error callback, this function never returns NULL (i.e., /// its result is safe to use in printf-style error formatters). Null values /// from the original callback are reported as /// "". static inline const char* ArrowArrayStreamGetLastError( struct ArrowArrayStream* array_stream); /// \brief Call the release callback of an ArrowArrayStream static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream); /// \defgroup nanoarrow-errors Error handling /// /// Functions generally return an errno-compatible error code; functions that /// need to communicate more verbose error information accept a pointer /// to an ArrowError. This can be stack or statically allocated. The /// content of the message is undefined unless an error code has been /// returned. If a nanoarrow function is passed a non-null ArrowError pointer, the /// ArrowError pointed to by the argument will be propagated with a /// null-terminated error message. It is safe to pass a NULL ArrowError anywhere /// in the nanoarrow API. /// /// Except where documented, it is generally not safe to continue after a /// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and /// NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use /// the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms /// for memory management and error propagation. /// /// @{ /// \brief Set the contents of an error using printf syntax. /// /// If error is NULL, this function does nothing and returns NANOARROW_OK. 
NANOARROW_DLL NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...); /// @} /// \defgroup nanoarrow-utils Utility data structures /// /// @{ /// \brief Return a version string in the form "major.minor.patch" NANOARROW_DLL const char* ArrowNanoarrowVersion(void); /// \brief Return an integer that can be used to compare versions sequentially NANOARROW_DLL int ArrowNanoarrowVersionInt(void); /// \brief Initialize a description of buffer arrangements from a storage type NANOARROW_DLL void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type); /// \brief Create a string view from a null-terminated string static inline struct ArrowStringView ArrowCharView(const char* value); /// \brief Sets the integer value of an ArrowDecimal from a string NANOARROW_DLL ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, struct ArrowStringView value); /// \brief Get the integer value of an ArrowDecimal as string NANOARROW_DLL ArrowErrorCode ArrowDecimalAppendDigitsToBuffer( const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer); /// \brief Get the decimal value of an ArrowDecimal as a string NANOARROW_DLL ArrowErrorCode ArrowDecimalAppendStringToBuffer( const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer); /// \brief Get the half float value of a float static inline uint16_t ArrowFloatToHalfFloat(float value); /// \brief Get the float value of a half float static inline float ArrowHalfFloatToFloat(uint16_t value); /// \brief Resolve a chunk index from increasing int64_t offsets /// /// Given a buffer of increasing int64_t offsets that begin with 0 (e.g., offset buffer /// of a large type, run ends of a chunked array implementation), resolve a value v /// where lo <= v < hi such that offsets[v] <= index < offsets[v + 1]. 
static inline int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, int64_t lo, int64_t hi); /// @} /// \defgroup nanoarrow-schema Creating schemas /// /// These functions allocate, copy, and destroy ArrowSchema structures /// /// @{ /// \brief Initialize an ArrowSchema /// /// Initializes the fields and release callback of schema_out. Caller /// is responsible for calling the schema->release callback if /// NANOARROW_OK is returned. NANOARROW_DLL void ArrowSchemaInit(struct ArrowSchema* schema); /// \brief Initialize an ArrowSchema from an ArrowType /// /// A convenience constructor for that calls ArrowSchemaInit() and /// ArrowSchemaSetType() for the common case of constructing an /// unparameterized type. The caller is responsible for calling the schema->release /// callback if NANOARROW_OK is returned. NANOARROW_DLL ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type); /// \brief Get a human-readable summary of a Schema /// /// Writes a summary of an ArrowSchema to out (up to n - 1 characters) /// and returns the number of characters required for the output if /// n were sufficiently large. If recursive is non-zero, the result will /// also include children. NANOARROW_DLL int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, char recursive); /// \brief Set the format field of a schema from an ArrowType /// /// Initializes the fields and release callback of schema_out. For /// NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and /// NANOARROW_TYPE_MAP, the appropriate number of children are /// allocated, initialized, and named; however, the caller must /// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized /// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). 
NANOARROW_DLL ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type); /// \brief Set the format field and initialize children of a struct schema /// /// The specified number of children are initialized; however, the caller is responsible /// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. /// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children); /// \brief Set the format field of a fixed-size schema /// /// Returns EINVAL for fixed_size <= 0 or for type that is not /// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST. /// For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are /// allocated, initialized, and named; however, the caller must /// ArrowSchemaSetType() the first child. Schema must have been initialized using /// ArrowSchemaInit() or ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, enum ArrowType type, int32_t fixed_size); /// \brief Set the format field of a decimal schema /// /// Returns EINVAL for scale <= 0 or for type that is not /// NANOARROW_TYPE_DECIMAL32, NANOARROW_TYPE_DECIMAL64, NANOARROW_TYPE_DECIMAL128 or /// NANOARROW_TYPE_DECIMAL256. Schema must have been initialized using /// ArrowSchemaInit() or ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, int32_t decimal_precision, int32_t decimal_scale); /// \brief Set the format field of a run-end encoded schema /// /// Returns EINVAL for run_end_type that is not /// NANOARROW_TYPE_INT16, NANOARROW_TYPE_INT32 or NANOARROW_TYPE_INT64. /// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). /// The caller must call `ArrowSchemaSetTypeXXX(schema->children[1])` to /// set the value type. 
Note that when building arrays using the `ArrowArrayAppendXXX()` /// functions, the run-end encoded array's logical length must be updated manually. NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, enum ArrowType run_end_type); /// \brief Set the format field of a time, timestamp, or duration schema /// /// Returns EINVAL for type that is not /// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64, /// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The /// timezone parameter must be NULL for a non-timestamp type. Schema must have been /// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, enum ArrowTimeUnit time_unit, const char* timezone); /// \brief Set the format field of a union schema /// /// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION /// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are /// allocated, and initialized. NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, int64_t n_children); /// \brief Make a (recursive) copy of a schema /// /// Allocates and copies fields of schema into schema_out. NANOARROW_DLL ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, struct ArrowSchema* schema_out); /// \brief Copy format into schema->format /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format); /// \brief Copy name into schema->name /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). 
NANOARROW_DLL ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name); /// \brief Copy metadata into schema->metadata /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy. NANOARROW_DLL ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata); /// \brief Allocate the schema->children array /// /// Includes the memory for each child struct ArrowSchema. /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, int64_t n_children); /// \brief Allocate the schema->dictionary member /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). NANOARROW_DLL ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); /// @} /// \defgroup nanoarrow-metadata Create, read, and modify schema metadata /// /// @{ /// \brief Reader for key/value pairs in schema metadata /// /// The ArrowMetadataReader does not own any data and is only valid /// for the lifetime of the underlying metadata pointer. struct ArrowMetadataReader { /// \brief A metadata string from a schema->metadata field. 
const char* metadata; /// \brief The current offset into the metadata string int64_t offset; /// \brief The number of remaining keys int32_t remaining_keys; }; /// \brief Initialize an ArrowMetadataReader NANOARROW_DLL ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, const char* metadata); /// \brief Read the next key/value pair from an ArrowMetadataReader NANOARROW_DLL ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, struct ArrowStringView* key_out, struct ArrowStringView* value_out); /// \brief The number of bytes in a key/value metadata string NANOARROW_DLL int64_t ArrowMetadataSizeOf(const char* metadata); /// \brief Check for a key in schema metadata NANOARROW_DLL char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); /// \brief Extract a value from schema metadata /// /// If key does not exist in metadata, value_out is unmodified NANOARROW_DLL ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, struct ArrowStringView* value_out); /// \brief Initialize a builder for schema metadata from key/value pairs /// /// metadata can be an existing metadata string or NULL to initialize /// an empty metadata string. NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata); /// \brief Append a key/value pair to a buffer containing serialized metadata NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, struct ArrowStringView key, struct ArrowStringView value); /// \brief Set a key/value pair to a buffer containing serialized metadata /// /// Ensures that the only entry for key in the metadata is set to value. /// This function maintains the existing position of (the first instance of) /// key if present in the data. 
NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, struct ArrowStringView key, struct ArrowStringView value); /// \brief Remove a key from a buffer containing serialized metadata NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, struct ArrowStringView key); /// @} /// \defgroup nanoarrow-schema-view Reading schemas /// /// @{ /// \brief A non-owning view of a parsed ArrowSchema /// /// Contains more readily extractable values than a raw ArrowSchema. /// Clients can stack or statically allocate this structure but are /// encouraged to use the provided getters to ensure forward /// compatibility. struct ArrowSchemaView { /// \brief A pointer to the schema represented by this view const struct ArrowSchema* schema; /// \brief The data type represented by the schema /// /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a /// non-null dictionary member; datetime types are valid values. /// This value will never be NANOARROW_TYPE_EXTENSION (see /// extension_name and/or extension_metadata to check for /// an extension type). enum ArrowType type; /// \brief The storage data type represented by the schema /// /// This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION /// or any datetime type. This value represents only the type required to /// interpret the buffers in the array. enum ArrowType storage_type; /// \brief The storage layout represented by the schema struct ArrowLayout layout; /// \brief The extension type name if it exists /// /// If the ARROW:extension:name key is present in schema.metadata, /// extension_name.data will be non-NULL. struct ArrowStringView extension_name; /// \brief The extension type metadata if it exists /// /// If the ARROW:extension:metadata key is present in schema.metadata, /// extension_metadata.data will be non-NULL. 
struct ArrowStringView extension_metadata; /// \brief Format fixed size parameter /// /// This value is set when parsing a fixed-size binary or fixed-size /// list schema; this value is undefined for other types. For a /// fixed-size binary schema this value is in bytes; for a fixed-size /// list schema this value refers to the number of child elements for /// each element of the parent. int32_t fixed_size; /// \brief Decimal bitwidth /// /// This value is set when parsing a decimal type schema; /// this value is undefined for other types. int32_t decimal_bitwidth; /// \brief Decimal precision /// /// This value is set when parsing a decimal type schema; /// this value is undefined for other types. int32_t decimal_precision; /// \brief Decimal scale /// /// This value is set when parsing a decimal type schema; /// this value is undefined for other types. int32_t decimal_scale; /// \brief Format time unit parameter /// /// This value is set when parsing a date/time type. The value is /// undefined for other types. enum ArrowTimeUnit time_unit; /// \brief Format timezone parameter /// /// This value is set when parsing a timestamp type and represents /// the timezone format parameter. This value points to /// data within the schema and is undefined for other types. const char* timezone; /// \brief Union type ids parameter /// /// This value is set when parsing a union type and represents /// type ids parameter. This value points to /// data within the schema and is undefined for other types. const char* union_type_ids; }; /// \brief Initialize an ArrowSchemaView NANOARROW_DLL ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, const struct ArrowSchema* schema, struct ArrowError* error); /// @} /// \defgroup nanoarrow-buffer Owning, growable buffers /// /// @{ /// \brief Initialize an ArrowBuffer /// /// Initialize a buffer with a NULL, zero-size buffer using the default /// buffer allocator. 
static inline void ArrowBufferInit(struct ArrowBuffer* buffer); /// \brief Set a newly-initialized buffer's allocator /// /// Returns EINVAL if the buffer has already been allocated. static inline ArrowErrorCode ArrowBufferSetAllocator( struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator); /// \brief Reset an ArrowBuffer /// /// Releases the buffer using the allocator's free method if /// the buffer's data member is non-null, sets the data member /// to NULL, and sets the buffer's size and capacity to 0. static inline void ArrowBufferReset(struct ArrowBuffer* buffer); /// \brief Move an ArrowBuffer /// /// Transfers the buffer data and lifecycle management to another /// address and resets buffer. static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst); /// \brief Grow or shrink a buffer to a given size /// /// When shrinking the size of the buffer, the buffer is only reallocated /// if shrink_to_fit is non-zero. static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, int64_t new_size_bytes, char shrink_to_fit); /// \brief Ensure a buffer has at least a given additional capacity /// /// Ensures that the buffer has space to append at least /// additional_size_bytes, overallocating when required. static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, int64_t additional_size_bytes); /// \brief Write data to buffer and increment the buffer size /// /// This function does not check that buffer has the required capacity static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, int64_t size_bytes); /// \brief Write data to buffer and increment the buffer size /// /// This function writes and ensures that the buffer has the required capacity, /// possibly by reallocating the buffer. Like ArrowBufferReserve, this will /// overallocate when reallocation is required. 
static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, const void* data, int64_t size_bytes); /// \brief Write fill to buffer and increment the buffer size /// /// This function writes the specified number of fill bytes and /// ensures that the buffer has the required capacity, static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, uint8_t value, int64_t size_bytes); /// \brief Write an 8-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, int8_t value); /// \brief Write an unsigned 8-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, uint8_t value); /// \brief Write a 16-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, int16_t value); /// \brief Write an unsigned 16-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, uint16_t value); /// \brief Write a 32-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, int32_t value); /// \brief Write an unsigned 32-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, uint32_t value); /// \brief Write a 64-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, int64_t value); /// \brief Write an unsigned 64-bit integer to a buffer static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, uint64_t value); /// \brief Write a double to a buffer static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, double value); /// \brief Write a float to a buffer static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, float value); /// \brief Write an ArrowStringView to a buffer static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, struct 
ArrowStringView value); /// \brief Write an ArrowBufferView to a buffer static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, struct ArrowBufferView value); /// @} /// \defgroup nanoarrow-bitmap Bitmap utilities /// /// @{ /// \brief Extract a boolean value from a bitmap static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i); /// \brief Set a boolean value to a bitmap to true static inline void ArrowBitSet(uint8_t* bits, int64_t i); /// \brief Set a boolean value to a bitmap to false static inline void ArrowBitClear(uint8_t* bits, int64_t i); /// \brief Set a boolean value to a bitmap static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value); /// \brief Set a boolean value to a range in a bitmap static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, uint8_t bits_are_set); /// \brief Count true values in a bitmap static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); /// \brief Extract int8 boolean values from a range in a bitmap static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, int64_t length, int8_t* out); /// \brief Extract int32 boolean values from a range in a bitmap static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, int64_t length, int32_t* out); /// \brief Initialize an ArrowBitmap /// /// Initialize the builder's buffer, empty its cache, and reset the size to zero static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap); /// \brief Move an ArrowBitmap /// /// Transfers the underlying buffer data and lifecycle management to another /// address and resets the bitmap. static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst); /// \brief Ensure a bitmap builder has at least a given additional capacity /// /// Ensures that the buffer has space to append at least /// additional_size_bits, overallocating when required. 
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, int64_t additional_size_bits); /// \brief Grow or shrink a bitmap to a given size /// /// When shrinking the size of the bitmap, the bitmap is only reallocated /// if shrink_to_fit is non-zero. static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, int64_t new_size_bits, char shrink_to_fit); /// \brief Reserve space for and append zero or more of the same boolean value to a bitmap static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length); /// \brief Append zero or more of the same boolean value to a bitmap static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length); /// \brief Append boolean values encoded as int8_t to a bitmap /// /// The values must all be 0 or 1. static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, const int8_t* values, int64_t n_values); /// \brief Append boolean values encoded as int32_t to a bitmap /// /// The values must all be 0 or 1. static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, const int32_t* values, int64_t n_values); /// \brief Reset a bitmap builder /// /// Releases any memory held by buffer, empties the cache, and resets the size to zero static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); /// @} /// \defgroup nanoarrow-array Creating arrays /// /// These functions allocate, copy, and destroy ArrowArray structures. /// Once an ArrowArray has been initialized via ArrowArrayInitFromType() /// or ArrowArrayInitFromSchema(), the caller is responsible for releasing /// it using the embedded release callback. /// /// @{ /// \brief Initialize the fields of an array /// /// Initializes the fields and release callback of array. Caller /// is responsible for calling the array->release callback if /// NANOARROW_OK is returned. 
NANOARROW_DLL ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, enum ArrowType storage_type); /// \brief Initialize the contents of an ArrowArray from an ArrowSchema /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. NANOARROW_DLL ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, const struct ArrowSchema* schema, struct ArrowError* error); /// \brief Initialize the contents of an ArrowArray from an ArrowArrayView /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. NANOARROW_DLL ArrowErrorCode ArrowArrayInitFromArrayView( struct ArrowArray* array, const struct ArrowArrayView* array_view, struct ArrowError* error); /// \brief Allocate the array->children array /// /// Includes the memory for each child struct ArrowArray, /// whose members are marked as released and may be subsequently initialized /// with ArrowArrayInitFromType() or moved from an existing ArrowArray. /// schema must have been allocated using ArrowArrayInitFromType(). NANOARROW_DLL ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children); /// \brief Allocate the array->dictionary member /// /// Includes the memory for the struct ArrowArray, whose contents /// is marked as released and may be subsequently initialized /// with ArrowArrayInitFromType() or moved from an existing ArrowArray. 
/// array must have been allocated using ArrowArrayInitFromType() NANOARROW_DLL ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); /// \brief Set the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() NANOARROW_DLL void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap); /// \brief Set a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() NANOARROW_DLL ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, struct ArrowBuffer* buffer); /// \brief Add variadic buffers to a string or binary view array /// /// array must have been allocated using ArrowArrayInitFromType() static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray* array, int32_t n_buffers); /// \brief Get the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array); /// \brief Get a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i); /// \brief Start element-wise appending to an ArrowArray /// /// Initializes any values needed to use ArrowArrayAppend*() functions. /// All element-wise appenders append by value and return EINVAL if the exact value /// cannot be represented by the underlying storage type. /// array must have been allocated using ArrowArrayInitFromType() static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); /// \brief Reserve space for future appends /// /// For buffer sizes that can be calculated (i.e., not string data buffers or /// child array sizes for non-fixed-size arrays), recursively reserve space for /// additional elements. 
This is useful for reducing the number of reallocations /// that occur using the item-wise appenders. NANOARROW_DLL ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, int64_t additional_size_elements); /// \brief Append a null value to an array static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n); /// \brief Append an empty, non-null value to an array static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n); /// \brief Append a signed integer value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range). static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value); /// \brief Append an unsigned integer value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range). static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, uint64_t value); /// \brief Append a double value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range or there is an attempt to append /// a non-integer to an array with an integer storage type). 
static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, double value); /// \brief Append a string of bytes to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type, EOVERFLOW if appending value would overflow /// the offset type (e.g., if the data buffer would be larger than 2 GB for a /// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a /// binary, string, large binary, large string, or fixed-size binary array, or value is /// the wrong size for a fixed-size binary array). static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, struct ArrowBufferView value); /// \brief Append a string value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type, EOVERFLOW if appending value would overflow /// the offset type (e.g., if the data buffer would be larger than 2 GB for a /// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a /// string or large string array). static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, struct ArrowStringView value); /// \brief Append a Interval to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise. static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, const struct ArrowInterval* value); /// \brief Append a decimal value to an array /// /// Returns NANOARROW_OK if array is a decimal array with the appropriate /// bitwidth or EINVAL otherwise. static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, const struct ArrowDecimal* value); /// \brief Finish a nested array element /// /// Appends a non-null element to the array based on the first child's current /// length. 
Returns NANOARROW_OK if the item was successfully added, EOVERFLOW /// if the child of a list or map array would exceed INT_MAX elements, or EINVAL /// if the underlying storage type is not a struct, list, large list, or fixed-size /// list, or if there was an attempt to add a struct or fixed-size list element where the /// length of the child array(s) did not match the expected length. static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array); /// \brief Finish a union array element /// /// Appends an element to the union type ids buffer and increments array->length. /// For sparse unions, up to one element is added to non type-id children. Returns /// EINVAL if the underlying storage type is not a union, if type_id is not valid, /// or if child sizes after appending are inconsistent. static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, int8_t type_id); /// \brief Shrink buffer capacity to the size required /// /// Also applies shrinking to any child arrays. array must have been allocated using /// ArrowArrayInitFromType static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); /// \brief Finish building an ArrowArray /// /// Flushes any pointers from internal buffers that may have been reallocated /// into array->buffers and checks the actual size of the buffers /// against the expected size based on the final length. /// array must have been allocated using ArrowArrayInitFromType() NANOARROW_DLL ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, struct ArrowError* error); /// \brief Finish building an ArrowArray with explicit validation /// /// Finish building with an explicit validation level. This could perform less validation /// (i.e. 
NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU /// buffer data access is not possible or more validation (i.e., /// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or /// corruptible source. NANOARROW_DLL ArrowErrorCode ArrowArrayFinishBuilding( struct ArrowArray* array, enum ArrowValidationLevel validation_level, struct ArrowError* error); /// @} /// \defgroup nanoarrow-array-view Reading arrays /// /// These functions read and validate the contents ArrowArray structures. /// /// @{ /// \brief Initialize the contents of an ArrowArrayView NANOARROW_DLL void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, enum ArrowType storage_type); /// \brief Move an ArrowArrayView /// /// Transfers the ArrowArrayView data and lifecycle management to another /// address and resets the contents of src. static inline void ArrowArrayViewMove(struct ArrowArrayView* src, struct ArrowArrayView* dst); /// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema NANOARROW_DLL ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, const struct ArrowSchema* schema, struct ArrowError* error); /// \brief Allocate the array_view->children array /// /// Includes the memory for each child struct ArrowArrayView NANOARROW_DLL ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, int64_t n_children); /// \brief Allocate array_view->dictionary NANOARROW_DLL ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view); /// \brief Set data-independent buffer sizes from length NANOARROW_DLL void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length); /// \brief Set buffer sizes and data pointers from an ArrowArray NANOARROW_DLL ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, const struct ArrowArray* array, struct ArrowError* error); /// \brief Set buffer sizes and data pointers from an 
ArrowArray except for those /// that require dereferencing buffer content. NANOARROW_DLL ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, const struct ArrowArray* array, struct ArrowError* error); /// \brief Get the number of buffers /// /// The number of buffers referred to by this ArrowArrayView. In many cases this can also /// be calculated from the ArrowLayout member of the ArrowArrayView or ArrowSchemaView; /// however, for binary view and string view types, the number of total buffers depends on /// the number of variadic buffers. static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView* array_view); /// \brief Get a view of a specific buffer from an ArrowArrayView /// /// This is the ArrowArrayView equivalent of ArrowArray::buffers[i] that includes /// size information (if known). static inline struct ArrowBufferView ArrowArrayViewGetBufferView( struct ArrowArrayView* array_view, int64_t i); /// \brief Get the function of a specific buffer in an ArrowArrayView /// /// In many cases this can also be obtained from the ArrowLayout member of the /// ArrowArrayView or ArrowSchemaView; however, for binary view and string view types, /// the function of each buffer may be different between two arrays of the same type /// depending on the number of variadic buffers. static inline enum ArrowBufferType ArrowArrayViewGetBufferType( struct ArrowArrayView* array_view, int64_t i); /// \brief Get the data type of a specific buffer in an ArrowArrayView /// /// In many cases this can also be obtained from the ArrowLayout member of the /// ArrowArrayView or ArrowSchemaView; however, for binary view and string view types, /// the data type of each buffer may be different between two arrays of the same type /// depending on the number of variadic buffers. 
static inline enum ArrowType ArrowArrayViewGetBufferDataType( struct ArrowArrayView* array_view, int64_t i); /// \brief Get the element size (in bits) of a specific buffer in an ArrowArrayView /// /// In many cases this can also be obtained from the ArrowLayout member of the /// ArrowArrayView or ArrowSchemaView; however, for binary view and string view types, /// the element width of each buffer may be different between two arrays of the same type /// depending on the number of variadic buffers. static inline int64_t ArrowArrayViewGetBufferElementSizeBits( struct ArrowArrayView* array_view, int64_t i); /// \brief Performs checks on the content of an ArrowArrayView /// /// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray, /// the buffer sizes and some content (first and last offset) have already /// been validated at the "default" level. If setting the buffer pointers /// and sizes otherwise, you may wish to perform checks at a different level. See /// documentation for ArrowValidationLevel for the details of checks performed /// at each level. NANOARROW_DLL ArrowErrorCode ArrowArrayViewValidate( struct ArrowArrayView* array_view, enum ArrowValidationLevel validation_level, struct ArrowError* error); /// \brief Compare two ArrowArrayView objects for equality /// /// Given two ArrowArrayView instances, place either 0 (not equal) or /// 1 (equal) at the address pointed to by out. If the comparison determines /// that actual and expected are not equal, a reason will be communicated via /// error if error is non-NULL. /// /// Returns NANOARROW_OK if the comparison completed successfully. 
NANOARROW_DLL ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, const struct ArrowArrayView* expected, enum ArrowCompareLevel level, int* out, struct ArrowError* reason); /// \brief Reset the contents of an ArrowArrayView and frees resources NANOARROW_DLL void ArrowArrayViewReset(struct ArrowArrayView* array_view); /// \brief Check for a null element in an ArrowArrayView static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, int64_t i); /// \brief Compute null count for an ArrowArrayView static inline int64_t ArrowArrayViewComputeNullCount( const struct ArrowArrayView* array_view); /// \brief Get the type id of a union array element static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, int64_t i); /// \brief Get the child index of a union array element static inline int8_t ArrowArrayViewUnionChildIndex( const struct ArrowArrayView* array_view, int64_t i); /// \brief Get the index to use into the relevant union child array static inline int64_t ArrowArrayViewUnionChildOffset( const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an integer /// /// This function does not check for null values, that values are actually integers, or /// that values are within a valid range for an int64. static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an unsigned integer /// /// This function does not check for null values, that values are actually integers, or /// that values are within a valid range for a uint64. static inline uint64_t ArrowArrayViewGetUIntUnsafe( const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as a double /// /// This function does not check for null values, or /// that values are within a valid range for a double. 
static inline double ArrowArrayViewGetDoubleUnsafe( const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowStringView /// /// This function does not check for null values. static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowBufferView /// /// This function does not check for null values. static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowDecimal /// /// This function does not check for null values. The out parameter must /// be initialized with ArrowDecimalInit() with the proper parameters for this /// type before calling this for the first time. static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, int64_t i, struct ArrowDecimal* out); /// @} /// \defgroup nanoarrow-basic-array-stream Basic ArrowArrayStream implementation /// /// An implementation of an ArrowArrayStream based on a collection of /// zero or more previously-existing ArrowArray objects. Users should /// initialize and/or validate the contents before transferring the /// responsibility of the ArrowArrayStream elsewhere. /// /// @{ /// \brief Initialize an ArrowArrayStream backed by this implementation /// /// This function moves the ownership of schema to the array_stream. If /// this function returns NANOARROW_OK, the caller is responsible for /// releasing the ArrowArrayStream. NANOARROW_DLL ArrowErrorCode ArrowBasicArrayStreamInit( struct ArrowArrayStream* array_stream, struct ArrowSchema* schema, int64_t n_arrays); /// \brief Set the ith ArrowArray in this ArrowArrayStream. /// /// array_stream must have been initialized with ArrowBasicArrayStreamInit(). /// This function moves the ownership of array to the array_stream. 
i must /// be greater than or equal to zero and less than the value of n_arrays passed in /// ArrowBasicArrayStreamInit(). Callers are not required to fill all /// n_arrays members (i.e., n_arrays is a maximum bound). NANOARROW_DLL void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, struct ArrowArray* array); /// \brief Validate the contents of this ArrowArrayStream /// /// array_stream must have been initialized with ArrowBasicArrayStreamInit(). /// This function uses ArrowArrayStreamInitFromSchema() and ArrowArrayStreamSetArray() /// to validate the contents of the arrays. NANOARROW_DLL ArrowErrorCode ArrowBasicArrayStreamValidate( const struct ArrowArrayStream* array_stream, struct ArrowError* error); /// @} // Undefine ArrowErrorCode, which may have been defined to annotate functions that return // it to warn for an unused result. #if defined(ArrowErrorCode) #undef ArrowErrorCode #endif // Inline function definitions #ifdef __cplusplus } #endif #endif // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED #define NANOARROW_BUFFER_INLINE_H_INCLUDED #include #include #include #ifdef __cplusplus extern "C" { #endif // Modified from Arrow C++ (1eb46f76) cpp/src/arrow/chunk_resolver.h#L133-L162 static inline int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, int64_t lo, int64_t hi) { // Similar to std::upper_bound(), but slightly different as our offsets // array always starts with 0. int64_t n = hi - lo; // First iteration does not need to check for n > 1 // (lo < hi is guaranteed by the precondition). NANOARROW_DCHECK(n > 1); do { const int64_t m = n >> 1; const int64_t mid = lo + m; if (index >= offsets[mid]) { lo = mid; n -= m; } else { n = m; } } while (n > 1); return lo; } static inline int64_t ArrowResolveChunk32(int32_t index, const int32_t* offsets, int32_t lo, int32_t hi) { // Similar to std::upper_bound(), but slightly different as our offsets // array always starts with 0. int32_t n = hi - lo; // First iteration does not need to check for n > 1 // (lo < hi is guaranteed by the precondition). 
NANOARROW_DCHECK(n > 1); do { const int32_t m = n >> 1; const int32_t mid = lo + m; if (index >= offsets[mid]) { lo = mid; n -= m; } else { n = m; } } while (n > 1); return lo; } static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) { int64_t doubled_capacity = current_capacity * 2; if (doubled_capacity > new_capacity) { return doubled_capacity; } else { return new_capacity; } } // float to half float conversion, adapted from Arrow Go // https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go static inline uint16_t ArrowFloatToHalfFloat(float value) { union { float f; uint32_t b; } u; u.f = value; uint16_t sn = (uint16_t)((u.b >> 31) & 0x1); uint16_t exp = (u.b >> 23) & 0xff; int16_t res = (int16_t)(exp - 127 + 15); uint16_t fc = (uint16_t)(u.b >> 13) & 0x3ff; if (exp == 0) { res = 0; } else if (exp == 0xff) { res = 0x1f; } else if (res > 0x1e) { res = 0x1f; fc = 0; } else if (res < 0x01) { res = 0; fc = 0; } return (uint16_t)((sn << 15) | (uint16_t)(res << 10) | fc); } // half float to float conversion, adapted from Arrow Go // https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go static inline float ArrowHalfFloatToFloat(uint16_t value) { uint32_t sn = (uint32_t)((value >> 15) & 0x1); uint32_t exp = (value >> 10) & 0x1f; uint32_t res = exp + 127 - 15; uint32_t fc = value & 0x3ff; if (exp == 0) { res = 0; } else if (exp == 0x1f) { res = 0xff; } union { float f; uint32_t b; } u; u.b = (uint32_t)(sn << 31) | (uint32_t)(res << 23) | (uint32_t)(fc << 13); return u.f; } static inline void ArrowBufferInit(struct ArrowBuffer* buffer) { buffer->data = NULL; buffer->size_bytes = 0; buffer->capacity_bytes = 0; buffer->allocator = ArrowBufferAllocatorDefault(); } static inline ArrowErrorCode ArrowBufferSetAllocator( struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) { // This is not a perfect test for "has a buffer already been allocated" // but is likely to catch most cases. 
if (buffer->data == NULL) { buffer->allocator = allocator; return NANOARROW_OK; } else { return EINVAL; } } static inline void ArrowBufferReset(struct ArrowBuffer* buffer) { buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data, buffer->capacity_bytes); ArrowBufferInit(buffer); } static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst) { memcpy(dst, src, sizeof(struct ArrowBuffer)); src->data = NULL; ArrowBufferInit(src); } static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, int64_t new_size_bytes, char shrink_to_fit) { if (new_size_bytes < 0) { return EINVAL; } int needs_reallocation = new_size_bytes > buffer->capacity_bytes || (shrink_to_fit && new_size_bytes < buffer->capacity_bytes); if (needs_reallocation) { buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data, buffer->capacity_bytes, new_size_bytes); if (buffer->data == NULL && new_size_bytes > 0) { buffer->capacity_bytes = 0; buffer->size_bytes = 0; return ENOMEM; } buffer->capacity_bytes = new_size_bytes; } buffer->size_bytes = new_size_bytes; return NANOARROW_OK; } static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, int64_t additional_size_bytes) { int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; if (min_capacity_bytes <= buffer->capacity_bytes) { return NANOARROW_OK; } int64_t new_capacity_bytes = _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes); buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes); if (buffer->data == NULL && new_capacity_bytes > 0) { buffer->capacity_bytes = 0; buffer->size_bytes = 0; return ENOMEM; } buffer->capacity_bytes = new_capacity_bytes; return NANOARROW_OK; } static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, int64_t size_bytes) { if (size_bytes > 0) { NANOARROW_DCHECK(buffer->data != NULL); memcpy(buffer->data + 
buffer->size_bytes, data, size_bytes); buffer->size_bytes += size_bytes; } } static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, const void* data, int64_t size_bytes) { NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); ArrowBufferAppendUnsafe(buffer, data, size_bytes); return NANOARROW_OK; } static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, int8_t value) { return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); } static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, uint8_t value) { return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); } static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, int16_t value) { return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); } static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, uint16_t value) { return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); } static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, int32_t value) { return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); } static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, uint32_t value) { return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); } static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, int64_t value) { return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); } static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, uint64_t value) { return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); } static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, double value) { return ArrowBufferAppend(buffer, &value, sizeof(double)); } static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, float value) { return ArrowBufferAppend(buffer, &value, sizeof(float)); } static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, 
struct ArrowStringView value) { return ArrowBufferAppend(buffer, value.data, value.size_bytes); } static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, struct ArrowBufferView value) { return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); } static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, uint8_t value, int64_t size_bytes) { if (size_bytes == 0) { return NANOARROW_OK; } NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); NANOARROW_DCHECK(buffer->data != NULL); // To help clang-tidy memset(buffer->data + buffer->size_bytes, value, size_bytes); buffer->size_bytes += size_bytes; return NANOARROW_OK; } static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; static const uint8_t _ArrowkBytePopcount[] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) { return (value + 7) & ~((int64_t)7); } static inline int64_t 
_ArrowRoundDownToMultipleOf8(int64_t value) { return (value / 8) * 8; } static inline int64_t _ArrowBytesForBits(int64_t bits) { return (bits >> 3) + ((bits & 7) != 0); } static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { out[0] = (word & 0x1) != 0; out[1] = (word & 0x2) != 0; out[2] = (word & 0x4) != 0; out[3] = (word & 0x8) != 0; out[4] = (word & 0x10) != 0; out[5] = (word & 0x20) != 0; out[6] = (word & 0x40) != 0; out[7] = (word & 0x80) != 0; } static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) { out[0] = (word & 0x1) != 0; out[1] = (word & 0x2) != 0; out[2] = (word & 0x4) != 0; out[3] = (word & 0x8) != 0; out[4] = (word & 0x10) != 0; out[5] = (word & 0x20) != 0; out[6] = (word & 0x40) != 0; out[7] = (word & 0x80) != 0; } static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | ((values[7] + 0x7f) & 0x80)); } static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | ((values[7] + 0x7f) & 0x80)); } static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { return (bits[i >> 3] >> (i & 0x07)) & 1; } static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, int64_t length, int8_t* out) { if (length == 0) { return; } const int64_t i_begin = start_offset; const int64_t i_end = start_offset + length; const int64_t i_last_valid = i_end - 1; const int64_t bytes_begin = i_begin / 8; const int64_t bytes_last_valid = i_last_valid / 8; if (bytes_begin == bytes_last_valid) { for (int i = 0; i < length; i++) { out[i] = ArrowBitGet(&bits[bytes_begin], i + 
i_begin % 8); } return; } // first byte for (int i = 0; i < 8 - (i_begin % 8); i++) { *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); } // middle bytes for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { _ArrowBitsUnpackInt8(bits[i], out); out += 8; } // last byte const int bits_remaining = (int)(i_end % 8 == 0 ? 8 : i_end % 8); for (int i = 0; i < bits_remaining; i++) { *out++ = ArrowBitGet(&bits[bytes_last_valid], i); } } static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, int64_t length, int32_t* out) { if (length == 0) { return; } NANOARROW_DCHECK(bits != NULL && out != NULL); const int64_t i_begin = start_offset; const int64_t i_end = start_offset + length; const int64_t i_last_valid = i_end - 1; const int64_t bytes_begin = i_begin / 8; const int64_t bytes_last_valid = i_last_valid / 8; if (bytes_begin == bytes_last_valid) { for (int i = 0; i < length; i++) { out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); } return; } // first byte for (int i = 0; i < 8 - (i_begin % 8); i++) { *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); } // middle bytes for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { _ArrowBitsUnpackInt32(bits[i], out); out += 8; } // last byte const int bits_remaining = (int)(i_end % 8 == 0 ? 
8 : i_end % 8); for (int i = 0; i < bits_remaining; i++) { *out++ = ArrowBitGet(&bits[bytes_last_valid], i); } } static inline void ArrowBitSet(uint8_t* bits, int64_t i) { bits[i / 8] |= _ArrowkBitmask[i % 8]; } static inline void ArrowBitClear(uint8_t* bits, int64_t i) { bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; } static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) { bits[i / 8] ^= (uint8_t)(((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & _ArrowkBitmask[i % 8]); } static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, uint8_t bits_are_set) { if (length == 0) { return; } NANOARROW_DCHECK(bits != NULL); const int64_t i_begin = start_offset; const int64_t i_end = start_offset + length; const uint8_t fill_byte = (uint8_t)(-bits_are_set); const int64_t bytes_begin = i_begin / 8; const int64_t bytes_end = i_end / 8 + 1; const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; if (bytes_end == bytes_begin + 1) { // set bits within a single byte const uint8_t only_byte_mask = i_end % 8 == 0 ? 
first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); bits[bytes_begin] &= only_byte_mask; bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); return; } // set/clear trailing bits of first byte bits[bytes_begin] &= first_byte_mask; bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); if (bytes_end - bytes_begin > 2) { // set/clear whole bytes memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2)); } if (i_end % 8 == 0) { return; } // set/clear leading bits of last byte bits[bytes_end - 1] &= last_byte_mask; bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); } static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset, int64_t length) { if (length == 0) { return 0; } NANOARROW_DCHECK(bits != NULL); const int64_t i_begin = start_offset; const int64_t i_end = start_offset + length; const int64_t i_last_valid = i_end - 1; const int64_t bytes_begin = i_begin / 8; const int64_t bytes_last_valid = i_last_valid / 8; if (bytes_begin == bytes_last_valid) { // count bits within a single byte const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; const uint8_t only_byte_mask = i_end % 8 == 0 ? last_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask); const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; return _ArrowkBytePopcount[byte_masked]; } const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; const uint8_t last_byte_mask = i_end % 8 == 0 ? 
0 : _ArrowkTrailingBitmask[i_end % 8]; int64_t count = 0; // first byte count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; // middle bytes for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { count += _ArrowkBytePopcount[bits[i]]; } // last byte count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask]; return count; } static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) { ArrowBufferInit(&bitmap->buffer); bitmap->size_bits = 0; } static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst) { ArrowBufferMove(&src->buffer, &dst->buffer); dst->size_bits = src->size_bits; src->size_bits = 0; } static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, int64_t additional_size_bits) { int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; int64_t min_capacity_bytes = _ArrowBytesForBits(min_capacity_bits); int64_t current_size_bytes = bitmap->buffer.size_bytes; int64_t current_capacity_bytes = bitmap->buffer.capacity_bytes; if (min_capacity_bytes <= current_capacity_bytes) { return NANOARROW_OK; } int64_t additional_capacity_bytes = min_capacity_bytes - current_size_bytes; NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&bitmap->buffer, additional_capacity_bytes)); // Zero out the last byte for deterministic output in the common case // of reserving a known remaining size. We should have returned above // if there was not at least one additional byte to allocate; however, // DCHECK() just to be sure. 
NANOARROW_DCHECK(bitmap->buffer.capacity_bytes > current_capacity_bytes); bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; return NANOARROW_OK; } static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, int64_t new_size_bits, char shrink_to_fit) { if (new_size_bits < 0) { return EINVAL; } int64_t new_size_bytes = _ArrowBytesForBits(new_size_bits); NANOARROW_RETURN_NOT_OK( ArrowBufferResize(&bitmap->buffer, new_size_bytes, shrink_to_fit)); bitmap->size_bits = new_size_bits; return NANOARROW_OK; } static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) { NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); return NANOARROW_OK; } static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) { ArrowBitsSetTo(bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); bitmap->size_bits += length; bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); } static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, const int8_t* values, int64_t n_values) { if (n_values == 0) { return; } NANOARROW_DCHECK(bitmap->buffer.data != NULL); NANOARROW_DCHECK(values != NULL); const int8_t* values_cursor = values; int64_t n_remaining = n_values; int64_t out_i_cursor = bitmap->size_bits; uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; // First byte if ((out_i_cursor % 8) != 0) { int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; for (int i = 0; i < n_partial_bits; i++) { ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); } out_cursor++; values_cursor += n_partial_bits; n_remaining -= n_partial_bits; } // Middle bytes int64_t n_full_bytes = n_remaining / 8; for (int64_t i = 0; i < n_full_bytes; i++) { _ArrowBitmapPackInt8(values_cursor, out_cursor); values_cursor += 8; out_cursor++; } // Last byte 
out_i_cursor += n_full_bytes * 8; n_remaining -= n_full_bytes * 8; if (n_remaining > 0) { // Zero out the last byte *out_cursor = 0x00; for (int i = 0; i < n_remaining; i++) { ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); } out_cursor++; } bitmap->size_bits += n_values; bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; } static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, const int32_t* values, int64_t n_values) { if (n_values == 0) { return; } NANOARROW_DCHECK(bitmap->buffer.data != NULL); NANOARROW_DCHECK(values != NULL); const int32_t* values_cursor = values; int64_t n_remaining = n_values; int64_t out_i_cursor = bitmap->size_bits; uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; // First byte if ((out_i_cursor % 8) != 0) { int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; for (int i = 0; i < n_partial_bits; i++) { ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values[i]); } out_cursor++; values_cursor += n_partial_bits; n_remaining -= n_partial_bits; } // Middle bytes int64_t n_full_bytes = n_remaining / 8; for (int64_t i = 0; i < n_full_bytes; i++) { _ArrowBitmapPackInt32(values_cursor, out_cursor); values_cursor += 8; out_cursor++; } // Last byte out_i_cursor += n_full_bytes * 8; n_remaining -= n_full_bytes * 8; if (n_remaining > 0) { // Zero out the last byte *out_cursor = 0x00; for (int i = 0; i < n_remaining; i++) { ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values_cursor[i]); } out_cursor++; } bitmap->size_bits += n_values; bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; } static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { ArrowBufferReset(&bitmap->buffer); bitmap->size_bits = 0; } #ifdef __cplusplus } #endif #endif // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED #define NANOARROW_ARRAY_INLINE_H_INCLUDED #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; return &private_data->bitmap; } static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; switch (i) { case 0: return &private_data->bitmap.buffer; case 1: return private_data->buffers; default: if (array->n_buffers > 3 && i == (array->n_buffers - 1)) { // The variadic buffer sizes buffer if for a BinaryView/String view array // is always stored in private_data->buffers[1]; however, from the numbered // buffers perspective this is the array->buffers[array->n_buffers - 1]. 
return private_data->buffers + 1; } else if (array->n_buffers > 3) { // If there are one or more variadic buffers, they are stored in // private_data->variadic_buffers return private_data->variadic_buffers + (i - 2); } else { // Otherwise, we're just accessing buffer at index 2 (e.g., String/Binary // data buffer or variadic sizes buffer for the case where there are no // variadic buffers) NANOARROW_DCHECK(i == 2); return private_data->buffers + i - 1; } } } // We don't currently support the case of unions where type_id != child_index; // however, these functions are used to keep track of where that assumption // is made. static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array, int8_t type_id) { NANOARROW_UNUSED(array); return type_id; } static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array, int8_t child_index) { NANOARROW_UNUSED(array); return child_index; } static inline int32_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { if (*type_ids == '\0') { return 0; } int32_t i = 0; long type_id; char* end_ptr; do { type_id = strtol(type_ids, &end_ptr, 10); if (end_ptr == type_ids || type_id < 0 || type_id > 127) { return -1; } if (out != NULL) { out[i] = (int8_t)type_id; } i++; type_ids = end_ptr; if (*type_ids == '\0') { return i; } else if (*type_ids != ',') { return -1; } else { type_ids++; } } while (1); return -1; } static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* type_ids, int64_t n_type_ids, int64_t n_children) { if (n_type_ids != n_children) { return 0; } for (int8_t i = 0; i < n_type_ids; i++) { if (type_ids[i] != i) { return 0; } } return 1; } static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str, int64_t n_children) { int8_t type_ids[128]; int32_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, n_children); } static inline ArrowErrorCode 
ArrowArrayStartAppending(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; switch (private_data->storage_type) { case NANOARROW_TYPE_UNINITIALIZED: return EINVAL; case NANOARROW_TYPE_SPARSE_UNION: case NANOARROW_TYPE_DENSE_UNION: // Note that this value could be -1 if the type_ids string was invalid if (private_data->union_type_id_is_child_index != 1) { return EINVAL; } else { break; } default: break; } if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { return EINVAL; } // Initialize any data offset buffer with a single zero for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && private_data->layout.element_size_bits[i] == 64) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && private_data->layout.element_size_bits[i] == 32) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); } } // Start building any child arrays or dictionaries for (int64_t i = 0; i < array->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); } if (array->dictionary != NULL) { NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary)); } return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1)); } for (int64_t i = 0; i < array->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); } if (array->dictionary != NULL) { NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary)); } return NANOARROW_OK; } static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* 
array, int64_t buffer_i, uint8_t value, int64_t n) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); int64_t bytes_required = _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] * (array->length + 1)) / 8; if (bytes_required > buffer->size_bytes) { NANOARROW_RETURN_NOT_OK( ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes)); } ArrowBitsSetTo(buffer->data, array->length, n, value); return NANOARROW_OK; } static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* array, int64_t n, uint8_t is_valid) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; if (n == 0) { return NANOARROW_OK; } // Some type-specific handling switch (private_data->storage_type) { case NANOARROW_TYPE_NA: // (An empty value for a null array *is* a null) array->null_count += n; array->length += n; return NANOARROW_OK; case NANOARROW_TYPE_DENSE_UNION: { // Add one null to the first child and append n references to that child int8_t type_id = _ArrowArrayUnionTypeId(array, 0); NANOARROW_RETURN_NOT_OK( _ArrowArrayAppendEmptyInternal(array->children[0], 1, is_valid)); NANOARROW_RETURN_NOT_OK( ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); for (int64_t i = 0; i < n; i++) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( ArrowArrayBuffer(array, 1), (int32_t)array->children[0]->length - 1)); } // For the purposes of array->null_count, union elements are never considered "null" // even if some children contain nulls. 
array->length += n; return NANOARROW_OK; } case NANOARROW_TYPE_SPARSE_UNION: { // Add n nulls to the first child and append n references to that child int8_t type_id = _ArrowArrayUnionTypeId(array, 0); NANOARROW_RETURN_NOT_OK( _ArrowArrayAppendEmptyInternal(array->children[0], n, is_valid)); for (int64_t i = 1; i < array->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); } NANOARROW_RETURN_NOT_OK( ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); // For the purposes of array->null_count, union elements are never considered "null" // even if some children contain nulls. array->length += n; return NANOARROW_OK; } case NANOARROW_TYPE_FIXED_SIZE_LIST: NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( array->children[0], n * private_data->layout.child_size_elements)); break; case NANOARROW_TYPE_STRUCT: for (int64_t i = 0; i < array->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); } break; default: break; } // Append n is_valid bits to the validity bitmap. If we haven't allocated a bitmap yet // and we need to append nulls, do it now. 
if (!is_valid && private_data->bitmap.buffer.data == NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n)); ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); } else if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); } // Add appropriate buffer fill for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); int64_t size_bytes = private_data->layout.element_size_bits[i] / 8; switch (private_data->layout.buffer_type[i]) { case NANOARROW_BUFFER_TYPE_NONE: case NANOARROW_BUFFER_TYPE_VARIADIC_DATA: case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: case NANOARROW_BUFFER_TYPE_VALIDITY: // These buffer types don't require initialization for empty appends: // - NONE: No buffer exists // - VARIADIC_*: Handled by child arrays // - VALIDITY: Already handled in previous bitmap logic break; case NANOARROW_BUFFER_TYPE_SIZE: // Size buffers (e.g., string/array lengths) should be zero-initialized: // This ensures empty elements have logical zero-length NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); break; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: // Offset buffers require special handling to maintain continuity. // 1. Reserve space for new offset entries NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); // 2. Duplicate last offset value for each new (empty) element for (int64_t j = 0; j < n; j++) { ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), size_bytes); } // 3. 
Skip next buffer (DATA) since it's paired with offsets // Rationale: Offset buffers are always followed by data buffers // that don't require separate initialization here i++; break; case NANOARROW_BUFFER_TYPE_DATA: // Fixed-width data buffers require zero-initialization: if (private_data->layout.element_size_bits[i] % 8 == 0) { // Byte-aligned: use efficient memset-style fill NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); } else { // Bit-packed: use special bitwise initialization NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); } break; case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: // View offset buffers (for string/binary view types) require zero-initialization. NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); break; case NANOARROW_BUFFER_TYPE_TYPE_ID: case NANOARROW_BUFFER_TYPE_UNION_OFFSET: // These buffer types should have been handled by the outer type switch and // are not expected here, indicating an internal logic error. 
return EINVAL; } } array->length += n; array->null_count += n * !is_valid; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) { return _ArrowArrayAppendEmptyInternal(array, n, 0); } static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n) { return _ArrowArrayAppendEmptyInternal(array, n, 1); } static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { case NANOARROW_TYPE_INT64: NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); break; case NANOARROW_TYPE_INT32: _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, (int32_t)value)); break; case NANOARROW_TYPE_INT16: _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, (int16_t)value)); break; case NANOARROW_TYPE_INT8: _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, (int8_t)value)); break; case NANOARROW_TYPE_UINT64: case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_UINT8: _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); return ArrowArrayAppendUInt(array, value); case NANOARROW_TYPE_DOUBLE: NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); break; case NANOARROW_TYPE_FLOAT: NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); break; case NANOARROW_TYPE_HALF_FLOAT: NANOARROW_RETURN_NOT_OK( ArrowBufferAppendUInt16(data_buffer, ArrowFloatToHalfFloat((float)value))); break; case NANOARROW_TYPE_BOOL: NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); break; default: return EINVAL; } if 
(private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, uint64_t value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { case NANOARROW_TYPE_UINT64: NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); break; case NANOARROW_TYPE_UINT32: _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); break; case NANOARROW_TYPE_UINT16: _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); break; case NANOARROW_TYPE_UINT8: _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); break; case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_INT16: case NANOARROW_TYPE_INT8: _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX); return ArrowArrayAppendInt(array, value); case NANOARROW_TYPE_DOUBLE: NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); break; case NANOARROW_TYPE_FLOAT: NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); break; case NANOARROW_TYPE_HALF_FLOAT: NANOARROW_RETURN_NOT_OK( ArrowBufferAppendUInt16(data_buffer, ArrowFloatToHalfFloat((float)value))); break; case NANOARROW_TYPE_BOOL: NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); break; default: return EINVAL; } if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayAppendDouble(struct 
ArrowArray* array, double value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { case NANOARROW_TYPE_DOUBLE: NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double))); break; case NANOARROW_TYPE_FLOAT: NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); break; case NANOARROW_TYPE_HALF_FLOAT: NANOARROW_RETURN_NOT_OK( ArrowBufferAppendUInt16(data_buffer, ArrowFloatToHalfFloat((float)value))); break; default: return EINVAL; } if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } // Binary views only have two fixed buffers, but be aware that they must also // always have more 1 buffer to store variadic buffer sizes (even if there are none) #define NANOARROW_BINARY_VIEW_FIXED_BUFFERS 2 #define NANOARROW_BINARY_VIEW_INLINE_SIZE 12 #define NANOARROW_BINARY_VIEW_PREFIX_SIZE 4 #define NANOARROW_BINARY_VIEW_BLOCK_SIZE (32 << 10) // 32KB // The Arrow C++ implementation uses anonymous structs as members // of the ArrowBinaryView. 
For Cython support in this library, we define // those structs outside of the ArrowBinaryView struct ArrowBinaryViewInlined { int32_t size; uint8_t data[NANOARROW_BINARY_VIEW_INLINE_SIZE]; }; struct ArrowBinaryViewRef { int32_t size; uint8_t prefix[NANOARROW_BINARY_VIEW_PREFIX_SIZE]; int32_t buffer_index; int32_t offset; }; union ArrowBinaryView { struct ArrowBinaryViewInlined inlined; struct ArrowBinaryViewRef ref; int64_t alignment_dummy; }; static inline int32_t ArrowArrayVariadicBufferCount(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; return private_data->n_variadic_buffers; } static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray* array, int32_t n_buffers) { const int32_t n_current_bufs = ArrowArrayVariadicBufferCount(array); const int32_t nvariadic_bufs_needed = n_current_bufs + n_buffers; struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; private_data->variadic_buffers = (struct ArrowBuffer*)ArrowRealloc( private_data->variadic_buffers, sizeof(struct ArrowBuffer) * nvariadic_bufs_needed); if (private_data->variadic_buffers == NULL) { return ENOMEM; } private_data->n_variadic_buffers = nvariadic_bufs_needed; array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1 + nvariadic_bufs_needed; private_data->buffer_data = (const void**)ArrowRealloc( private_data->buffer_data, array->n_buffers * sizeof(void*)); for (int32_t i = n_current_bufs; i < nvariadic_bufs_needed; i++) { ArrowBufferInit(&private_data->variadic_buffers[i]); private_data->buffer_data[NANOARROW_BINARY_VIEW_FIXED_BUFFERS + i] = NULL; } // Zero out memory for the final buffer (variadic sizes buffer we haven't built yet) private_data->buffer_data[NANOARROW_BINARY_VIEW_FIXED_BUFFERS + nvariadic_bufs_needed] = NULL; // Ensure array->buffers points to a valid value array->buffers = private_data->buffer_data; return NANOARROW_OK; } static inline 
ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, struct ArrowBufferView value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; if (private_data->storage_type == NANOARROW_TYPE_STRING_VIEW || private_data->storage_type == NANOARROW_TYPE_BINARY_VIEW) { struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); union ArrowBinaryView bvt; bvt.inlined.size = (int32_t)value.size_bytes; if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) { memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes); memset(bvt.inlined.data + bvt.inlined.size, 0, NANOARROW_BINARY_VIEW_INLINE_SIZE - bvt.inlined.size); } else { int32_t current_n_vbufs = ArrowArrayVariadicBufferCount(array); if (current_n_vbufs == 0 || private_data->variadic_buffers[current_n_vbufs - 1].size_bytes + value.size_bytes > NANOARROW_BINARY_VIEW_BLOCK_SIZE) { const int32_t additional_bufs_needed = 1; NANOARROW_RETURN_NOT_OK( ArrowArrayAddVariadicBuffers(array, additional_bufs_needed)); current_n_vbufs += additional_bufs_needed; } const int32_t buf_index = current_n_vbufs - 1; struct ArrowBuffer* variadic_buf = &private_data->variadic_buffers[buf_index]; memcpy(bvt.ref.prefix, value.data.as_char, NANOARROW_BINARY_VIEW_PREFIX_SIZE); bvt.ref.buffer_index = (int32_t)buf_index; bvt.ref.offset = (int32_t)variadic_buf->size_bytes; NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(variadic_buf, value.data.as_char, value.size_bytes)); } NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &bvt, sizeof(bvt))); } else { struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); struct ArrowBuffer* data_buffer = ArrowArrayBuffer( array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); int32_t offset; int64_t large_offset; int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; switch (private_data->storage_type) { case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: offset = 
((int32_t*)offset_buffer->data)[array->length]; if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) { return EOVERFLOW; } offset += (int32_t)value.size_bytes; NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); break; case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_LARGE_BINARY: large_offset = ((int64_t*)offset_buffer->data)[array->length]; large_offset += value.size_bytes; NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t))); NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); break; case NANOARROW_TYPE_FIXED_SIZE_BINARY: if (value.size_bytes != fixed_size_bytes) { return EINVAL; } NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); break; default: return EINVAL; } } if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, struct ArrowStringView value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBufferView buffer_view; buffer_view.data.data = value.data; buffer_view.size_bytes = value.size_bytes; switch (private_data->storage_type) { case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_STRING_VIEW: case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_LARGE_BINARY: case NANOARROW_TYPE_BINARY_VIEW: return ArrowArrayAppendBytes(array, buffer_view); default: return EINVAL; } } static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, const struct ArrowInterval* value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* data_buffer = 
ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { case NANOARROW_TYPE_INTERVAL_MONTHS: { if (value->type != NANOARROW_TYPE_INTERVAL_MONTHS) { return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); break; } case NANOARROW_TYPE_INTERVAL_DAY_TIME: { if (value->type != NANOARROW_TYPE_INTERVAL_DAY_TIME) { return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->ms)); break; } case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { if (value->type != NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO) { return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(data_buffer, value->ns)); break; } default: return EINVAL; } if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, const struct ArrowDecimal* value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { case NANOARROW_TYPE_DECIMAL32: if (value->n_words != 0) { return EINVAL; } else { NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value->words, sizeof(uint32_t))); break; } case NANOARROW_TYPE_DECIMAL64: if (value->n_words != 1) { return EINVAL; } else { NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value->words, sizeof(uint64_t))); break; } case NANOARROW_TYPE_DECIMAL128: if (value->n_words != 2) { return EINVAL; } else { NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value->words, 2 * sizeof(uint64_t))); break; } case NANOARROW_TYPE_DECIMAL256: if (value->n_words 
!= 4) { return EINVAL; } else { NANOARROW_RETURN_NOT_OK( ArrowBufferAppend(data_buffer, value->words, 4 * sizeof(uint64_t))); break; } default: return EINVAL; } if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; int64_t child_length; switch (private_data->storage_type) { case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_MAP: child_length = array->children[0]->length; if (child_length > INT32_MAX) { return EOVERFLOW; } NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); break; case NANOARROW_TYPE_LARGE_LIST: child_length = array->children[0]->length; NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length)); break; case NANOARROW_TYPE_FIXED_SIZE_LIST: child_length = array->children[0]->length; if (child_length != ((array->length + 1) * private_data->layout.child_size_elements)) { return EINVAL; } break; case NANOARROW_TYPE_LIST_VIEW: { child_length = array->children[0]->length; if (child_length > INT32_MAX) { return EOVERFLOW; } const int32_t last_valid_offset = (int32_t)private_data->list_view_offset; NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), last_valid_offset)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( ArrowArrayBuffer(array, 2), (int32_t)child_length - last_valid_offset)); private_data->list_view_offset = child_length; break; } case NANOARROW_TYPE_LARGE_LIST_VIEW: { child_length = array->children[0]->length; const int64_t last_valid_offset = private_data->list_view_offset; NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), last_valid_offset)); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, 2), 
child_length - last_valid_offset)); private_data->list_view_offset = child_length; break; } case NANOARROW_TYPE_STRUCT: for (int64_t i = 0; i < array->n_children; i++) { child_length = array->children[i]->length; if (child_length != (array->length + 1)) { return EINVAL; } } break; default: return EINVAL; } if (private_data->bitmap.buffer.data != NULL) { NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); } array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, int8_t type_id) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); if (child_index < 0 || child_index >= array->n_children) { return EINVAL; } switch (private_data->storage_type) { case NANOARROW_TYPE_DENSE_UNION: // Append the target child length to the union offsets buffer _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); break; case NANOARROW_TYPE_SPARSE_UNION: // Append one empty to any non-target column that isn't already the right length // or abort if appending a null will result in a column with invalid length for (int64_t i = 0; i < array->n_children; i++) { if (i == child_index || array->children[i]->length == (array->length + 1)) { continue; } if (array->children[i]->length != array->length) { return EINVAL; } NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], 1)); } break; default: return EINVAL; } // Write to the type_ids buffer NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); array->length++; return NANOARROW_OK; } static inline void ArrowArrayViewMove(struct ArrowArrayView* src, struct ArrowArrayView* dst) { memcpy(dst, src, sizeof(struct ArrowArrayView)); 
ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); } static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView* array_view) { switch (array_view->storage_type) { case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: return NANOARROW_BINARY_VIEW_FIXED_BUFFERS + array_view->n_variadic_buffers + 1; default: break; } int64_t n_buffers = 0; for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { break; } n_buffers++; } return n_buffers; } static inline struct ArrowBufferView ArrowArrayViewGetBufferView( struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { return array_view->buffer_views[i]; } else if (i >= (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { struct ArrowBufferView view; view.data.as_int64 = array_view->variadic_buffer_sizes; view.size_bytes = array_view->n_variadic_buffers * sizeof(double); return view; } else { struct ArrowBufferView view; view.data.data = array_view->variadic_buffers[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS]; view.size_bytes = array_view->variadic_buffer_sizes[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS]; return view; } default: // We need this check to avoid -Warray-bounds from complaining if (i >= NANOARROW_MAX_FIXED_BUFFERS) { struct ArrowBufferView view; view.data.data = NULL; view.size_bytes = 0; return view; } else { return array_view->buffer_views[i]; } } } enum ArrowBufferType ArrowArrayViewGetBufferType(struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { return array_view->layout.buffer_type[i]; } else if (i == (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { return 
NANOARROW_BUFFER_TYPE_VARIADIC_SIZE; } else { return NANOARROW_BUFFER_TYPE_VARIADIC_DATA; } default: // We need this check to avoid -Warray-bounds from complaining if (i >= NANOARROW_MAX_FIXED_BUFFERS) { return NANOARROW_BUFFER_TYPE_NONE; } else { return array_view->layout.buffer_type[i]; } } } static inline enum ArrowType ArrowArrayViewGetBufferDataType( struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { return array_view->layout.buffer_data_type[i]; } else if (i >= (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { return NANOARROW_TYPE_INT64; } else if (array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) { return NANOARROW_TYPE_BINARY; } else { return NANOARROW_TYPE_STRING; } default: // We need this check to avoid -Warray-bounds from complaining if (i >= NANOARROW_MAX_FIXED_BUFFERS) { return NANOARROW_TYPE_UNINITIALIZED; } else { return array_view->layout.buffer_data_type[i]; } } } static inline int64_t ArrowArrayViewGetBufferElementSizeBits( struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_BINARY_VIEW: case NANOARROW_TYPE_STRING_VIEW: if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) { return array_view->layout.element_size_bits[i]; } else if (i >= (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) { return sizeof(int64_t) * 8; } else { return 0; } default: // We need this check to avoid -Warray-bounds from complaining if (i >= NANOARROW_MAX_FIXED_BUFFERS) { return 0; } else { return array_view->layout.element_size_bits[i]; } } } static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, int64_t i) { const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; i += array_view->offset; switch (array_view->storage_type) { case NANOARROW_TYPE_NA: return 0x01; case 
NANOARROW_TYPE_DENSE_UNION: case NANOARROW_TYPE_SPARSE_UNION: // Unions are "never null" in Arrow land return 0x00; default: return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); } } static inline int64_t ArrowArrayViewComputeNullCount( const struct ArrowArrayView* array_view) { if (array_view->length == 0) { return 0; } switch (array_view->storage_type) { case NANOARROW_TYPE_NA: return array_view->length; case NANOARROW_TYPE_DENSE_UNION: case NANOARROW_TYPE_SPARSE_UNION: // Unions are "never null" in Arrow land return 0; default: break; } const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; if (validity_buffer == NULL) { return 0; } return array_view->length - ArrowBitCountSet(validity_buffer, array_view->offset, array_view->length); } static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_DENSE_UNION: case NANOARROW_TYPE_SPARSE_UNION: return array_view->buffer_views[0].data.as_int8[array_view->offset + i]; default: return -1; } } static inline int8_t ArrowArrayViewUnionChildIndex( const struct ArrowArrayView* array_view, int64_t i) { int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); if (array_view->union_type_id_map == NULL) { return type_id; } else { return array_view->union_type_id_map[type_id]; } } static inline int64_t ArrowArrayViewUnionChildOffset( const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_DENSE_UNION: return array_view->buffer_views[1].data.as_int32[array_view->offset + i]; case NANOARROW_TYPE_SPARSE_UNION: return array_view->offset + i; default: return -1; } } static inline int64_t ArrowArrayViewListChildOffset( const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_MAP: case NANOARROW_TYPE_LIST_VIEW: return array_view->buffer_views[1].data.as_int32[i]; case 
NANOARROW_TYPE_LARGE_LIST:
    case NANOARROW_TYPE_LARGE_LIST_VIEW:
      return array_view->buffer_views[1].data.as_int64[i];
    default:
      return -1;
  }
}

/* Resolve the bytes of string/binary-view element i: either the bytes inlined
 * in the 16-byte view struct, or a slice of the referenced variadic buffer. */
static struct ArrowBufferView ArrowArrayViewGetBytesFromViewArrayUnsafe(
    const struct ArrowArrayView* array_view, int64_t i) {
  const union ArrowBinaryView* bv = &array_view->buffer_views[1].data.as_binary_view[i];
  struct ArrowBufferView out = {{NULL}, bv->inlined.size};
  if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
    out.data.as_uint8 = bv->inlined.data;
    return out;
  }

  out.data.data = array_view->variadic_buffers[bv->ref.buffer_index];
  out.data.as_uint8 += bv->ref.offset;
  return out;
}

/* Element i coerced to int64.  "Unsafe": no null or bounds checking;
 * unsupported storage types return INT64_MAX. */
static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view,
                                                 int64_t i) {
  const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
  i += array_view->offset;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_INT64:
      return data_view->data.as_int64[i];
    case NANOARROW_TYPE_UINT64:
      return data_view->data.as_uint64[i];
    case NANOARROW_TYPE_INTERVAL_MONTHS:
    case NANOARROW_TYPE_INT32:
      return data_view->data.as_int32[i];
    case NANOARROW_TYPE_UINT32:
      return data_view->data.as_uint32[i];
    case NANOARROW_TYPE_INT16:
      return data_view->data.as_int16[i];
    case NANOARROW_TYPE_UINT16:
      return data_view->data.as_uint16[i];
    case NANOARROW_TYPE_INT8:
      return data_view->data.as_int8[i];
    case NANOARROW_TYPE_UINT8:
      return data_view->data.as_uint8[i];
    case NANOARROW_TYPE_DOUBLE:
      return (int64_t)data_view->data.as_double[i];
    case NANOARROW_TYPE_FLOAT:
      return (int64_t)data_view->data.as_float[i];
    case NANOARROW_TYPE_HALF_FLOAT:
      return (int64_t)ArrowHalfFloatToFloat(data_view->data.as_uint16[i]);
    case NANOARROW_TYPE_BOOL:
      return ArrowBitGet(data_view->data.as_uint8, i);
    default:
      return INT64_MAX;
  }
}

/* Element i coerced to uint64; unsupported storage types return UINT64_MAX. */
static inline uint64_t ArrowArrayViewGetUIntUnsafe(
    const struct ArrowArrayView* array_view, int64_t i) {
  i += array_view->offset;
  const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_INT64:
      return data_view->data.as_int64[i];
    case NANOARROW_TYPE_UINT64:
      return data_view->data.as_uint64[i];
    case NANOARROW_TYPE_INTERVAL_MONTHS:
    case NANOARROW_TYPE_INT32:
      return data_view->data.as_int32[i];
    case NANOARROW_TYPE_UINT32:
      return data_view->data.as_uint32[i];
    case NANOARROW_TYPE_INT16:
      return data_view->data.as_int16[i];
    case NANOARROW_TYPE_UINT16:
      return data_view->data.as_uint16[i];
    case NANOARROW_TYPE_INT8:
      return data_view->data.as_int8[i];
    case NANOARROW_TYPE_UINT8:
      return data_view->data.as_uint8[i];
    case NANOARROW_TYPE_DOUBLE:
      return (uint64_t)data_view->data.as_double[i];
    case NANOARROW_TYPE_FLOAT:
      return (uint64_t)data_view->data.as_float[i];
    case NANOARROW_TYPE_HALF_FLOAT:
      return (uint64_t)ArrowHalfFloatToFloat(data_view->data.as_uint16[i]);
    case NANOARROW_TYPE_BOOL:
      return ArrowBitGet(data_view->data.as_uint8, i);
    default:
      return UINT64_MAX;
  }
}

/* Element i coerced to double (continues on the next chunk line). */
static inline double ArrowArrayViewGetDoubleUnsafe(
    const struct ArrowArrayView* array_view, int64_t i) {
  i += array_view->offset;
  const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_INT64:
      return (double)data_view->data.as_int64[i];
    case NANOARROW_TYPE_UINT64:
      return (double)data_view->data.as_uint64[i];
    case NANOARROW_TYPE_INT32:
      return data_view->data.as_int32[i];
    case NANOARROW_TYPE_UINT32:
      return data_view->data.as_uint32[i];
    case NANOARROW_TYPE_INT16:
      return data_view->data.as_int16[i];
    case NANOARROW_TYPE_UINT16:
      return data_view->data.as_uint16[i];
    case NANOARROW_TYPE_INT8:
      return data_view->data.as_int8[i];
    case NANOARROW_TYPE_UINT8:
      return data_view->data.as_uint8[i];
    case NANOARROW_TYPE_DOUBLE:
      return data_view->data.as_double[i];
    case NANOARROW_TYPE_FLOAT:
      return data_view->data.as_float[i];
    case NANOARROW_TYPE_HALF_FLOAT:
      return ArrowHalfFloatToFloat(data_view->data.as_uint16[i]);
    case NANOARROW_TYPE_BOOL:
      return ArrowBitGet(data_view->data.as_uint8, i);
    default:
return DBL_MAX;
  }
}

/* String/binary element i as a (data, size) string view.  "Unsafe": no null
 * or bounds checking; unsupported storage types yield an empty view. */
static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
    const struct ArrowArrayView* array_view, int64_t i) {
  i += array_view->offset;
  const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
  const char* data_view = array_view->buffer_views[2].data.as_char;
  struct ArrowStringView view;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_BINARY:
      /* 32-bit offsets: size is the difference of adjacent offsets */
      view.data = data_view + offsets_view->data.as_int32[i];
      view.size_bytes =
          (int64_t)offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i];
      break;
    case NANOARROW_TYPE_LARGE_STRING:
    case NANOARROW_TYPE_LARGE_BINARY:
      view.data = data_view + offsets_view->data.as_int64[i];
      view.size_bytes =
          offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i];
      break;
    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
      /* Fixed width: data lives directly in buffer 1 */
      view.size_bytes = array_view->layout.element_size_bits[1] / 8;
      view.data = array_view->buffer_views[1].data.as_char + (i * view.size_bytes);
      break;
    case NANOARROW_TYPE_STRING_VIEW:
    case NANOARROW_TYPE_BINARY_VIEW: {
      struct ArrowBufferView buf_view =
          ArrowArrayViewGetBytesFromViewArrayUnsafe(array_view, i);
      view.data = buf_view.data.as_char;
      view.size_bytes = buf_view.size_bytes;
      break;
    }
    default:
      view.data = NULL;
      view.size_bytes = 0;
      break;
  }
  return view;
}

/* Same as above but returned as a byte-oriented buffer view. */
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
    const struct ArrowArrayView* array_view, int64_t i) {
  i += array_view->offset;
  const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
  const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8;
  struct ArrowBufferView view;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_BINARY:
      view.size_bytes =
          (int64_t)offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i];
      view.data.as_uint8 = data_view + offsets_view->data.as_int32[i];
      break;
    case NANOARROW_TYPE_LARGE_STRING:
    case NANOARROW_TYPE_LARGE_BINARY:
      view.size_bytes =
          offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i];
      view.data.as_uint8 = data_view + offsets_view->data.as_int64[i];
      break;
    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
      view.size_bytes = array_view->layout.element_size_bits[1] / 8;
      view.data.as_uint8 =
          array_view->buffer_views[1].data.as_uint8 + (i * view.size_bytes);
      break;
    case NANOARROW_TYPE_STRING_VIEW:
    case NANOARROW_TYPE_BINARY_VIEW:
      view = ArrowArrayViewGetBytesFromViewArrayUnsafe(array_view, i);
      break;
    default:
      view.data.data = NULL;
      view.size_bytes = 0;
      break;
  }
  return view;
}

/* Copy interval element i into *out, decoding the packed per-type layout
 * (months / day+ms / month+day+nano).  Unknown storage types leave *out as-is. */
static inline void ArrowArrayViewGetIntervalUnsafe(
    const struct ArrowArrayView* array_view, int64_t i, struct ArrowInterval* out) {
  const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
  const int64_t offset = array_view->offset;
  const int64_t index = offset + i;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_INTERVAL_MONTHS: {
      const size_t size = sizeof(int32_t);
      memcpy(&out->months, data_view + index * size, sizeof(int32_t));
      break;
    }
    case NANOARROW_TYPE_INTERVAL_DAY_TIME: {
      const size_t size = sizeof(int32_t) + sizeof(int32_t);
      memcpy(&out->days, data_view + index * size, sizeof(int32_t));
      memcpy(&out->ms, data_view + index * size + 4, sizeof(int32_t));
      break;
    }
    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: {
      const size_t size = sizeof(int32_t) + sizeof(int32_t) + sizeof(int64_t);
      memcpy(&out->months, data_view + index * size, sizeof(int32_t));
      memcpy(&out->days, data_view + index * size + 4, sizeof(int32_t));
      memcpy(&out->ns, data_view + index * size + 8, sizeof(int64_t));
      break;
    }
    default:
      break;
  }
}

/* Copy decimal element i into *out (continues on the next chunk line). */
static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view,
                                                  int64_t i, struct ArrowDecimal* out) {
  i += array_view->offset;
  const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
  switch (array_view->storage_type) {
    case NANOARROW_TYPE_DECIMAL32:
      ArrowDecimalSetBytes(out, data_view + (i * 4));
      break;
    case NANOARROW_TYPE_DECIMAL64:
ArrowDecimalSetBytes(out, data_view + (i * 8)); break; case NANOARROW_TYPE_DECIMAL128: ArrowDecimalSetBytes(out, data_view + (i * 16)); break; case NANOARROW_TYPE_DECIMAL256: ArrowDecimalSetBytes(out, data_view + (i * 32)); break; default: memset(out->words, 0, sizeof(out->words)); break; } } #ifdef __cplusplus } #endif #endif ================================================ FILE: data/lang/cpp/pool.cpp ================================================ #include #include #include #include #include #include #include #include #include #include #include #include // needed for foreign interface #include #include #include #include #include #include using namespace std; char* g_tmpdir; uint8_t* foreign_call(const char* socket_filename, size_t mid, ...) __attribute__((sentinel)); // AUTO include statements start // <<>> // AUTO include statements end // Proper linking of cppmorloc requires it be included AFTER the custom modules #include "mlc_arrow.hpp" #include "cppmorloc.hpp" #define PROPAGATE_ERROR(errmsg) \ if(errmsg != NULL) { \ char errmsg_buffer[MAX_ERRMSG_SIZE] = { 0 }; \ snprintf(errmsg_buffer, MAX_ERRMSG_SIZE, "Error C++ pool (%s:%d in %s):\n%s" , __FILE__, __LINE__, __func__, errmsg); \ free(errmsg); \ throw std::runtime_error(errmsg_buffer); \ } #define PROPAGATE_FAIL_PACKET(errmsg) \ if(errmsg != NULL){ \ uint8_t* fail_packet_ = make_fail_packet(errmsg); \ free(errmsg); \ return fail_packet_; \ } // AUTO serialization statements start // <<>> // AUTO serialization statements end std::string interweave_strings(const std::vector& first, const std::vector& second) { // Validate sizes - errors here indicate a bug in the morloc compiler if (first.size() != second.size() + 1) { throw std::invalid_argument("First list must have exactly 1 more element than second list"); } // Pre-calculate total size to avoid reallocations size_t total_size = 0; for (const auto& s : first) total_size += s.size(); for (const auto& s : second) total_size += s.size(); std::string result; 
result.reserve(total_size); // Interweave the strings for (size_t i = 0; i < second.size(); ++i) { result += first[i]; result += second[i]; } result += first.back(); // Append the final element from first list return result; } // Thread-local list of SHM pointers allocated by _put_value. // Freed after foreign_call returns (args consumed) or at next dispatch start // (result consumed by caller in the synchronous call that returned it). struct ShmEntry { absptr_t ptr; Schema* schema; }; thread_local std::vector _shm_tracker; static void _flush_shm_tracker() { for (auto& e : _shm_tracker) { char* err = NULL; // Only do recursive sub-freeing if we have a schema and this is // the last reference. NULL schema entries (from foreign_call result // tracking) just decrement the refcount. block_header_t* blk = (block_header_t*)((char*)e.ptr - sizeof(block_header_t)); if (e.schema && blk->reference_count <= 1) { shfree_by_schema(e.ptr, e.schema, &err); if (err) { free(err); err = NULL; } } shfree(e.ptr, &err); if (err) { free(err); } } _shm_tracker.clear(); } // Thread-local schema cache: avoids re-parsing the same schema strings Schema* get_cached_schema(const char* schema_str) { thread_local std::unordered_map cache; auto it = cache.find(schema_str); if (it != cache.end()) return it->second; Schema* schema = parse_schema_cpp(schema_str); cache[schema_str] = schema; return schema; } // Transforms a serialized value into a message ready for the socket template uint8_t* _put_value(const T& value, const std::string& schema_str) { Schema* schema = get_cached_schema(schema_str.c_str()); if constexpr (std::is_same_v) { // Arrow export: move table data into SHM, build packet. // const_cast is safe here: the value is always a temporary from // a manifold call, never a truly const object. 
mlc::ArrowTable& tbl = const_cast(value); // NOTE(review): const_cast lost its <mlc::ArrowTable&> argument
        relptr_t relptr = tbl.move_to_shm();
        uint8_t* packet = make_arrow_data_packet(relptr, schema);
        if (!packet) {
            throw std::runtime_error("Failed to create arrow data packet");
        }
        // Track the SHM block (NULL schema: refcount-only cleanup) for the
        // next dispatch flush; rel2abs errors are discarded as best-effort.
        char* err = nullptr;
        void* shm_ptr = rel2abs(relptr, &err);
        if (err) { free(err); }
        if (shm_ptr) { _shm_tracker.push_back({(absptr_t)shm_ptr, nullptr}); }
        return packet;
    } else {
        // Arrow dispatch: if schema hint is "arrow", the C++ type must be mlc::ArrowTable
        if (schema->hint && strcmp(schema->hint, "arrow") == 0) {
            throw std::runtime_error("Arrow schema but C++ type is not mlc::ArrowTable");
        }
        void* voidstar = nullptr;
        try {
            voidstar = toAnything(schema, value);
            relptr_t relptr = abs2rel_cpp(voidstar);
            char* errmsg = nullptr;
            uint8_t* packet = make_data_packet_auto(voidstar, relptr, schema, &errmsg);
            if (errmsg) {
                shfree_cpp(voidstar);
                PROPAGATE_ERROR(errmsg);
            }
            const morloc_packet_header_t* hdr = (const morloc_packet_header_t*)packet;
            if (hdr->command.data.source == PACKET_SOURCE_RPTR) {
                // SHM referenced by packet -- track for deferred cleanup
                _shm_tracker.push_back({(absptr_t)voidstar, schema});
            } else {
                // Data inlined in packet -- free SHM immediately
                char* free_err = NULL;
                shfree_by_schema((absptr_t)voidstar, schema, &free_err);
                if (free_err) { free(free_err); free_err = NULL; }
                shfree((absptr_t)voidstar, &free_err);
                if (free_err) { free(free_err); }
            }
            return packet;
        } catch (...) {
            // Any conversion/packing failure: release the SHM block, rethrow.
            if (voidstar) shfree_cpp(voidstar);
            throw;
        }
    }
}

// Use a key to retrieve a value
// NOTE(review): `template` lost its parameter list (presumably <typename T>).
template T _get_value(const uint8_t* packet, const std::string& schema_str){
    const morloc_packet_header_t* header = (const morloc_packet_header_t*)packet;
    uint8_t source = header->command.data.source;
    uint8_t format = header->command.data.format;

    // NOTE(review): is_same_v lost its arguments (presumably <T, mlc::ArrowTable>).
    if constexpr (std::is_same_v) {
        // Arrow import: packet -> arrow_from_shm -> ArrowTable
        Schema* schema = get_cached_schema(schema_str.c_str());
        char* errmsg = nullptr;
        uint8_t* raw = get_morloc_data_packet_value(packet, schema, &errmsg);
        if (errmsg) {
            PROPAGATE_ERROR(errmsg);
        }
        const arrow_shm_header_t* hdr = (const arrow_shm_header_t*)raw;
        struct ArrowSchema as;
        struct ArrowArray aa;
        char* aerr = nullptr;
        arrow_from_shm(hdr, &as, &aa, &aerr);
        if (aerr) {
            PROPAGATE_ERROR(aerr);
        }
        // Keep the SHM block alive past the owner's flush (refcount-only entry).
        char* ierr = nullptr;
        shincref((absptr_t)raw, &ierr);
        if (ierr) { free(ierr); }
        _shm_tracker.push_back({(absptr_t)raw, nullptr});
        return mlc::ArrowTable(std::move(as), std::move(aa));
    } else {
        if (format == PACKET_FORMAT_ARROW) {
            throw std::runtime_error("Arrow data but C++ type is not mlc::ArrowTable");
        }
        Schema* schema = get_cached_schema(schema_str.c_str());

        // Fast path: inline voidstar -- read directly from packet, no SHM needed
        if (source == PACKET_SOURCE_MESG && format == PACKET_FORMAT_VOIDSTAR) {
            const uint8_t* payload = packet + sizeof(morloc_packet_header_t) + header->offset;
            T* dummy = nullptr;
            return fromAnything(schema, (const void*)payload, dummy, (const void*)payload);
        }

        // SHM paths (RPTR or MESG+MSGPACK): existing logic
        bool is_rptr = (source == PACKET_SOURCE_RPTR);
        char* errmsg = NULL;
        uint8_t* voidstar = get_morloc_data_packet_value(packet, schema, &errmsg);
        if(errmsg != NULL) { PROPAGATE_ERROR(errmsg) }

        // For RPTR data, increment refcount so the owner's tracker flush
        // won't destroy data we may still need (e.g. forwarded packets).
if (is_rptr) {
            char* incref_err = NULL;
            shincref((absptr_t)voidstar, &incref_err);
            if (incref_err) { free(incref_err); }
            _shm_tracker.push_back({(absptr_t)voidstar, schema});
        }

        T* dummy = nullptr;
        return fromAnything(schema, (void*)voidstar, dummy);
    }
}

// Hash a value, returning a 16-char hex string
// NOTE(review): all `template` heads below lost their parameter lists
// (presumably <typename T>) in extraction.
template std::string _mlc_hash(const T& value, const std::string& schema_str) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    void* voidstar = toAnything(schema, value);
    char* errmsg = NULL;
    char* hex = mlc_hash(voidstar, schema, &errmsg);
    shfree_cpp(voidstar);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
    std::string result(hex);
    free(hex);
    return result;
}

// Save a value to file in msgpack format
template void _mlc_save(const T& value, const std::string& schema_str, const std::string& path) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    void* voidstar = toAnything(schema, value);
    char* errmsg = NULL;
    mlc_save(voidstar, schema, path.c_str(), &errmsg);
    shfree_cpp(voidstar);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
}

// Save a value to file in flat voidstar binary format
template void _mlc_save_voidstar(const T& value, const std::string& schema_str, const std::string& path) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    void* voidstar = toAnything(schema, value);
    char* errmsg = NULL;
    mlc_save_voidstar(voidstar, schema, path.c_str(), &errmsg);
    shfree_cpp(voidstar);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
}

// Save a value to file in JSON format
template void _mlc_save_json(const T& value, const std::string& schema_str, const std::string& path) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    void* voidstar = toAnything(schema, value);
    char* errmsg = NULL;
    mlc_save_json(voidstar, schema, path.c_str(), &errmsg);
    shfree_cpp(voidstar);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
}

// Serialize a value to a JSON string
template std::string _mlc_show(const T& value, const std::string& schema_str) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    void* voidstar = toAnything(schema, value);
    char* errmsg = NULL;
    char* json = mlc_show(voidstar, schema, &errmsg);
    shfree_cpp(voidstar);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
    std::string result(json);
    free(json);
    return result;
}

// Deserialize a JSON string to a typed value
// Returns std::nullopt on parse failure
// NOTE(review): std::optional lost its argument (presumably <T>).
template std::optional _mlc_read(const std::string& schema_str, const std::string& json_str) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    char* errmsg = NULL;
    void* voidstar = mlc_read(json_str.c_str(), schema, &errmsg);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
    if (voidstar == NULL) {
        return std::nullopt;
    }
    T* dummy = nullptr;
    T result = fromAnything(schema, voidstar, dummy);
    shfree_cpp(voidstar);
    return result;
}

// Load a value from file, auto-detecting format
// Returns std::nullopt if file does not exist
template std::optional _mlc_load(const std::string& schema_str, const std::string& path) {
    Schema* schema = get_cached_schema(schema_str.c_str());
    char* errmsg = NULL;
    void* voidstar = mlc_load(path.c_str(), schema, &errmsg);
    if (errmsg != NULL) { PROPAGATE_ERROR(errmsg) }
    if (voidstar == NULL) {
        return std::nullopt;
    }
    T* dummy = nullptr;
    T result = fromAnything(schema, voidstar, dummy);
    shfree_cpp(voidstar);
    return result;
}

// Synchronous cross-language call (body continues on the next chunk line).
uint8_t* foreign_call(const char* socket_filename, size_t mid, ...)
{
    char* errmsg = NULL;
    va_list args;
    size_t nargs = 0;
    char socket_path[128];
    snprintf(socket_path, sizeof(socket_path), "%s/%s", g_tmpdir, socket_filename);

    // Count arguments (must be NULL-terminated)
    va_start(args, mid);
    while (va_arg(args, uint8_t*) != NULL) nargs++;
    va_end(args);

    // Allocate and populate args array
    const uint8_t** args_array = (const uint8_t**)malloc((nargs + 1) * sizeof(uint8_t*));
    if (!args_array) throw std::runtime_error("malloc failed in foreign_call");

    va_start(args, mid);
    for (size_t i = 0; i < nargs; i++) {
        args_array[i] = va_arg(args, uint8_t*);
    }
    args_array[nargs] = NULL; // Sentinel
    va_end(args);

    // Original logic with variadic args converted to array
    uint8_t* packet = make_morloc_local_call_packet((uint32_t)mid, args_array, nargs, &errmsg);
    if (errmsg != NULL) {
        free(args_array);
        PROPAGATE_ERROR(errmsg)
    }

    // Mark the pool busy for the duration of the blocking socket round-trip.
    pool_mark_busy();
    uint8_t* result = send_and_receive_over_socket(socket_path, packet, &errmsg);
    pool_mark_idle();
    free(packet);
    if (errmsg != NULL) {
        free(args_array);
        PROPAGATE_ERROR(errmsg)
    }

    // Incref the result's SHM so the callee's tracker flush won't destroy
    // data we may still need (e.g. forwarded result packets).
    {
        const morloc_packet_header_t* res_header = (const morloc_packet_header_t*)result;
        if (res_header->command.data.source == PACKET_SOURCE_RPTR) {
            size_t relptr = *(size_t*)(result + res_header->offset + sizeof(morloc_packet_header_t));
            char* resolve_err = NULL;
            void* res_voidstar = rel2abs(relptr, &resolve_err);
            if (resolve_err) { free(resolve_err); resolve_err = NULL; }
            if (res_voidstar) {
                char* incref_err = NULL;
                shincref((absptr_t)res_voidstar, &incref_err);
                if (incref_err) { free(incref_err); }
                _shm_tracker.push_back({(absptr_t)res_voidstar, nullptr});
            }
        }
    }

    free(args_array);
    return result;
}

// AUTO signatures statements start
// <<>>
// AUTO signatures statements end

// AUTO manifolds statements start
// <<>>
// AUTO manifolds statements end

// AUTO dispatch start
// <<>>
// AUTO dispatch end

// Wrappers to adapt compiler-generated dispatch functions to pool_dispatch_fn_t.
// These catch C++ exceptions so the C pool_main never sees them.
static uint8_t* cpp_local_dispatch(uint32_t mid, const uint8_t** args, size_t nargs, void* ctx) {
    (void)nargs;
    (void)ctx;
    // Free SHM from previous dispatch (result packet consumed by caller)
    _flush_shm_tracker();
    try {
        return local_dispatch(mid, args);
    } catch (const std::exception& e) {
        return make_fail_packet(e.what());
    } catch (...) {
        return make_fail_packet("An unknown error occurred");
    }
}

static uint8_t* cpp_remote_dispatch(uint32_t mid, const uint8_t** args, size_t nargs, void* ctx) {
    (void)nargs;
    (void)ctx;
    try {
        return remote_dispatch(mid, args);
    } catch (const std::exception& e) {
        return make_fail_packet(e.what());
    } catch (...) {
        return make_fail_packet("An unknown error occurred");
    }
}

// Pool entry point: health check, argument parsing, then hand off to pool_main.
int main(int argc, char* argv[]) {
    // Line-buffer stderr so diagnostic output is not lost on pool shutdown.
    // stdout is left fully buffered for performance (genome-scale piping)
    // and flushed after each job by pool.c.
    setvbuf(stderr, NULL, _IOLBF, 0);

    // Health check: confirm binary links and print version
    if (argc == 2 && std::string(argv[1]) == "--health") {
        std::cout << "{\"status\":\"ok\",\"version\":\"__MORLOC_VERSION__\"}" << std::endl;
        return 0;
    }

    // NOTE(review): the usage string below lost its <...> placeholders in extraction.
    if (argc != 4) {
        std::cerr << "Usage: " << argv[0] << " \n";
        return 1;
    }

    g_tmpdir = strdup(argv[2]);

    pool_config_t config = {};
    config.local_dispatch = cpp_local_dispatch;
    config.remote_dispatch = cpp_remote_dispatch;
    config.dispatch_ctx = NULL;
    config.concurrency = POOL_THREADS;
    config.initial_workers = 1;
    config.dynamic_scaling = true;

    int result = pool_main(argc, argv, &config);
    free(g_tmpdir);
    return result;
}
================================================
FILE: data/lang/julia/MorlocRuntime.jl
================================================
"""
MorlocRuntime

Julia runtime module for morloc. Provides IPC (daemon lifecycle, packet I/O),
msgpack-based serialization, and foreign call support.

All heavy lifting is done by libmorloc via the thin C bridge (libjuliamorloc.so).
Julia handles only the msgpack <-> native type conversion using MsgPack.jl.
"""
module MorlocRuntime

using MsgPack

# Path to the bridge shared library (set during morloc init)
const LIB_PATH = Ref{String}("")

# Locate libjuliamorloc.so at module load; populates LIB_PATH or errors.
function __init__()
    # Look for libjuliamorloc.so relative to this file, or in standard locations
    candidates = [
        joinpath(dirname(@__FILE__), "libjuliamorloc.so"),
        joinpath(dirname(@__FILE__), "..", "lib", "libjuliamorloc.so"),
    ]
    # Also check the morloc home lib directory
    morloc_home = get(ENV, "MORLOC_HOME", joinpath(homedir(), ".local", "share", "morloc"))
    push!(candidates, joinpath(morloc_home, "lib", "libjuliamorloc.so"))
    for path in candidates
        if isfile(path)
            LIB_PATH[] = path
            return
        end
    end
    error("Cannot find libjuliamorloc.so. 
Run `morloc init` first.")
end

# Accessor for the bridge library path resolved by __init__().
lib() = LIB_PATH[]

# -- Error handling --

# Raise a Julia error if the C bridge recorded an error message.
function check_error(context::String)
    msg = unsafe_string(ccall((:jlmorloc_last_error, lib()), Cstring, ()))
    if !isempty(msg)
        error("$context: $msg")
    end
end

# -- Daemon lifecycle --

function start_daemon(socket_path::String, tmpdir::String, shm_basename::String, shm_size::Integer)
    ptr = ccall((:jlmorloc_start_daemon, lib()), Ptr{Nothing},
        (Cstring, Cstring, Cstring, Csize_t),
        socket_path, tmpdir, shm_basename, UInt(shm_size))
    ptr == C_NULL && check_error("start_daemon")
    return ptr
end

function close_daemon(daemon::Ptr{Nothing})
    ccall((:jlmorloc_close_daemon, lib()), Nothing, (Ptr{Nothing},), daemon)
end

# Block until a client connects; returns the client socket fd.
function wait_for_client(daemon::Ptr{Nothing})
    fd = ccall((:jlmorloc_wait_for_client, lib()), Cint, (Ptr{Nothing},), daemon)
    fd < 0 && check_error("wait_for_client")
    return fd
end

# -- Packet I/O --

function stream_from_client(client_fd)
    out_size = Ref{Csize_t}(0)
    ptr = ccall((:jlmorloc_stream_from_client, lib()), Ptr{UInt8},
        (Cint, Ref{Csize_t}), Int32(client_fd), out_size)
    ptr == C_NULL && check_error("stream_from_client")
    return ptr # opaque packet pointer
end

function send_packet_to_foreign_server(client_fd, packet::Ptr{UInt8})
    rc = ccall((:jlmorloc_send_packet, lib()), Cint,
        (Cint, Ptr{UInt8}), Int32(client_fd), packet)
    rc != 0 && check_error("send_packet")
end

function close_socket(fd)
    ccall((:jlmorloc_close_socket, lib()), Nothing, (Cint,), Int32(fd))
end

# -- Packet classification --

function is_ping(packet::Ptr{UInt8})
    ccall((:jlmorloc_is_ping, lib()), Cint, (Ptr{UInt8},), packet) != 0
end

function is_local_call(packet::Ptr{UInt8})
    ccall((:jlmorloc_is_local_call, lib()), Cint, (Ptr{UInt8},), packet) != 0
end

function is_remote_call(packet::Ptr{UInt8})
    ccall((:jlmorloc_is_remote_call, lib()), Cint, (Ptr{UInt8},), packet) != 0
end

# Build the pong response packet for a ping.
function pong(packet::Ptr{UInt8})
    result = ccall((:jlmorloc_pong, lib()), Ptr{UInt8}, (Ptr{UInt8},), packet)
    result == C_NULL && check_error("pong")
    return result
end

# -- Call packet parsing --

"""
    read_morloc_call_packet(packet) -> (mid, args)

Parse a call packet into a manifold index and a vector of argument packets.
"""
function read_morloc_call_packet(packet::Ptr{UInt8})
    out_mid = Ref{UInt32}(0)
    out_nargs = Ref{Csize_t}(0)
    call_ptr = ccall((:jlmorloc_read_call, lib()), Ptr{Nothing},
        (Ptr{UInt8}, Ref{UInt32}, Ref{Csize_t}),
        packet, out_mid, out_nargs)
    call_ptr == C_NULL && check_error("read_call")
    mid = Int(out_mid[])
    nargs = Int(out_nargs[])
    args = Vector{Ptr{UInt8}}(undef, nargs)
    for i in 1:nargs
        # C side is zero-indexed, hence i - 1
        args[i] = ccall((:jlmorloc_call_arg, lib()), Ptr{UInt8},
            (Ptr{Nothing}, Csize_t), call_ptr, UInt(i - 1))
    end
    ccall((:jlmorloc_free_call, lib()), Nothing, (Ptr{Nothing},), call_ptr)
    return (mid, args)
end

# -- Msgpack bridge: serialize/deserialize --

"""
Strip a leading angle-bracket hint prefix (`<...>`) from a schema string,
returning the bare schema (e.g. an `"i4"` payload).
"""
function strip_schema_prefix(schema_str::String)
    if !isempty(schema_str) && schema_str[1] == '<'
        i = findfirst('>', schema_str)
        if i !== nothing
            return schema_str[i+1:end]
        end
    end
    return schema_str
end

"""
    put_value(value, schema_str) -> Ptr{UInt8}

Serialize a Julia value to a morloc data packet via msgpack.
"""
function put_value(value, schema_str::String)
    schema = strip_schema_prefix(schema_str)
    mpk = MsgPack.pack(to_msgpack(value, schema))
    pkt = ccall((:jlmorloc_pack, lib()), Ptr{UInt8},
        (Ptr{UInt8}, Csize_t, Cstring),
        mpk, length(mpk), schema)
    pkt == C_NULL && check_error("pack")
    return pkt
end

"""
    get_value(packet, schema_str) -> Julia value

Deserialize a morloc data packet to a Julia value via msgpack.
"""
function get_value(packet::Ptr{UInt8}, schema_str::String)
    schema = strip_schema_prefix(schema_str)
    out_size = Ref{Csize_t}(0)
    mpk_ptr = ccall((:jlmorloc_unpack, lib()), Ptr{UInt8},
                    (Ptr{UInt8}, Cstring, Ref{Csize_t}), packet, schema, out_size)
    mpk_ptr == C_NULL && check_error("unpack")
    # own=true: Julia's GC takes responsibility for freeing the C buffer.
    mpk_bytes = unsafe_wrap(Array, mpk_ptr, out_size[]; own=true)
    raw = MsgPack.unpack(mpk_bytes)
    return from_msgpack(raw, schema)
end

# -- Error packet --

function make_fail_packet(msg::String)
    ccall((:jlmorloc_make_fail_packet, lib()), Ptr{UInt8}, (Cstring,), msg)
end

# -- Foreign call --

"""
    foreign_call(tmpdir, socket_name, mid, args) -> Ptr{UInt8}

Call another pool (cross-language IPC). args is a vector of packet pointers.
"""
function foreign_call(tmpdir::String, socket_name::String, mid::Integer, args::Vector{Ptr{UInt8}})
    nargs = length(args)
    result = ccall((:jlmorloc_foreign_call, lib()), Ptr{UInt8},
                   (Cstring, Cstring, UInt32, Ptr{Ptr{UInt8}}, Csize_t),
                   tmpdir, socket_name, UInt32(mid), args, UInt(nargs))
    result == C_NULL && check_error("foreign_call")
    return result
end

# -- Type conversion helpers --
# Schema string format:
# "b" = bool, "i4" = int32, "i8" = int64, "f8" = float64, "s" = string
# "ai4" = array of int32, "t(i4f8s)" = tuple
# "m{name:s,age:i4}" = record

"""
Convert a Julia value to a msgpack-friendly representation based on schema.

MsgPack.jl handles most types natively, but we need to ensure correct types
for the schema (e.g., Int32 vs Int64).
"""
function to_msgpack(value, schema::String)
    if startswith(schema, "a")
        # Array: the rest of the schema describes the element type.
        elem_schema = schema[2:end]
        return [to_msgpack(v, elem_schema) for v in value]
    elseif startswith(schema, "t(")
        # Tuple: element schemas are concatenated inside the parentheses.
        inner = schema[3:end-1]
        schemas = split_tuple_schema(inner)
        return [to_msgpack(value[i], schemas[i]) for i in 1:length(schemas)]
    elseif startswith(schema, "m{")
        # Record: comma-separated name:schema fields inside the braces.
        inner = schema[3:end-1]
        fields = split_record_schema(inner)
        return Dict(k => to_msgpack(value[k], s) for (k, s) in fields)
    else
        return to_msgpack_scalar(value, schema)
    end
end

# Coerce a scalar to the exact width/type the schema requires so msgpack
# encodes it consistently across languages.
function to_msgpack_scalar(value, schema::String)
    if schema == "b"
        return Bool(value)
    elseif schema == "i4"
        return Int32(value)
    elseif schema == "i8"
        return Int64(value)
    elseif schema == "u4"
        return UInt32(value)
    elseif schema == "u8"
        return UInt64(value)
    elseif schema == "f4"
        return Float32(value)
    elseif schema == "f8"
        return Float64(value)
    elseif schema == "s"
        return String(value)
    elseif schema == "u"
        # Unit type serializes as nil.
        return nothing
    else
        # Unknown scalar schema: pass the value through unchanged.
        return value
    end
end

"""
Convert a raw msgpack value to a Julia type based on schema.
"""
function from_msgpack(raw, schema::String)
    if startswith(schema, "a")
        elem_schema = schema[2:end]
        return [from_msgpack(v, elem_schema) for v in raw]
    elseif startswith(schema, "t(")
        inner = schema[3:end-1]
        schemas = split_tuple_schema(inner)
        return Tuple(from_msgpack(raw[i], schemas[i]) for i in 1:length(schemas))
    elseif startswith(schema, "m{")
        inner = schema[3:end-1]
        fields = split_record_schema(inner)
        return Dict(k => from_msgpack(raw[k], s) for (k, s) in fields)
    else
        return from_msgpack_scalar(raw, schema)
    end
end

# Inverse of to_msgpack_scalar: coerce a decoded msgpack scalar to the
# Julia type named by the schema.
function from_msgpack_scalar(raw, schema::String)
    if schema == "b"
        return Bool(raw)
    elseif schema == "i4"
        return Int32(raw)
    elseif schema == "i8"
        return Int64(raw)
    elseif schema == "u4"
        return UInt32(raw)
    elseif schema == "u8"
        return UInt64(raw)
    elseif schema == "f4"
        return Float32(raw)
    elseif schema == "f8"
        return Float64(raw)
    elseif schema == "s"
        return String(raw)
    elseif schema == "u"
        return nothing
    else
        return raw
    end
end

# -- Schema parsing helpers --

# Split concatenated tuple element schemas, e.g. "i4f8s" -> ["i4","f8","s"].
function split_tuple_schema(inner::String)
    schemas = String[]
    i = 1
    while i <= length(inner)
        s, i = parse_one_schema(inner, i)
        push!(schemas, s)
    end
    return schemas
end

# Split record fields "name:schema,name:schema" into name => schema pairs.
function split_record_schema(inner::String)
    fields = Pair{String,String}[]
    i = 1
    while i <= length(inner)
        # parse field name
        colon = findnext(':', inner, i)
        name = inner[i:colon-1]
        i = colon + 1
        # parse field schema
        s, i = parse_one_schema(inner, i)
        push!(fields, name => s)
        if i <= length(inner) && inner[i] == ','
            i += 1
        end
    end
    return fields
end

# Parse one schema term starting at index i; returns (schema, next_index).
function parse_one_schema(s::String, i::Int)
    if s[i] == 'a'
        inner, next_i = parse_one_schema(s, i + 1)
        return "a" * inner, next_i
    elseif s[i] == 't'
        # find matching ')'
        depth = 0
        j = i + 1
        while j <= length(s)
            if s[j] == '('; depth += 1; end
            if s[j] == ')'; depth -= 1; if depth == 0; break; end; end
            j += 1
        end
        return s[i:j], j + 1
    elseif s[i] == 'm'
        # find matching '}' (braces may nest for nested records)
        depth = 0
        j = i + 1
        while j <= length(s)
            if s[j] == '{'; depth += 1; end
            if s[j] == '}'; depth -= 1; if depth == 0; break; end; end
            j += 1
        end
        return s[i:j], j + 1
    elseif s[i] in ('i', 'u', 'f')
        # numeric: i4, i8, u4, u8, f4, f8
        return s[i:i+1], i + 2
    elseif s[i] == 's'
        return "s", i + 1
    elseif s[i] == 'b'
        return "b", i + 1
    else
        error("Unknown schema character: $(s[i]) at position $i in '$s'")
    end
end

end # module

================================================
FILE: data/lang/julia/init.sh
================================================
#!/bin/bash
# Install the Julia language support files into MORLOC_HOME and build the
# C bridge library used by the Julia runtime via ccall.
set -e

MORLOC_HOME="$1"
BUILD_DIR="$2"
SANITIZE_FLAGS="$3"

INCLUDE_DIR="$MORLOC_HOME/include"
LIB_DIR="$MORLOC_HOME/lib"
LANG_DIR="$MORLOC_HOME/lang/julia"

mkdir -p "$LANG_DIR"

# Install language descriptor and runtime files
cp "$BUILD_DIR/lang.yaml" "$LANG_DIR/"
cp "$BUILD_DIR/pool.jl" "$LANG_DIR/"
cp "$BUILD_DIR/MorlocRuntime.jl" "$LANG_DIR/"

# Compile juliabridge.c -> libjuliamorloc.so
gcc -shared -fPIC -O2 $SANITIZE_FLAGS -I"$INCLUDE_DIR" -o "$LIB_DIR/libjuliamorloc.so" \
    "$BUILD_DIR/juliabridge.c" -L"$LIB_DIR" -Wl,-rpath,"$LIB_DIR" -lmorloc -lpthread

================================================
FILE: data/lang/julia/juliabridge.c
================================================
/* juliabridge.c -- Thin C bridge between Julia and libmorloc.
 *
 * Compiled to libjuliamorloc.so, called from Julia via ccall.
 * Wraps libmorloc functions that use opaque structs (language_daemon_t,
 * morloc_call_t, Schema) or the ERRMSG pattern into simple pointer/int
 * interfaces that Julia's FFI can handle directly.
 */
#include "morloc.h"
/* NOTE(review): the system header names below were lost in extraction
 * (angle-bracketed text stripped) -- restore from the repository. */
#include
#include
#include

/* Thread-local error message buffer */
static __thread char jl_errbuf[4096];
static __thread char* jl_errmsg = NULL;

static void clear_err(void) { jl_errmsg = NULL; jl_errbuf[0] = '\0'; }

/* Get the last error message (returns "" if none). */
const char* jlmorloc_last_error(void) { return jl_errmsg ?
jl_errmsg : ""; }

/* -- Daemon lifecycle -- */

void* jlmorloc_start_daemon(const char* socket_path, const char* tmpdir,
                            const char* shm_basename, size_t shm_size) {
    clear_err();
    language_daemon_t* d = start_daemon(socket_path, tmpdir, shm_basename, shm_size, &jl_errmsg);
    if (!d && jl_errmsg) {
        /* Copy the error into the stable thread-local buffer so the pointer
         * handed to Julia stays valid. */
        snprintf(jl_errbuf, sizeof(jl_errbuf), "%s", jl_errmsg);
        jl_errmsg = jl_errbuf;
    }
    return (void*)d;
}

void jlmorloc_close_daemon(void* daemon) {
    language_daemon_t* d = (language_daemon_t*)daemon;
    close_daemon(&d);
}

int jlmorloc_wait_for_client(void* daemon) {
    clear_err();
    return wait_for_client((language_daemon_t*)daemon, &jl_errmsg);
}

/* -- Packet I/O -- */

/* Returns a pointer to the packet bytes. Caller must NOT free this
 * directly -- it lives in shared memory or was allocated by libmorloc. */
uint8_t* jlmorloc_stream_from_client(int client_fd, size_t* out_size) {
    clear_err();
    uint8_t* pkt = stream_from_client(client_fd, &jl_errmsg);
    if (pkt && out_size) {
        /* Packet size is in the first 4 bytes (little-endian uint32) */
        uint32_t sz;
        memcpy(&sz, pkt, sizeof(sz));
        *out_size = (size_t)sz;
    }
    return pkt;
}

/* Returns 0 on success, -1 on failure (zero bytes sent). */
int jlmorloc_send_packet(int client_fd, uint8_t* packet) {
    clear_err();
    size_t sent = send_packet_to_foreign_server(client_fd, packet, &jl_errmsg);
    return sent > 0 ? 0 : -1;
}

void jlmorloc_close_socket(int fd) { close_socket(fd); }

/* -- Packet classification -- */

int jlmorloc_is_ping(const uint8_t* packet) {
    clear_err();
    return packet_is_ping(packet, &jl_errmsg) ? 1 : 0;
}

int jlmorloc_is_local_call(const uint8_t* packet) {
    clear_err();
    return packet_is_local_call(packet, &jl_errmsg) ? 1 : 0;
}

int jlmorloc_is_remote_call(const uint8_t* packet) {
    clear_err();
    return packet_is_remote_call(packet, &jl_errmsg) ? 1 : 0;
}

uint8_t* jlmorloc_pong(const uint8_t* packet) {
    clear_err();
    return return_ping(packet, &jl_errmsg);
}

/* -- Call packet parsing -- */

/* Parse a call packet.
Returns the manifold index via out_mid,
 * the number of arguments via out_nargs, and a pointer to the
 * morloc_call_t (which the caller must free via jlmorloc_free_call). */
void* jlmorloc_read_call(const uint8_t* packet, uint32_t* out_mid, size_t* out_nargs) {
    clear_err();
    morloc_call_t* call = read_morloc_call_packet(packet, &jl_errmsg);
    if (!call) return NULL;
    *out_mid = call->midx;
    *out_nargs = call->nargs;
    return (void*)call;
}

/* Get the i-th argument packet from a parsed call. */
uint8_t* jlmorloc_call_arg(void* call_ptr, size_t i) {
    morloc_call_t* call = (morloc_call_t*)call_ptr;
    if (i >= call->nargs) return NULL;
    return call->args[i];
}

void jlmorloc_free_call(void* call_ptr) {
    if (call_ptr) free_morloc_call((morloc_call_t*)call_ptr);
}

/* -- Msgpack bridge -- */

/* Convert msgpack bytes + schema string -> morloc data packet.
 * The schema_str is a compact type descriptor like "i4", "ai4", "m{x:f8}". */
uint8_t* jlmorloc_pack(const char* mpk, size_t mpk_size, const char* schema_str) {
    clear_err();
    Schema* schema = parse_schema(schema_str, &jl_errmsg);
    if (!schema) return NULL;
    uint8_t* pkt = make_data_packet_from_mpk(mpk, mpk_size, schema);
    free_schema(schema);
    return pkt;
}

/* Convert a morloc data packet -> msgpack bytes.
 * Returns a malloc'd buffer; caller must free it.
 */
char* jlmorloc_unpack(const uint8_t* packet, const char* schema_str, size_t* out_size) {
    clear_err();
    Schema* schema = parse_schema(schema_str, &jl_errmsg);
    if (!schema) return NULL;
    char* mpk = NULL;
    size_t mpk_size = 0;
    int ok = get_data_packet_as_mpk(packet, schema, &mpk, &mpk_size, &jl_errmsg);
    free_schema(schema);
    if (!ok) return NULL;
    *out_size = mpk_size;
    return mpk;
}

/* -- Error packet -- */

uint8_t* jlmorloc_make_fail_packet(const char* msg) {
    return make_fail_packet(msg);
}

/* -- Foreign call (cross-pool IPC) -- */

uint8_t* jlmorloc_foreign_call(const char* tmpdir, const char* socket_name,
                               uint32_t mid, uint8_t** arg_packets, size_t nargs) {
    clear_err();
    /* Build the call packet */
    uint8_t* call_pkt = make_morloc_local_call_packet(
        mid, (const uint8_t**)arg_packets, nargs, &jl_errmsg);
    if (!call_pkt) return NULL;
    /* Build the socket path */
    /* NOTE(review): malloc result is not checked before snprintf; would
     * crash on allocation failure -- consider adding a NULL check. */
    size_t pathlen = strlen(tmpdir) + 1 + strlen(socket_name) + 1;
    char* socket_path = (char*)malloc(pathlen);
    snprintf(socket_path, pathlen, "%s/%s", tmpdir, socket_name);
    /* Send and receive */
    uint8_t* result = send_and_receive_over_socket(socket_path, call_pkt, &jl_errmsg);
    free(socket_path);
    free(call_pkt);
    return result;
}

/* -- Shared memory init (needed before daemon start in some cases) -- */

int jlmorloc_shinit(const char* basename, int volume, size_t size) {
    clear_err();
    return shinit(basename, volume, size, &jl_errmsg) ?
0 : -1; }

void jlmorloc_set_fallback_dir(const char* dir) { shm_set_fallback_dir(dir); }

================================================
FILE: data/lang/julia/lang.yaml
================================================
# Julia language descriptor for morloc compiler
# Metadata fields (read by LangRegistry) + descriptor fields (read by generic translator)

# Identity and metadata
name: jl
extension: jl
aliases: ["julia"]
is_compiled: false
run_command: ["julia"]
serial_type: "bytes"
cost: 5

# Descriptor identity (kept for compatibility with generic translator)
ldName: julia
ldExtension: jl

# Literals
ldBoolTrue: "true"
ldBoolFalse: "false"
ldNullLiteral: "nothing"

# Constructors
ldListStyle: bracket
ldTupleConstructor: ""
ldRecordConstructor: "Dict"
ldRecordSeparator: "=>"

# Access styles
ldIndexStyle: one_bracket
ldKeyAccess: bracket
ldFieldAccess: dot

# Serialize/deserialize function names (from juliamorloc.jl)
ldSerializeFn: "MorlocRuntime.put_value"
ldDeserializeFn: "MorlocRuntime.get_value"
ldIntrinsicPrefix: "MorlocRuntime."
# Foreign call
ldForeignCallFn: "MorlocRuntime.foreign_call"
ldForeignCallIntSuffix: ""

# Import syntax
ldQualifiedImports: false
ldIncludeRelToFile: true

# Template fields
ldAssignOp: "="
ldLambdaTemplate: "({{args}}) -> {{body}}"
ldDoBlockExpr: "(() -> {{expr}})"
ldDoBlockBlock: ""
ldPartialTemplate: "({{bound_args}}) -> {{fn}}({{all_args}})"
ldImportTemplate: "include(\"{{path}}\")"
ldSocketPathTemplate: "joinpath(global_state[\"tmpdir\"], {{socket}})"
ldResourcePackTemplate: "[{{mem}}, {{time}}, {{cpus}}, {{gpus}}]"
ldReturnTemplate: "return({{expr}})"
ldFuncDefHeader: "function {{name}}({{args}})"
ldBlockStyle: end_keyword
ldBlockEnd: "end"
ldErrorWrapOpen: ""
ldErrorWrapClose: []
ldPatternStyle: concat_call
ldConcatFn: "string"
ldQuoteTerminator: '"'
ldQuoteTerminatorEsc: '\"'
ldMapStyle: list_comprehension
ldDispatchLocalHeader: "dispatch = Dict("
ldDispatchLocalEntry: " {{mid}} => {{name}},"
ldDispatchLocalFooter: ")"
ldDispatchRemoteHeader: "remote_dispatch = Dict("
ldDispatchRemoteEntry: " {{mid}} => {{name}}_remote,"
ldDispatchRemoteFooter: ")"

# Pool template (loaded from pool.jl at runtime, left empty here)
ldPoolTemplate: ""
ldBreakMarker: "# <<>>"
ldCommentMarker: "#"

================================================
FILE: data/lang/julia/pool.jl
================================================
# Morloc Julia pool template
# Single-threaded daemon: accepts one connection at a time.

# Add morloc runtime to load path
const MORLOC_HOME = get(ENV, "MORLOC_HOME", joinpath(homedir(), ".local", "share", "morloc"))
push!(LOAD_PATH, joinpath(MORLOC_HOME, "lang", "julia"))

# Global state accessible to manifolds (e.g., tmpdir for foreign calls)
global_state = Dict{String,String}()

# <<>>

using MorlocRuntime

# <<>>

# <<>>

# Handle one client connection: read a packet, classify it, dispatch the
# manifold (or answer the ping), and send the result back.
function run_job(client_fd)
    try
        client_data = MorlocRuntime.stream_from_client(client_fd)
        if MorlocRuntime.is_local_call(client_data)
            (mid, args) = MorlocRuntime.read_morloc_call_packet(client_data)
            try
                result = dispatch[mid](args...)
            catch e
                result = MorlocRuntime.make_fail_packet(string(e))
            end
        elseif MorlocRuntime.is_remote_call(client_data)
            (mid, args) = MorlocRuntime.read_morloc_call_packet(client_data)
            try
                result = remote_dispatch[mid](args...)
            catch e
                result = MorlocRuntime.make_fail_packet(string(e))
            end
        elseif MorlocRuntime.is_ping(client_data)
            result = MorlocRuntime.pong(client_data)
        else
            error("Expected a ping or call type packet")
        end
        MorlocRuntime.send_packet_to_foreign_server(client_fd, result)
    catch e
        # Best-effort: wrap the error in a fail packet and send it back so the
        # caller gets a structured error instead of hanging on a closed socket.
        # Includes the full backtrace so context propagates through the stack.
        msg = sprint(showerror, e, catch_backtrace())
        try
            result = MorlocRuntime.make_fail_packet(msg)
            MorlocRuntime.send_packet_to_foreign_server(client_fd, result)
        catch
            # Client may already be gone (timed-out ping, broken pipe); ignore.
        end
        @error "job failed" exception=(e, catch_backtrace())
    finally
        MorlocRuntime.close_socket(client_fd)
    end
end

# Daemon entry point: parse ARGS, start the daemon, and serve clients in a
# loop until interrupted.
function main()
    socket_path = ARGS[1]
    tmpdir = ARGS[2]
    shm_basename = ARGS[3]
    global_state["tmpdir"] = tmpdir
    daemon = MorlocRuntime.start_daemon(socket_path, tmpdir, shm_basename, 0xffff)
    # Simple signal handling
    # NOTE(review): this task only polls `running` and installs no actual
    # signal handler -- appears to be a placeholder; confirm intent.
    running = Ref(true)
    @async begin
        try
            while running[]
                sleep(0.01)
            end
        catch
        end
    end
    try
        while running[]
            client_fd = MorlocRuntime.wait_for_client(daemon)
            if client_fd > 0
                run_job(client_fd)
            end
        end
    catch e
        if !(e isa InterruptException)
            @error "Pool error" exception=(e, catch_backtrace())
        end
    finally
        MorlocRuntime.close_daemon(daemon)
    end
end

main()

================================================
FILE: data/lang/languages.yaml
================================================
# Pairwise language costs for the morloc optimizer
#
# Same-language function overhead (intra-language call cost)
same_language_costs:
  c: 1
  cpp: 1
  py: 10
  r: 20

# Cost of calling INTO a language from a different language (IPC overhead)
cross_language_costs:
  c: 1001
  cpp: 1000
  py: 10000
  r: 40000

# Special optimized pairs (from -> to) that bypass normal IPC
optimized_pairs:
  - from: cpp
    to: c
    cost: 1

# Defaults for unknown/plugin languages
default_same_language: 10
default_cross_language: 10000

================================================
FILE: data/lang/py/Makefile
================================================
all:
	python3 setup.py build_ext --inplace
	cp -fs pymorloc.cpython* pymorloc

================================================
FILE: data/lang/py/init.sh
================================================
#!/bin/bash
# Build and install the pymorloc C extension into MORLOC_HOME/opt.
set -e

export MORLOC_HOME="$1"
BUILD_DIR="$2"
SANITIZE_FLAGS="$3"

OPT_DIR="$MORLOC_HOME/opt"

# Clean stale build artifacts
rm -f "$OPT_DIR"/pymorloc.cpython* "$OPT_DIR/pymorloc"
rm -rf "$OPT_DIR/build"

# Copy files to opt dir
cp "$BUILD_DIR/pymorloc.c" "$OPT_DIR/"
cp "$BUILD_DIR/setup.py" "$OPT_DIR/"
cp "$BUILD_DIR/Makefile" "$OPT_DIR/"

# Build pymorloc extension
export CFLAGS="$SANITIZE_FLAGS"
make -C "$OPT_DIR" -f Makefile

================================================
FILE: data/lang/py/lang.yaml
================================================
# Python language descriptor for morloc compiler
# Metadata fields (read by LangRegistry) + descriptor fields (read by generic translator)

# Identity and metadata
name: py
extension: py
aliases: ["python", "python3"]
is_compiled: false
run_command: ["python3"]
serial_type: "str"
cost: 3
preamble:
  - 'sys.path = [os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")), os.path.expanduser("."), os.path.expanduser("{{opt}}"), os.path.expanduser("{{lib}}")] + sys.path'
  - "import importlib"
  - "import pymorloc as morloc"

# Literals
ldBoolTrue: "True"
ldBoolFalse: "False"
ldNullLiteral: "None"

# Constructors
ldListStyle: bracket
ldTupleConstructor: ""
ldRecordConstructor: "OrderedDict"
ldRecordSeparator: "="

# Access styles
ldIndexStyle: zero_bracket
ldKeyAccess: "bracket"
ldFieldAccess: dot

# Serialize/deserialize
ldSerializeFn: "morloc.put_value"
ldDeserializeFn: "morloc.get_value"
ldIntrinsicPrefix: "morloc."

# Foreign call
ldForeignCallFn: "morloc.foreign_call"
ldForeignCallIntSuffix: ""

# Remote call
ldRemoteCallFn: "morloc.remote_call"

# Record handling
ldDictStyleRecords: true
ldQuoteRecordKeys: false

# Import syntax
ldQualifiedImports: true
ldIncludeRelToFile: false

# Template fields
ldAssignOp: "="
ldLambdaTemplate: "lambda {{args}}: {{body}}"
ldDoBlockExpr: "(lambda: {{expr}})"
ldDoBlockBlock: ""
ldPartialTemplate: "functools.partial({{fn_with_context}})"
ldImportTemplate: "{{namespace}} = importlib.import_module(\"{{module_path}}\")"
ldSocketPathTemplate: "os.path.join(global_state[\"tmpdir\"], {{socket}})"
ldResourcePackTemplate: "struct.pack('iiii', {{mem}}, {{time}}, {{cpus}}, {{gpus}})"
ldReturnTemplate: "return({{expr}})"
ldFuncDefHeader: "def {{name}}({{args}}):"
ldBlockStyle: indent
ldBlockEnd: ""
ldErrorWrapOpen: "try:"
ldErrorWrapClose:
  - "except Exception as e:"
  - " raise RuntimeError(f\"Error (pool daemon in {{name}}):\\n{e!s}\")"
ldPatternStyle: fstring
ldQuoteTerminator: '"""'
ldQuoteTerminatorEsc: '\"\"\"'
ldMapStyle: loop_append
ldDispatchLocalHeader: "dispatch = {"
ldDispatchLocalEntry: " {{mid}}: {{name}},"
ldDispatchLocalFooter: "}"
ldDispatchRemoteHeader: "remote_dispatch = {"
ldDispatchRemoteEntry: " {{mid}}: {{name}}_remote,"
ldDispatchRemoteFooter: "}"

# Pool template (loaded from pool.py, left empty here)
ldPoolTemplate: ""
ldBreakMarker: "# <<>>"
ldCommentMarker: "#"

================================================
FILE: data/lang/py/pool.py
================================================
import signal
import sys
import select
import os  # required for setting path to morloc dependencies
import time
import copy
import array
import struct
import socket as _socket
from collections import OrderedDict
from multiprocessing import Process, Value, RawValue
import ctypes
import functools

# Global variables for clean signal handling
daemon = None
workers = []
global_state = dict()
_shutdown_wakeup_fd = -1

# AUTO include sources start
# <<>>
# AUTO include sources end

# Dynamic worker spawning: monkey-patch foreign_call to track busy workers.
# Workers atomically increment busy_count before a foreign_call and decrement
# after. When busy_count reaches total_workers, a byte is written to a wake-up
# pipe to tell the main process to spawn a new worker.
_original_foreign_call = morloc.foreign_call
_busy_ref = None
_total_ref = None
_wakeup_fd = -1

def _init_worker_tracking(busy, total, wakeup_fd):
    # Install the shared counters and redirect morloc.foreign_call through
    # the busy-tracking wrapper. Called once per worker process.
    global _busy_ref, _total_ref, _wakeup_fd
    _busy_ref = busy
    _total_ref = total
    _wakeup_fd = wakeup_fd
    morloc.foreign_call = _tracked_foreign_call

def _tracked_foreign_call(*args):
    # NOTE(review): the read-then-write on a RawValue below is not atomic
    # across processes; the count drives a spawning heuristic, so small races
    # seem tolerated -- confirm this is intentional.
    prev = _busy_ref.value
    _busy_ref.value = prev + 1
    if prev + 1 >= _total_ref.value and _wakeup_fd >= 0:
        try:
            os.write(_wakeup_fd, b'!')
        except OSError:
            pass
    try:
        return _original_foreign_call(*args)
    finally:
        _busy_ref.value -= 1

# AUTO include manifolds start
# <<>>
# AUTO include manifolds end

# AUTO include dispatch start
# <<>>
# AUTO include dispatch end

def run_job(client_fd: int) -> None:
    # Handle a single client request: read the packet, dispatch it, reply.
    try:
        # Free SHM from previous dispatch result (consumed by caller)
        morloc.flush_shm_tracker()
        client_data = morloc.stream_from_client(client_fd)
        if(morloc.is_local_call(client_data)):
            (mid, args) = morloc.read_morloc_call_packet(client_data)
            try:
                result = dispatch[mid](*args)
            except Exception as e:
                result = morloc.make_fail_packet(str(e))
        elif(morloc.is_remote_call(client_data)):
            (mid, args) = morloc.read_morloc_call_packet(client_data)
            try:
                result = remote_dispatch[mid](*args)
            except Exception as e:
                result = morloc.make_fail_packet(str(e))
        elif(morloc.is_ping(client_data)):
            result = morloc.pong(client_data)
        else:
            raise ValueError("Expected a ping or call type packet")
        # Flush stdout BEFORE sending the result back. The nexus prints its
        # own output (the return value) right after receiving this response.
# Both processes share the same stdout fd, so if we flush after sending, # the nexus can print first, causing out-of-order output. sys.stdout.flush() morloc.send_packet_to_foreign_server(client_fd, result) except Exception as e: # Try to send a fail packet back to the caller before giving up. # This may fail (e.g., broken pipe from a timed-out ping), which is OK. try: result = morloc.make_fail_packet(str(e)) morloc.send_packet_to_foreign_server(client_fd, result) except Exception: pass print(f"job failed: {e!s}", file=sys.stderr) finally: # Safety-net flush for any output from error handling paths sys.stdout.flush() # close child copy morloc.close_socket(client_fd) def _send_fd(sock, fd): """Send a file descriptor over a Unix domain socket.""" sock.sendmsg([b'\x00'], [(_socket.SOL_SOCKET, _socket.SCM_RIGHTS, array.array('i', [fd]))]) def _recv_fd(sock): """Receive a file descriptor from a Unix domain socket.""" msg, ancdata, flags, addr = sock.recvmsg(1, _socket.CMSG_SPACE(4)) if not msg and not ancdata: raise EOFError("Connection closed") for cmsg_level, cmsg_type, cmsg_data in ancdata: if (cmsg_level == _socket.SOL_SOCKET and cmsg_type == _socket.SCM_RIGHTS): a = array.array('i') a.frombytes(cmsg_data[:4]) return a[0] raise RuntimeError("No fd received in ancillary data") WORKER_IDLE_TIMEOUT = 5.0 # seconds before an idle worker exits def worker_process(job_fd, tmpdir, shm_basename, shutdown_flag, busy_count, total_workers, wakeup_w): # Reset signal handlers inherited from main. If user code inside run_job # calls multiprocessing.Pool (or anything else that forks and later # SIGTERMs its own children), those grandchildren would otherwise inherit # main's signal_handler and flip the shared shutdown_flag, causing main # to SIGKILL this worker mid-response. See the multiprocessing-py-1 bug. 
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    morloc.set_fallback_dir(tmpdir)
    morloc.shinit(shm_basename, 0, 0xffff)
    _init_worker_tracking(busy_count, total_workers, wakeup_w)
    sock = _socket.fromfd(job_fd, _socket.AF_UNIX, _socket.SOCK_STREAM)
    os.close(job_fd)  # sock owns a dup'd copy
    last_activity = time.monotonic()
    try:
        while not shutdown_flag.value:
            # Short poll so the shared shutdown flag is honored promptly.
            rlist, _, _ = select.select([sock.fileno()], [], [], 0.01)
            if shutdown_flag.value:
                break
            if rlist:
                try:
                    client_fd = _recv_fd(sock)
                    run_job(client_fd)
                    last_activity = time.monotonic()
                except (EOFError, OSError):
                    break
            elif total_workers.value > 1 and time.monotonic() - last_activity > WORKER_IDLE_TIMEOUT:
                # Scale down: extra workers exit after a quiet period.
                break
    except BaseException as e:
        # Catch-all for errors that escape run_job's own exception handling:
        # MemoryError, KeyboardInterrupt, SystemExit, or bugs in the worker
        # loop itself. Without this, the worker dies silently and the nexus
        # only sees "failed to read response header" with no indication of
        # what went wrong in the pool.
        #
        # Race condition: the nexus detects the broken socket and may start
        # its clean_exit tear-down (SIGTERM -> SIGKILL) while this print is
        # still buffered. We flush immediately to maximize the chance the
        # message reaches the terminal before we are killed. stderr is
        # line-buffered (set in __main__), but the flush is a safety net for
        # edge cases (redirected stderr, forked-process buffer state).
        import traceback
        print(f"morloc pool worker fatal error: {e!s}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        sys.stderr.flush()
    finally:
        sock.close()

def signal_handler(sig, frame):
    global daemon
    # Ignore further SIGTERM/SIGINT during cleanup. Python processes pending
    # signals between bytecodes, including while another signal handler is
    # running, so a second SIGTERM arriving mid-cleanup would otherwise
    # re-enter this handler and double-free the daemon pointer.
    try:
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_IGN)
    except Exception:
        pass
    shutdown_flag.value = True
    # Wake the main select() loop so it notices the shutdown flag.
    if _shutdown_wakeup_fd >= 0:
        try:
            os.write(_shutdown_wakeup_fd, b'!')
        except OSError:
            pass
    # Capture the daemon pointer into a local and clear the global BEFORE
    # invoking close_daemon. If a pending signal still slips through and
    # re-enters this handler, it will see daemon=None and skip the free.
    d = daemon
    daemon = None
    if d is not None:
        morloc.close_daemon(d)

def client_listener(job_fd, socket_path, tmpdir, shm_basename, shutdown_flag):
    # Accept client connections and hand each client fd to an idle worker
    # over the shared Unix-socket job queue.
    global daemon
    daemon = morloc.start_daemon(socket_path, tmpdir, shm_basename, 0xffff)
    sock = _socket.fromfd(job_fd, _socket.AF_UNIX, _socket.SOCK_STREAM)
    os.close(job_fd)  # sock owns a dup'd copy
    while not shutdown_flag.value:
        try:
            client_fd = morloc.wait_for_client(daemon)
        except Exception as e:
            print(f"In python daemon, failed to connect to client: {e!s}", file=sys.stderr)
            continue
        if client_fd > 0:
            try:
                _send_fd(sock, client_fd)
            except Exception as e:
                print(f"In python daemon, failed to start worker: {e!s}", file=sys.stderr)
            finally:
                morloc.close_socket(client_fd)
    sock.close()

if __name__ == "__main__":
    # Line-buffer stderr so diagnostic output is not lost when pool is killed.
    # stdout is left fully buffered for performance (genome-scale piping) and
    # flushed explicitly after each job and during shutdown.
sys.stderr.reconfigure(line_buffering=True) shutdown_flag = Value('b', False) # Shared flag signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) # Health check: confirm imports loaded and print version if len(sys.argv) > 1 and sys.argv[1] == "--health": sys.stdout.write('{"status":"ok","version":"__MORLOC_VERSION__"}\n') sys.exit(0) # Process arguments passed from the nexus try: socket_path = sys.argv[1] tmpdir = sys.argv[2] shm_basename = sys.argv[3] except IndexError: print("Usage: script.py ") sys.exit(1) global_state["tmpdir"] = tmpdir # Shared job queue: listener writes fds to write_sock, workers read from read_sock. # Only idle workers (blocked in recvmsg) pick up jobs, preventing the round-robin # deadlock where a callback gets dispatched to a busy worker. read_sock, write_sock = _socket.socketpair(_socket.AF_UNIX, _socket.SOCK_STREAM) num_workers = 1 workers = [] # Shared counters for dynamic worker spawning. # Workers increment busy_count before foreign_call and decrement after. # When all workers are busy, main process spawns a new one. 
    busy_count = RawValue(ctypes.c_int, 0)
    total_workers = RawValue(ctypes.c_int, num_workers)
    wakeup_r, wakeup_w = os.pipe()
    os.set_blocking(wakeup_r, False)
    _shutdown_wakeup_fd = wakeup_w

    # Keep a dup of the read end so we can spawn new workers later
    spare_read_fd = os.dup(read_sock.fileno())

    for i in range(num_workers):
        worker = Process(target=worker_process,
                         args=(read_sock.fileno(), tmpdir, shm_basename, shutdown_flag,
                               busy_count, total_workers, wakeup_w))
        worker.start()
        workers.append(worker)
    read_sock.close()  # main/listener don't need the read end (spare_read_fd kept)

    # Start client listener process
    listener_process = Process(
        target=client_listener,
        args=(write_sock.fileno(), socket_path, tmpdir, shm_basename, shutdown_flag)
    )
    listener_process.start()
    write_sock.close()  # main doesn't need the write end

    # Main loop: monitor wake-up pipe, spawn new workers when all are busy,
    # and reap idle workers that have exited.
    while not shutdown_flag.value:
        rlist, _, _ = select.select([wakeup_r], [], [], 0.01)
        if rlist:
            try:
                os.read(wakeup_r, 4096)  # drain pipe
            except OSError:
                pass
        # Reap dead workers (idle timeout or error exit)
        alive = []
        for w in workers:
            if w.is_alive():
                alive.append(w)
            else:
                w.join(timeout=0)
                w.close()
        workers = alive
        total_workers.value = max(1, len(workers))
        # Spawn a new worker if all are busy (or all have exited)
        if len(workers) == 0 or busy_count.value >= total_workers.value:
            w = Process(target=worker_process,
                        args=(spare_read_fd, tmpdir, shm_basename, shutdown_flag,
                              busy_count, total_workers, wakeup_w))
            w.start()
            workers.append(w)
            total_workers.value = len(workers)

    # Shutdown sequence
    os.close(wakeup_r)
    os.close(wakeup_w)
    os.close(spare_read_fd)

    # 1. Stop listener first
    listener_process.terminate()
    listener_process.join(timeout=0.001)
    listener_process.kill()
    listener_process.join()  # Final blocking reap
    listener_process.close()

    # 2. Terminate workers with escalating force
    for p in workers:
        if p.is_alive():
            p.kill()
        p.join()  # Final blocking reap
        p.close()

    sys.exit(0)

================================================
FILE: data/lang/py/pymorloc.c
================================================
#define PY_SSIZE_T_CLEAN
#include "morloc.h"
#include "Python.h"
/* NOTE(review): the system header names below were lost in extraction
 * (angle-bracketed text stripped) -- restore from the repository. */
#include
#include
#include
#include

// boilerplate for numpy support
#define PY_ARRAY_UNIQUE_SYMBOL MORLOC_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include

// SHM tracker for _put_value allocations (deferred cleanup)
#define SHM_TRACKER_INIT_CAP 16

typedef struct {
    absptr_t ptr;
    Schema* schema;
} shm_entry_t;

static shm_entry_t* shm_tracker = NULL;
static size_t shm_tracker_count = 0;
static size_t shm_tracker_cap = 0;

// Append an allocation to the tracker, growing the buffer geometrically.
// On realloc failure the entry is silently dropped (leaks rather than crashes).
static void shm_tracker_push(absptr_t ptr, Schema* schema) {
    if (shm_tracker_count >= shm_tracker_cap) {
        size_t new_cap = shm_tracker_cap ? shm_tracker_cap * 2 : SHM_TRACKER_INIT_CAP;
        shm_entry_t* new_buf = (shm_entry_t*)realloc(shm_tracker, new_cap * sizeof(shm_entry_t));
        if (!new_buf) return;
        shm_tracker = new_buf;
        shm_tracker_cap = new_cap;
    }
    shm_tracker[shm_tracker_count].ptr = ptr;
    shm_tracker[shm_tracker_count].schema = schema;
    shm_tracker_count++;
}

// Free every tracked SHM block (and its schema), then reset the tracker.
static void flush_shm_tracker(void) {
    for (size_t i = 0; i < shm_tracker_count; i++) {
        char* err = NULL;
        block_header_t* blk = (block_header_t*)((char*)shm_tracker[i].ptr - sizeof(block_header_t));
        if (shm_tracker[i].schema && blk->reference_count <= 1) {
            shfree_by_schema(shm_tracker[i].ptr, shm_tracker[i].schema, &err);
            if (err) { free(err); err = NULL; }
        }
        shfree(shm_tracker[i].ptr, &err);
        if (err) { free(err); }
        if (shm_tracker[i].schema) { free_schema(shm_tracker[i].schema); }
    }
    shm_tracker_count = 0;
}

#define NOTHING

#define MAYFAIL \
    char* child_errmsg_ = NULL; \
// Returns a strdup'd string that the caller must free, or NULL.
char* get_prior_err(){
    char* prior_err = NULL;
    if (PyErr_Occurred()) {
        // Fetch existing exception
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        // Extract error message
        PyObject* str = PyObject_Str(value); // Convert exception to string
        if (str) {
            const char* raw = PyUnicode_AsUTF8(str);
            if (raw) {
                prior_err = strdup(raw);
            }
            Py_DECREF(str);
        }
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
    }
    return prior_err;
}

/* Call a libmorloc function whose last argument is a char** error slot;
 * if it reports an error, chain it onto any pending Python exception and
 * jump to the enclosing function's `error:` label. Requires MAYFAIL. */
#define PyTRY(fun, ...) \
    fun(__VA_ARGS__ __VA_OPT__(,) &child_errmsg_); \
    if(child_errmsg_ != NULL){ \
        char* prior_err = get_prior_err(); \
        if(prior_err == NULL){ \
            PyErr_Format(PyExc_RuntimeError, "Error (%s:%d in %s):\n%s", __FILE__, __LINE__, __func__, child_errmsg_); \
        } else { \
            PyErr_Format(PyExc_RuntimeError, "%s\nError (%s:%d in %s):\n%s", prior_err, __FILE__, __LINE__, __func__, child_errmsg_); \
            free(prior_err); \
        } \
        goto error; \
    }

/* Raise a RuntimeError with file/line context, chaining any pending
 * Python exception, then jump to `error:`. */
#define PyRAISE(msg, ...) { \
    char* prior_err_ = get_prior_err(); \
    if(prior_err_ == NULL){ \
        PyErr_Format(PyExc_RuntimeError, "Error (%s:%d in %s):\n" msg "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
    } else { \
        PyErr_Format(PyExc_RuntimeError, "%s\nError (%s:%d in %s):\n" msg "\n", prior_err_, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
        free(prior_err_); \
    } \
    goto error; \
}

/* If cond holds and a Python exception is already pending, re-raise it
 * with file/line context and jump to `error:`. */
#define PyTRACE(cond) \
    if(cond){ \
        char* prior_err = get_prior_err(); \
        if(prior_err != NULL){ \
            PyErr_Format(PyExc_TypeError, "Error (%s:%d in %s):\n%s", __FILE__, __LINE__, __func__, prior_err); \
            free(prior_err); \
            goto error; \
        } \
    }

PyObject* numpy_module = NULL;

// This function will be called to import numpy if, and only if, a numpy feature
// is used. This avoids the agonizingly long numpy import time.
// Lazily import numpy and initialize its C API (import_array). Always
// returns NULL; on failure a Python exception is set and numpy_module
// stays NULL.
void* import_numpy() {
    numpy_module = PyImport_ImportModule("numpy");
    if(numpy_module == NULL){
        PyRAISE("NumPy is not available");
    }
    import_array();
error:
    return NULL;
}

// Map morloc schema element type to numpy type number
// Returns -1 for element types with no numpy equivalent.
static int schema_to_npy_type(morloc_serial_type type) {
    switch (type) {
        case MORLOC_BOOL:    return NPY_BOOL;
        case MORLOC_SINT8:   return NPY_INT8;
        case MORLOC_SINT16:  return NPY_INT16;
        case MORLOC_SINT32:  return NPY_INT32;
        case MORLOC_SINT64:  return NPY_INT64;
        case MORLOC_UINT8:   return NPY_UINT8;
        case MORLOC_UINT16:  return NPY_UINT16;
        case MORLOC_UINT32:  return NPY_UINT32;
        case MORLOC_UINT64:  return NPY_UINT64;
        case MORLOC_FLOAT32: return NPY_FLOAT32;
        case MORLOC_FLOAT64: return NPY_FLOAT64;
        default:             return -1;
    }
}

// Recursively convert a morloc value (described by schema, located at data,
// with relative pointers resolved against base_ptr; base_ptr == NULL means
// resolve against the shared-memory pool) into a new Python object.
// Returns a new reference, or NULL with a Python exception set.
PyObject* fromAnything(const Schema* schema, const void* data, const void* base_ptr){
    MAYFAIL
    PyObject* obj = NULL;
    switch (schema->type) {
        case MORLOC_NIL:
            Py_RETURN_NONE;
        case MORLOC_BOOL:
            obj = PyBool_FromLong(*(bool*)data);
            break;
        case MORLOC_SINT8:
            obj = PyLong_FromLong(*(int8_t*)data);
            break;
        case MORLOC_SINT16:
            obj = PyLong_FromLong(*(int16_t*)data);
            break;
        case MORLOC_SINT32:
            obj = PyLong_FromLong(*(int32_t*)data);
            break;
        case MORLOC_SINT64:
            obj = PyLong_FromLongLong(*(int64_t*)data);
            break;
        case MORLOC_UINT8:
            obj = PyLong_FromUnsignedLong(*(uint8_t*)data);
            break;
        case MORLOC_UINT16:
            obj = PyLong_FromUnsignedLong(*(uint16_t*)data);
            break;
        case MORLOC_UINT32:
            obj = PyLong_FromUnsignedLong(*(uint32_t*)data);
            break;
        case MORLOC_UINT64:
            obj = PyLong_FromUnsignedLongLong(*(uint64_t*)data);
            break;
        case MORLOC_FLOAT32:
            obj = PyFloat_FromDouble(*(float*)data);
            break;
        case MORLOC_FLOAT64:
            obj = PyFloat_FromDouble(*(double*)data);
            break;
        case MORLOC_STRING: {
            Array* str_array = (Array*)data;
            void* tmp_ptr = NULL;
            if (str_array->size != 0) {
                tmp_ptr = PyTRY(resolve_relptr, str_array->data, base_ptr);
            }
            // The schema hint selects the concrete Python type: bytes,
            // bytearray, or (default) str.
            if (schema->hint != NULL && strcmp(schema->hint, "bytes") == 0) {
                // load binary data as a python bytes object
                if (str_array->size == 0) {
                    obj = PyBytes_FromStringAndSize("", 0); // empty bytes object
                } else {
                    obj = PyBytes_FromStringAndSize(tmp_ptr, str_array->size);
                }
                if (!obj) {
                    PyRAISE("Failed to parse data as bytes");
                }
            } else if (schema->hint != NULL && strcmp(schema->hint, "bytearray") == 0) {
                // load binary data as a python bytearray object
                if (str_array->size == 0) {
                    obj = PyByteArray_FromStringAndSize("", 0); // empty bytearray object
                } else {
                    obj = PyByteArray_FromStringAndSize(tmp_ptr, str_array->size);
                }
                if (!obj) {
                    PyRAISE("Failed to parse data as bytearray");
                }
            } else {
                // otherwise, load this as a str type
                if (str_array->size == 0) {
                    obj = PyUnicode_New(0, 127); // empty string object
                } else {
                    obj = PyUnicode_FromStringAndSize(tmp_ptr, str_array->size);
                }
                if (!obj) {
                    PyRAISE("Failed to parse data as string");
                }
            }
            break;
        }
        case MORLOC_ARRAY: {
            Array* array = (Array*)data;
            if (schema->hint != NULL && strcmp(schema->hint, "numpy.ndarray") == 0) {
                import_numpy();
                Schema* element_schema = schema->parameters[0];
                npy_intp dims[] = {array->size};
                void* absptr = NULL;
                int nd = 1; // number of dimensions
                int type_num;
                // Determine the NumPy type number based on the element schema
                switch (element_schema->type) {
                    case MORLOC_BOOL:    type_num = NPY_BOOL;    break;
                    case MORLOC_SINT8:   type_num = NPY_INT8;    break;
                    case MORLOC_SINT16:  type_num = NPY_INT16;   break;
                    case MORLOC_SINT32:  type_num = NPY_INT32;   break;
                    case MORLOC_SINT64:  type_num = NPY_INT64;   break;
                    case MORLOC_UINT8:   type_num = NPY_UINT8;   break;
                    case MORLOC_UINT16:  type_num = NPY_UINT16;  break;
                    case MORLOC_UINT32:  type_num = NPY_UINT32;  break;
                    case MORLOC_UINT64:  type_num = NPY_UINT64;  break;
                    case MORLOC_FLOAT32: type_num = NPY_FLOAT32; break;
                    case MORLOC_FLOAT64: type_num = NPY_FLOAT64; break;
                    default: PyRAISE("Unsupported element type for NumPy array");
                }
                absptr = PyTRY(resolve_relptr, array->data, base_ptr);
                // Create the NumPy array (zero-copy view over shared memory)
                obj = PyArray_SimpleNewFromData(nd, dims, type_num, absptr);
                if(obj == NULL) {
                    PyRAISE("Failed to parse data");
                }
                // Note that we do not want to give ownership to Python
                // This is shared memory, which means, python should not mutate
                // it.
            } else if (schema->hint != NULL && strcmp(schema->hint, "bytearray") == 0) {
                // Create a Python bytearray object
                void* absptr = PyTRY(resolve_relptr, array->data, base_ptr);
                obj = PyByteArray_FromStringAndSize((const char*)absptr, array->size);
                if (!obj) {
                    PyErr_SetString(PyExc_TypeError, "Failed to create bytearray");
                    goto error;
                }
                // Note: Similar to the numpy case, we don't want to give ownership to Python.
                // The bytearray is created from a copy of the data, so no additional handling is needed.
            } else if (schema->parameters[0]->type == MORLOC_UINT8) {
                // Create a Python bytes object for UINT8 arrays
                void* tmp_ptr = PyTRY(resolve_relptr, array->data, base_ptr);
                obj = PyBytes_FromStringAndSize((const char*)tmp_ptr, array->size);
                if (obj == NULL) {
                    // NOTE(review): error message looks garbled; probably
                    // meant "Failed to create bytes object"
                    PyRAISE("Failed to one bytes")
                }
            } else if (schema->hint == NULL || (schema->hint != NULL && strcmp(schema->hint, "list") == 0)) {
                // For other types, create a standard list
                obj = PyList_New(array->size);
                if(obj == NULL){
                    // NOTE(review): garbled message; probably "Failed to
                    // create list"
                    PyRAISE("Failed to one string");
                }
                if(array->size > 0){
                    char* start = (char*) PyTRY(resolve_relptr, array->data, base_ptr);
                    size_t width = schema->parameters[0]->width;
                    Schema* element_schema = schema->parameters[0];
                    // Fixed-width element slots; recurse for each element
                    for (size_t i = 0; i < array->size; i++) {
                        PyObject* item = fromAnything(element_schema, start + width * i, base_ptr);
                        if (!item || PyList_SetItem(obj, i, item) < 0) {
                            Py_XDECREF(item);
                            PyRAISE("Failed to access element in list")
                        }
                    }
                }
            } else {
                PyRAISE("Unexpected array hint");
            }
            break;
        }
        case MORLOC_TUPLE: {
            obj = PyTuple_New(schema->size);
            if(obj == NULL){
                PyRAISE("Failed in tuple");
            }
            // Each tuple field lives at a fixed offset within the record
            for (size_t i = 0; i < schema->size; i++) {
                void* item_ptr = (char*)data + schema->offsets[i];
                PyObject* item = fromAnything(schema->parameters[i], item_ptr, base_ptr);
                if (!item || PyTuple_SetItem(obj, i, item) < 0) {
                    Py_XDECREF(item);
                    PyRAISE("Failed to access tuple element");
                }
            }
            break;
        }
        case MORLOC_MAP: {
            obj = PyDict_New();
            if(obj == NULL){
                PyRAISE("Failed in map");
            }
            // Keys come from the schema; values are fields at fixed offsets
            for (size_t i = 0; i < schema->size; i++) {
                void* item_ptr = (char*)data + schema->offsets[i];
                PyObject* value = fromAnything(schema->parameters[i], item_ptr, base_ptr);
                PyObject* key = PyUnicode_FromString(schema->keys[i]);
                if (!value || !key || PyDict_SetItem(obj, key, value) < 0) {
                    Py_XDECREF(value);
                    Py_XDECREF(key);
                    PyRAISE("Failed to access map element");
                }
                Py_DECREF(key);
                Py_DECREF(value);
            }
            break;
        }
        case MORLOC_OPTIONAL: {
            // Leading byte is a presence tag: 0 -> None, nonzero -> payload
            uint8_t tag = *(const uint8_t*)data;
            if (tag == 0) {
                Py_RETURN_NONE;
            }
            obj = fromAnything(schema->parameters[0], (const char*)data + schema->offsets[0], base_ptr);
            if (!obj) {
                PyRAISE("Failed to deserialize optional inner value");
            }
            break;
        }
        case MORLOC_TENSOR: {
            import_numpy();
            const Tensor* tensor = (const Tensor*)data;
            size_t ndim = schema_tensor_ndim(schema);
            int type_num = schema_to_npy_type(schema->parameters[0]->type);
            if (type_num < 0) {
                PyRAISE("Unsupported tensor element type");
            }
            if (tensor->total_elements == 0) {
                // Empty tensor: return an empty 1-D array of the right dtype
                npy_intp zero_dims[1] = {0};
                obj = PyArray_SimpleNew(1, zero_dims, type_num);
                break;
            }
            const int64_t* shape = (const int64_t*)resolve_relptr(tensor->shape, base_ptr, NULL);
            const void* tdata = resolve_relptr(tensor->data, base_ptr, NULL);
            npy_intp np_dims[5];
            for (size_t i = 0; i < ndim; i++) np_dims[i] = (npy_intp)shape[i];
            // Create numpy array as a copy (R/W) from the data
            obj = PyArray_SimpleNewFromData((int)ndim, np_dims, type_num, (void*)tdata);
            if (!obj) {
                PyRAISE("Failed to create numpy array from tensor");
            }
            // Make a copy so the array owns its data (SHM may be freed)
            PyObject* owned = PyArray_NewCopy((PyArrayObject*)obj, NPY_CORDER);
            Py_DECREF(obj);
            obj = owned;
            if (!obj) {
                PyRAISE("Failed to copy tensor data");
            }
            break;
        }
        default:
            PyRAISE("Unsupported schema type");
    }
    return obj;
error:
    Py_XDECREF(obj);
    return NULL;
}

// Range-checked store of a Python int into a signed C integer slot `dest`;
// raises TypeError/OverflowError and jumps to `error:` on failure.
#define HANDLE_SINT_TYPE(CTYPE, PYLONG_FUNC, MIN, MAX) \
    do { \
        if (!PyLong_Check(obj)) { \
            PyErr_Format(PyExc_TypeError, "Expected int for %s, but got %s", #CTYPE, Py_TYPE(obj)->tp_name); \
            goto error; \
        } \
        long long value = PYLONG_FUNC(obj); \
        if (value < MIN || value > MAX || PyErr_Occurred()) { \
            PyErr_Format(PyExc_OverflowError, "Integer overflow for %s", #CTYPE); \
            goto error; \
        } \
        *(CTYPE*)dest = (CTYPE)value; \
    } while(0)

// Range-checked store of a Python int into an unsigned C integer slot `dest`;
// raises TypeError/OverflowError and jumps to `error:` on failure.
#define HANDLE_UINT_TYPE(CTYPE, PYLONG_FUNC, MAX) \
    do { \
        if (!PyLong_Check(obj)) { \
            PyErr_Format(PyExc_TypeError, "Expected int for %s, but got %s", #CTYPE, Py_TYPE(obj)->tp_name); \
            goto error; \
        } \
        unsigned long long value = PYLONG_FUNC(obj); \
        if (value > MAX || PyErr_Occurred()) { \
            PyErr_Format(PyExc_OverflowError, "Integer overflow for %s", #CTYPE); \
            goto error; \
        } \
        *(CTYPE*)dest = (CTYPE)value; \
    } while(0)

// Compute the number of shared-memory bytes needed to serialize obj under
// schema, including worst-case alignment padding for variable-size children.
// Returns -1 with a Python exception set when obj does not match the schema.
ssize_t get_shm_size(const Schema* schema, PyObject* obj) {
    switch (schema->type) {
        // Fixed-width scalars: size is fully determined by the schema
        case MORLOC_NIL:
        case MORLOC_BOOL:
        case MORLOC_SINT8:
        case MORLOC_SINT16:
        case MORLOC_SINT32:
        case MORLOC_SINT64:
        case MORLOC_UINT8:
        case MORLOC_UINT16:
        case MORLOC_UINT32:
        case MORLOC_UINT64:
        case MORLOC_FLOAT32:
        case MORLOC_FLOAT64:
            return schema->width;
        case MORLOC_STRING:
        case MORLOC_ARRAY:
            if (schema->type == MORLOC_STRING && !(PyUnicode_Check(obj) || PyBytes_Check(obj) || PyByteArray_Check(obj) )) {
                PyRAISE("Expected str or bytes for MORLOC_STRING, but got %s", Py_TYPE(obj)->tp_name);
            }
            if (schema->type == MORLOC_ARRAY && !(PyList_Check(obj) || PyBytes_Check(obj) || PyByteArray_Check(obj) || PyObject_HasAttrString(obj, "__array_interface__"))) {
                PyRAISE("Expected list, bytes, bytearray, or numpy array for MORLOC_ARRAY, but got %s", Py_TYPE(obj)->tp_name);
            }
            {
                ssize_t required_size = 0;
                // worst-case cursor alignment padding for element data
                required_size += (ssize_t)(schema_alignment(schema->parameters[0]) - 1);
                if (PyList_Check(obj)) {
                    Py_ssize_t list_size = PyList_Size(obj);
                    size_t element_width = schema->parameters[0]->width;
                    switch(schema->parameters[0]->type){
                        // Fixed-width elements: size is count * width
                        case MORLOC_NIL:
                        case MORLOC_BOOL:
                        case MORLOC_SINT8:
                        case MORLOC_SINT16:
                        case MORLOC_SINT32:
                        case MORLOC_SINT64:
                        case MORLOC_UINT8:
                        case MORLOC_UINT16:
                        case MORLOC_UINT32:
                        case MORLOC_UINT64:
                        case MORLOC_FLOAT32:
                        case MORLOC_FLOAT64:
                            required_size += list_size * element_width;
                            break;
                        // Variable-size elements: recurse per element
                        case MORLOC_STRING:
                        case MORLOC_ARRAY:
                        case MORLOC_TUPLE:
                        case MORLOC_MAP:
                        case MORLOC_OPTIONAL:
                            for(size_t i = 0; i < (size_t)list_size; i++){
                                required_size += get_shm_size(schema->parameters[0], PyList_GetItem(obj, i));
                            }
                            break;
                    }
                } else if (PyObject_HasAttrString(obj, "__array_interface__")) {
                    import_numpy();
                    PyArrayObject *arr = (PyArrayObject *)obj;
                    npy_intp *dims = PyArray_DIMS(arr);
                    int ndim = PyArray_NDIM(arr);
                    size_t total_elements = 1;
                    for (int i = 0; i < ndim; i++) {
                        total_elements *= dims[i];
                    }
                    required_size += total_elements * PyArray_ITEMSIZE(arr);
                } else if (PyBytes_Check(obj)) {
                    required_size += (ssize_t)PyBytes_GET_SIZE(obj);
                } else if (PyByteArray_Check(obj)) {
                    required_size += (ssize_t)PyByteArray_GET_SIZE(obj);
                } else if (PyUnicode_Check(obj)) {
                    // Writes the UTF-8 byte length into required_size.
                    // NOTE(review): this overwrites (rather than adds to) the
                    // alignment padding accumulated above -- harmless only
                    // because char elements have alignment 1; confirm.
                    PyUnicode_AsUTF8AndSize(obj, &required_size);
                } else {
                    PyRAISE("Unsupported data type");
                }
                required_size += sizeof(Array);
                return required_size;
            }
        case MORLOC_TUPLE:
            if (!PyTuple_Check(obj) && !PyList_Check(obj)) {
                PyRAISE("Expected tuple or list for MORLOC_TUPLE, but got %s", Py_TYPE(obj)->tp_name);
            }
            {
                Py_ssize_t size = PyTuple_Check(obj) ? PyTuple_Size(obj) : PyList_Size(obj);
                if ((size_t)size != schema->size) {
                    PyRAISE("Tuple/List size mismatch");
                }
                // Start from the fixed record width; add only the overflow of
                // each variable-size field beyond its inline slot.
                size_t required_size = schema->width;
                for (Py_ssize_t i = 0; i < size; ++i) {
                    PyObject* item = PyTuple_Check(obj) ? PyTuple_GetItem(obj, i) : PyList_GetItem(obj, i);
                    ssize_t element_size = get_shm_size(schema->parameters[i], item);
                    if(element_size != -1){
                        if ((size_t)element_size > schema->parameters[i]->width) {
                            required_size += (size_t)element_size - schema->parameters[i]->width;
                        }
                    } else {
                        return -1;
                    }
                }
                return (ssize_t)required_size;
            }
        case MORLOC_MAP:
            if (!PyDict_Check(obj)) {
                PyRAISE("Expected dict for MORLOC_MAP, but got %s", Py_TYPE(obj)->tp_name);
            }
            {
                size_t required_size = schema->width;
                // Missing keys contribute nothing beyond their inline slot.
                for (size_t i = 0; i < schema->size; ++i) {
                    PyObject* key = PyUnicode_FromString(schema->keys[i]);
                    PyObject* value = PyDict_GetItem(obj, key);
                    Py_DECREF(key);
                    if (value) {
                        ssize_t element_size = get_shm_size(schema->parameters[i], value);
                        if(element_size != -1){
                            if ((size_t)element_size > schema->parameters[i]->width) {
                                required_size += (size_t)element_size - schema->parameters[i]->width;
                            }
                        } else {
                            return -1;
                        }
                    }
                }
                return (ssize_t)required_size;
            }
        case MORLOC_OPTIONAL:
            if (obj == Py_None) {
                return (ssize_t)schema->width;
            }
            {
                ssize_t inner_size = get_shm_size(schema->parameters[0], obj);
                if (inner_size == -1) return -1;
                ssize_t extra = (inner_size > (ssize_t)schema->parameters[0]->width) ?
                    inner_size - (ssize_t)schema->parameters[0]->width : 0;
                return (ssize_t)schema->width + extra;
            }
        case MORLOC_TENSOR:
        {
            import_numpy();
            int type_num = schema_to_npy_type(schema->parameters[0]->type);
            if (type_num < 0) {
                PyRAISE("Unsupported tensor element type");
            }
            // Coerce to a C-contiguous array of the schema dtype
            PyArrayObject* arr = (PyArrayObject*)PyArray_FROM_OTF(obj, type_num, NPY_ARRAY_C_CONTIGUOUS);
            if (!arr) {
                PyRAISE("Expected numpy array for MORLOC_TENSOR");
            }
            size_t total = (size_t)PyArray_SIZE(arr);
            size_t elem_width = schema->parameters[0]->width;
            // Header + aligned shape vector + aligned element buffer
            ssize_t required = (ssize_t)sizeof(Tensor);
            required += (ssize_t)(_Alignof(int64_t) - 1);
            required += (ssize_t)(schema_tensor_ndim(schema) * sizeof(int64_t));
            required += (ssize_t)(schema_alignment(schema->parameters[0]) - 1);
            required += (ssize_t)(total * elem_width);
            Py_DECREF(arr);
            return required;
        }
        default:
            PyRAISE("Unsupported schema type");
    }
    PyRAISE("Reached the unreachable");
error:
    return -1;
}

// Recursively serialize obj into dest according to schema. Variable-size
// payloads are written at *cursor, which is advanced past them. Returns 0 on
// success, -1 with a Python exception set on failure.
int to_voidstar_r(void* dest, void** cursor, const Schema* schema, PyObject* obj) {
    MAYFAIL
    switch (schema->type) {
        case MORLOC_NIL:
            if (obj != Py_None) {
                PyRAISE("Expected None for MORLOC_NIL, but got %s", Py_TYPE(obj)->tp_name);
            }
            *((int8_t*)dest) = (int8_t)0;
            break;
        case MORLOC_BOOL:
            if (!PyBool_Check(obj)) {
                PyRAISE("Expected bool for MORLOC_BOOL, but got %s", Py_TYPE(obj)->tp_name);
            }
            *((bool*)dest) = (obj == Py_True);
            break;
        case MORLOC_SINT8:
            HANDLE_SINT_TYPE(int8_t, PyLong_AsLongLong, INT8_MIN, INT8_MAX);
            break;
        case MORLOC_SINT16:
            HANDLE_SINT_TYPE(int16_t, PyLong_AsLongLong, INT16_MIN, INT16_MAX);
            break;
        case MORLOC_SINT32:
            HANDLE_SINT_TYPE(int32_t, PyLong_AsLongLong, INT32_MIN, INT32_MAX);
            break;
        case MORLOC_SINT64:
            HANDLE_SINT_TYPE(int64_t, PyLong_AsLongLong, INT64_MIN, INT64_MAX);
            break;
        case MORLOC_UINT8:
            HANDLE_UINT_TYPE(uint8_t, PyLong_AsUnsignedLongLong, UINT8_MAX);
            break;
        case MORLOC_UINT16:
            HANDLE_UINT_TYPE(uint16_t, PyLong_AsUnsignedLongLong, UINT16_MAX);
            break;
        case MORLOC_UINT32:
            HANDLE_UINT_TYPE(uint32_t, PyLong_AsUnsignedLongLong,
                UINT32_MAX);
            break;
        case MORLOC_UINT64:
            HANDLE_UINT_TYPE(uint64_t, PyLong_AsUnsignedLongLong, UINT64_MAX);
            break;
        case MORLOC_FLOAT32:
            if (!PyFloat_Check(obj)) {
                PyRAISE("Expected float for MORLOC_FLOAT32, but got %s", Py_TYPE(obj)->tp_name);
            }
            *((float*)dest) = (float)PyFloat_AsDouble(obj);
            break;
        case MORLOC_FLOAT64:
            // Accept either float or int for doubles
            if(PyFloat_Check(obj)) {
                *((double*)dest) = PyFloat_AsDouble(obj);
            } else if(PyLong_Check(obj)){
                *((double*)dest) = (double)PyLong_AsLongLong(obj);
            } else {
                PyRAISE("Expected float or int for MORLOC_FLOAT64, but got %s", Py_TYPE(obj)->tp_name);
            }
            break;
        case MORLOC_STRING:
        case MORLOC_ARRAY:
            if (schema->type == MORLOC_STRING && !(PyUnicode_Check(obj) || PyBytes_Check(obj) || PyByteArray_Check(obj))) {
                PyRAISE("Expected str or bytes for MORLOC_STRING, but got %s", Py_TYPE(obj)->tp_name);
            }
            if (schema->type == MORLOC_ARRAY && !(PyList_Check(obj) || PyBytes_Check(obj) || PyByteArray_Check(obj) || PyObject_HasAttrString(obj, "__array_interface__"))) {
                PyRAISE("Expected list, bytes, bytearray, or numpy array for MORLOC_ARRAY, but got %s", Py_TYPE(obj)->tp_name);
            }
            {
                Py_ssize_t size;
                // PyBytes_AsStringAndSize / PyByteArray_AS_STRING hand back a
                // non-const pointer
                char* mutable_data = NULL;
                // str and numpy sources are read through a const pointer
                const char* immutable_data = NULL;
                if (PyList_Check(obj)) {
                    size = PyList_Size(obj);
                } else if (PyBytes_Check(obj)) {
                    // This needs non-const data
                    PyBytes_AsStringAndSize(obj, &mutable_data, &size);
                } else if (PyByteArray_Check(obj)) {
                    mutable_data = PyByteArray_AS_STRING(obj);
                    size = PyByteArray_GET_SIZE(obj);
                } else if (schema->type == MORLOC_ARRAY && PyObject_HasAttrString(obj, "__array_interface__")) {
                    // check if it is a numpy array
                    import_numpy();
                    PyArrayObject* arr = (PyArrayObject*)obj;
                    size = PyArray_SIZE(arr);
                    // This needs const data
                    immutable_data = PyArray_DATA(arr); // Get the data pointer
                    // Verify that the array is contiguous
                    if (!PyArray_ISCONTIGUOUS(arr)) {
                        PyRAISE("NumPy array must be contiguous");
                    }
                } else {
                    immutable_data = PyUnicode_AsUTF8AndSize(obj, &size);
                }
                Array* result = (Array*)dest;
                result->size = (size_t)size;
                if(result->size == 0){
                    result->data = RELNULL;
                    break;
                }
                // align cursor for element data placement
                *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, schema_alignment(schema->parameters[0]));
                result->data = PyTRY(abs2rel, *cursor);
                if (PyList_Check(obj)) {
                    // Fixed size width of each element (variable size data will
                    // be written to the cursor location)
                    size_t width = schema->parameters[0]->width;
                    // Move the cursor to the location immediately after the
                    // fixed sized elements
                    *cursor = (void*)(*(char**)cursor + size * width);
                    char* start = (char*) PyTRY(rel2abs, result->data);
                    Schema* element_schema = schema->parameters[0];
                    for (Py_ssize_t i = 0; i < size; i++) {
                        PyObject* item = PyList_GetItem(obj, i);
                        if (to_voidstar_r(start + width * i, cursor, element_schema, item) != 0) {
                            goto error;
                        }
                    }
                } else if (PyBytes_Check(obj) || PyByteArray_Check(obj)){
                    absptr_t tmp_ptr = PyTRY(rel2abs, result->data);
                    memcpy(tmp_ptr, mutable_data, size);
                    // move cursor to the location after the copied data
                    *cursor = (void*)(*(char**)cursor + size);
                } else{
                    size_t width = schema->parameters[0]->width;
                    absptr_t tmp_ptr = PyTRY(rel2abs, result->data);
                    memcpy(tmp_ptr, immutable_data, size * width);
                    // Move the cursor to the location immediately after the
                    // fixed sized elements
                    *cursor = (void*)(*(char**)cursor + size * width);
                }
            }
            break;
        case MORLOC_TUPLE:
            if (!PyTuple_Check(obj) && !PyList_Check(obj)) {
                PyRAISE("Expected tuple or list for MORLOC_TUPLE, but got %s", Py_TYPE(obj)->tp_name);
            }
            {
                Py_ssize_t size = PyTuple_Check(obj) ? PyTuple_Size(obj) : PyList_Size(obj);
                if ((size_t)size != schema->size) {
                    PyRAISE("Tuple/List size mismatch");
                }
                // Each field is written into its fixed offset in the record
                for (Py_ssize_t i = 0; i < size; ++i) {
                    PyObject* item = PyTuple_Check(obj) ? PyTuple_GetItem(obj, i) : PyList_GetItem(obj, i);
                    if (to_voidstar_r((char*)dest + schema->offsets[i], cursor, schema->parameters[i], item) != 0) {
                        goto error;
                    }
                }
            }
            break;
        case MORLOC_MAP:
            if (!PyDict_Check(obj)) {
                PyRAISE("Expected dict for MORLOC_MAP, but got %s", Py_TYPE(obj)->tp_name);
            }
            {
                // Missing keys are silently skipped (their slots are left
                // as allocated)
                for (size_t i = 0; i < schema->size; ++i) {
                    PyObject* key = PyUnicode_FromString(schema->keys[i]);
                    PyObject* value = PyDict_GetItem(obj, key);
                    Py_DECREF(key);
                    if (value) {
                        if (to_voidstar_r((char*)dest + schema->offsets[i], cursor, schema->parameters[i], value) != 0) {
                            goto error;
                        }
                    }
                }
            }
            break;
        case MORLOC_OPTIONAL:
            if (obj == Py_None) {
                // tag byte 0 == absent; zero the payload slot
                *((uint8_t*)dest) = 0;
                memset((char*)dest + schema->offsets[0], 0, schema->parameters[0]->width);
            } else {
                *((uint8_t*)dest) = 1;
                if (to_voidstar_r((char*)dest + schema->offsets[0], cursor, schema->parameters[0], obj) != 0) {
                    goto error;
                }
            }
            break;
        case MORLOC_TENSOR:
        {
            import_numpy();
            int type_num = schema_to_npy_type(schema->parameters[0]->type);
            if (type_num < 0) {
                PyRAISE("Unsupported tensor element type");
            }
            PyArrayObject* arr = (PyArrayObject*)PyArray_FROM_OTF(obj, type_num, NPY_ARRAY_C_CONTIGUOUS);
            if (!arr) {
                PyRAISE("Expected numpy array for MORLOC_TENSOR");
            }
            int ndim = PyArray_NDIM(arr);
            npy_intp* np_shape = PyArray_DIMS(arr);
            size_t total = (size_t)PyArray_SIZE(arr);
            size_t elem_width = schema->parameters[0]->width;
            Tensor* tensor = (Tensor*)dest;
            tensor->total_elements = total;
            tensor->device_type = 0;
            tensor->device_id = 0;
            if (total == 0) {
                tensor->shape = RELNULL;
                tensor->data = RELNULL;
                Py_DECREF(arr);
                break;
            }
            // Write shape array
            *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, _Alignof(int64_t));
            {
                char* rel_err = NULL;
                tensor->shape = abs2rel((absptr_t)*cursor, &rel_err);
                if (rel_err) {
                    free(rel_err);
                    Py_DECREF(arr);
                    PyRAISE("abs2rel failed for tensor shape");
                }
            }
            int64_t* shape_dst = (int64_t*)*cursor;
            for (int i = 0; i < ndim; i++) shape_dst[i] = (int64_t)np_shape[i];
            *cursor = (char*)*cursor + ndim * sizeof(int64_t);
            // Write data buffer
            size_t elem_align = schema_alignment(schema->parameters[0]);
            *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, elem_align);
            {
                char* rel_err = NULL;
                tensor->data = abs2rel((absptr_t)*cursor, &rel_err);
                if (rel_err) {
                    free(rel_err);
                    Py_DECREF(arr);
                    PyRAISE("abs2rel failed for tensor data");
                }
            }
            memcpy(*cursor, PyArray_DATA(arr), total * elem_width);
            *cursor = (char*)*cursor + total * elem_width;
            Py_DECREF(arr);
        }
        break;
        default:
            PyRAISE("Unsupported schema type");
    }
    return 0;
error:
    return -1;
}

// Serialize obj into a single freshly-allocated shared-memory block laid out
// per schema. Returns the absolute SHM pointer (caller owns; free with
// shfree), or NULL with a Python exception set. On failure the partially
// written block is freed.
void* to_voidstar(const Schema* schema, PyObject* obj){
    MAYFAIL
    void* dest = NULL;
    // calculate the required size of the shared memory object
    ssize_t shm_size = get_shm_size(schema, obj);
    if(shm_size == -1){
        PyRAISE("Schema does not match object");
    }
    // allocate the required memory as a single block
    dest = PyTRY(shmalloc, (size_t)shm_size);
    // set the write location of variable size chunks
    void* cursor = (void*)((char*)dest + schema->width);
    // write the data to the block
    int result = to_voidstar_r(dest, &cursor, schema, obj);
    if (result != 0) {
        goto error;
    }
    return dest;
error:
    if (dest != NULL) {
        char* free_errmsg = NULL;
        shfree(dest, &free_errmsg);
        free(free_errmsg);
    }
    return NULL;
}

// Python binding: block until a client connects to the daemon held in the
// capsule argument; returns the client file descriptor as an int.
static PyObject* pybinding__wait_for_client(PyObject* self, PyObject* args) {
    MAYFAIL
    PyObject* daemon_capsule;
    if (!PyArg_ParseTuple(args, "O", &daemon_capsule)) {
        PyRAISE("Failed to parse arguments");
    }
    language_daemon_t* daemon = (language_daemon_t*)PyCapsule_GetPointer(daemon_capsule, "language_daemon_t");
    int client_fd = PyTRY(wait_for_client, daemon);
    return PyLong_FromLong((long)client_fd);
error:
    return NULL;
}

// Python binding: start the language daemon (socket_path, tmpdir,
// shm_basename, shm_default_size) and return it wrapped in a capsule.
static PyObject* pybinding__start_daemon(PyObject* self, PyObject* args) {
    MAYFAIL
    const char* socket_path;
    const char* tmpdir;
    const char* shm_basename;
    size_t shm_default_size;
    language_daemon_t* daemon = NULL;
    if (!PyArg_ParseTuple(args, "sssk", &socket_path, &tmpdir, &shm_basename, &shm_default_size)) {
        goto error;
    }
    daemon = PyTRY(
        start_daemon,
        socket_path,
            &client_fd, &packet, &packet_size)) {
        PyRAISE("Failed to parse arguments");
    }
    size_t bytes_sent = PyTRY(send_packet_to_foreign_server, client_fd, packet);
    return PyLong_FromSize_t(bytes_sent);
error:
    return NULL;
}

// Python binding: read one complete packet from client_fd and return it as a
// bytes object.
static PyObject* pybinding__stream_from_client(PyObject* self, PyObject* args){
    MAYFAIL
    int client_fd = 0;
    uint8_t* packet = NULL;
    if (!PyArg_ParseTuple(args, "i", &client_fd)) {
        PyRAISE("Failed to parse arguments");
    }
    packet = PyTRY(stream_from_client, client_fd);
    size_t packet_size = PyTRY(morloc_packet_size, packet);
    PyObject* retval = PyBytes_FromStringAndSize((char*)packet, packet_size);
    free(packet);
    return retval;
error:
    FREE(packet)
    return NULL;
}

// Python binding: close a socket file descriptor. Returns None.
static PyObject* pybinding__close_socket(PyObject* self, PyObject* args){
    int socket_id = 0;
    if (!PyArg_ParseTuple(args, "i", &socket_id)) {
        PyRAISE("Failed to parse arguments");
    }
    close_socket(socket_id);
    Py_RETURN_NONE;
error:
    return NULL;
}

// Transforms a value into a message ready for the socket
//
// Serializes a Python object under a schema string into shared memory and
// wraps it in a morloc data packet (returned as bytes). SHM referenced by the
// packet is registered in the tracker for deferred cleanup; inlined data is
// freed immediately. Schemas hinted "arrow" go through the Arrow C Data
// Interface instead of the generic serializer.
static PyObject* pybinding__put_value(PyObject* self, PyObject* args){
    MAYFAIL
    uint8_t* packet = NULL;
    Schema* schema = NULL;
    void* voidstar = NULL;
    size_t packet_size = 0;
    bool tracked = false;
    PyObject* obj;
    const char* schema_str;
    if (!PyArg_ParseTuple(args, "Os", &obj, &schema_str)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    // Arrow dispatch: if schema hint is "arrow", use Arrow C Data Interface
    if (schema->hint && strcmp(schema->hint, "arrow") == 0) {
        // Export pyarrow object via C Data Interface -> copy to shm -> packet
        struct ArrowSchema arrow_schema;
        struct ArrowArray arrow_array;
        // Call obj._export_to_c(arrow_array_ptr, arrow_schema_ptr)
        PyObject* export_result = PyObject_CallMethod(
            obj, "_export_to_c", "nn",
            (Py_ssize_t)&arrow_array, (Py_ssize_t)&arrow_schema);
        if (!export_result) {
            // NOTE(review): schema is freed here and PyRAISE jumps to the
            // `error:` label which (tracked == false) frees it again --
            // likely double free; confirm and fix.
            free_schema(schema);
            PyRAISE("Failed to export pyarrow object via C Data Interface");
        }
        Py_DECREF(export_result);
        char* errmsg = NULL;
        relptr_t relptr = arrow_to_shm(&arrow_array, &arrow_schema, &errmsg);
        // Release the exported C Data Interface structs
        if (arrow_schema.release) arrow_schema.release(&arrow_schema);
        if (arrow_array.release) arrow_array.release(&arrow_array);
        if (errmsg) {
            free_schema(schema);
            PyErr_SetString(PyExc_RuntimeError, errmsg);
            free(errmsg);
            return NULL;
        }
        packet = make_arrow_data_packet(relptr, schema);
        if (!packet) {
            // NOTE(review): same double-free concern as above (error label
            // frees schema again).
            free_schema(schema);
            PyRAISE("Failed to create arrow data packet");
        }
        // Track shm for cleanup
        char* resolve_err = NULL;
        void* shm_ptr = rel2abs(relptr, &resolve_err);
        if (resolve_err) {
            free(resolve_err);
        }
        if (shm_ptr) {
            shm_tracker_push((absptr_t)shm_ptr, NULL);
            tracked = true;
        }
        packet_size = PyTRY(morloc_packet_size, packet);
        PyObject* retval = PyBytes_FromStringAndSize((char*)packet, packet_size);
        free(packet);
        free_schema(schema);
        return retval;
    }
    voidstar = to_voidstar(schema, obj);
    PyTRACE(voidstar == NULL)
    // convert to a relative pointer conserved between language servers
    relptr_t relptr = PyTRY(abs2rel, voidstar);
    packet = PyTRY(make_data_packet_auto, voidstar, relptr, schema);
    {
        const morloc_packet_header_t* hdr = (const morloc_packet_header_t*)packet;
        if (hdr->command.data.source == PACKET_SOURCE_RPTR) {
            // SHM referenced by packet -- track for deferred cleanup
            shm_tracker_push((absptr_t)voidstar, schema);
            tracked = true;
        } else {
            // Data inlined in packet -- free SHM immediately
            char* free_err = NULL;
            shfree_by_schema((absptr_t)voidstar, schema, &free_err);
            if (free_err) {
                free(free_err);
                free_err = NULL;
            }
            shfree((absptr_t)voidstar, &free_err);
            if (free_err) {
                free(free_err);
            }
            voidstar = NULL;
        }
    }
    packet_size = PyTRY(morloc_packet_size, packet);
    {
        PyObject* retval = PyBytes_FromStringAndSize((char*)packet, packet_size);
        free(packet);
        if (!tracked) {
            free_schema(schema);
        }
        return retval;
    }
error:
    FREE(packet)
    if (!tracked) {
        if (voidstar && schema) {
            char* free_err = NULL;
            shfree_by_schema((absptr_t)voidstar, schema, &free_err);
            if (free_err) {
                free(free_err);
                free_err = NULL;
            }
            shfree((absptr_t)voidstar, &free_err);
            if (free_err) {
                free(free_err);
            }
        }
        free_schema(schema);
    }
    return NULL;
}

// Use a key to retrieve a value
//
// Deserializes a morloc data packet (bytes) into a Python object using a
// schema string. Dispatches on the packet header: Arrow payloads import via
// pyarrow's C Data Interface; inline voidstar payloads are read straight from
// the packet; SHM-referenced payloads are increfed and tracked for deferred
// cleanup.
static PyObject* pybinding__get_value(PyObject* self, PyObject* args){
    MAYFAIL
    uint8_t* voidstar = NULL;
    Schema* schema = NULL;
    PyObject* obj = NULL;
    bool tracked = false;
    const char* packet;
    size_t packet_size;
    const char* schema_str;
    if (!PyArg_ParseTuple(args, "y#s", &packet, &packet_size, &schema_str)) {
        PyRAISE("Failed to parse arguments");
    }
    const morloc_packet_header_t* header = (const morloc_packet_header_t*)packet;
    uint8_t source = header->command.data.source;
    uint8_t format = header->command.data.format;
    schema = PyTRY(parse_schema, schema_str)
    // Arrow dispatch: if packet format is Arrow, import via C Data Interface
    if (format == PACKET_FORMAT_ARROW) {
        voidstar = PyTRY(get_morloc_data_packet_value, (uint8_t*)packet, schema);
        const arrow_shm_header_t* arrow_hdr = (const arrow_shm_header_t*)voidstar;
        struct ArrowSchema arrow_schema;
        struct ArrowArray arrow_array;
        char* arrow_err = NULL;
        arrow_from_shm(arrow_hdr, &arrow_schema, &arrow_array, &arrow_err);
        if (arrow_err) {
            free_schema(schema);
            PyErr_SetString(PyExc_RuntimeError, arrow_err);
            free(arrow_err);
            return NULL;
        }
        // Import via pyarrow RecordBatch.from_buffers or _import_from_c
        PyObject* pyarrow_mod = PyImport_ImportModule("pyarrow");
        if (!pyarrow_mod) {
            if (arrow_schema.release) arrow_schema.release(&arrow_schema);
            if (arrow_array.release) arrow_array.release(&arrow_array);
            // NOTE(review): schema is freed here and again at the `error:`
            // label reached by PyRAISE (tracked == false) -- likely double
            // free; confirm and fix.
            free_schema(schema);
            PyRAISE("pyarrow is required for arrow-typed data");
        }
        PyObject* rb_class = PyObject_GetAttrString(pyarrow_mod, "RecordBatch");
        Py_DECREF(pyarrow_mod);
        if (!rb_class) {
            if (arrow_schema.release) arrow_schema.release(&arrow_schema);
            if (arrow_array.release) arrow_array.release(&arrow_array);
            // NOTE(review): same double-free concern as above.
            free_schema(schema);
            PyRAISE("Failed to get pyarrow.RecordBatch");
        }
        // Use RecordBatch._import_from_c(array_ptr, schema_ptr)
        obj =
PyObject_CallMethod(rb_class, "_import_from_c", "nn", (Py_ssize_t)&arrow_array, (Py_ssize_t)&arrow_schema); Py_DECREF(rb_class); // Incref shm so it stays alive while pyarrow references the buffers char* incref_err = NULL; shincref((absptr_t)voidstar, &incref_err); if (incref_err) { free(incref_err); } shm_tracker_push((absptr_t)voidstar, NULL); free_schema(schema); if (!obj) return NULL; return obj; } // Fast path: inline voidstar -- read directly from packet, no SHM needed if (source == PACKET_SOURCE_MESG && format == PACKET_FORMAT_VOIDSTAR) { const uint8_t* payload = (const uint8_t*)packet + sizeof(morloc_packet_header_t) + header->offset; obj = fromAnything(schema, (const void*)payload, (const void*)payload); PyTRACE(obj == NULL) free_schema(schema); return obj; } // SHM paths (RPTR or MESG+MSGPACK) bool is_rptr = (source == PACKET_SOURCE_RPTR); voidstar = PyTRY(get_morloc_data_packet_value, (uint8_t*)packet, schema); // For RPTR data, increment refcount so the owner's tracker flush // won't destroy data we may still need (e.g. forwarded packets). if (is_rptr) { char* incref_err = NULL; shincref((absptr_t)voidstar, &incref_err); if (incref_err) { free(incref_err); } // Track for deferred decref (tracker takes schema ownership) shm_tracker_push((absptr_t)voidstar, schema); tracked = true; } obj = fromAnything(schema, voidstar, NULL); PyTRACE(obj == NULL) if (!tracked) { free_schema(schema); } return obj; error: if (!tracked) { free_schema(schema); } return NULL; } // Free tracked SHM allocations from put_value calls. // Called at dispatch start to free result SHM from previous dispatch. static PyObject* pybinding__flush_shm_tracker(PyObject* self, PyObject* args) { (void)self; (void)args; flush_shm_tracker(); Py_RETURN_NONE; } // Make a foreign call // // Arguments: // 1. socket path // 2. midx // 3. 
// list of arguments, each is bytestring packet
//
// Build a local-call packet for manifold `mid`, send it over the Unix socket
// at `socket_path`, and return the raw response packet as Python bytes.
// If the response is an RPTR (shared-memory reference), its SHM block is
// incref'd and tracked so the callee's tracker flush cannot destroy it.
static PyObject* pybinding__foreign_call(PyObject* self, PyObject* args) {
    MAYFAIL
    char* socket_path;
    int mid;
    PyObject* py_args;
    const uint8_t** arg_packets = NULL;
    Py_ssize_t nargs;
    Py_ssize_t i;
    uint8_t* packet = NULL;
    uint8_t* result = NULL;
    size_t result_length = 0;

    // Parse arguments: string, integer, and sequence
    if (!PyArg_ParseTuple(args, "siO", &socket_path, &mid, &py_args)) {
        PyRAISE("Failed to parse argument")
    }

    // Verify third argument is a sequence
    if (!PySequence_Check(py_args)) {
        PyRAISE("Third argument must be a sequence");
    }

    // Get sequence size and allocate C arrays
    nargs = PySequence_Size(py_args);
    arg_packets = (const uint8_t**)calloc(nargs, sizeof(uint8_t*));
    if (!arg_packets) {
        PyErr_NoMemory();
        goto error;
    }

    // Convert Python bytes to C buffers
    // NOTE(review): PySequence_GetItem can return NULL on failure, which is
    // not checked before PyBytes_Check -- confirm the caller always passes a
    // well-formed list.  The borrowed buffer stays valid after Py_DECREF
    // because py_args still holds a reference to each bytes object.
    for (i = 0; i < nargs; i++) {
        PyObject* item = PySequence_GetItem(py_args, i);
        if (!PyBytes_Check(item)) {
            Py_DECREF(item);
            free(arg_packets);
            arg_packets = NULL;
            PyRAISE("All arguments must be bytes objects");
        }
        arg_packets[i] = (const uint8_t*)PyBytes_AsString(item);
        Py_DECREF(item);
    }

    packet = PyTRY(make_morloc_local_call_packet, (uint32_t)mid, arg_packets, (size_t)nargs);
    free(arg_packets);
    arg_packets = NULL;

    result = PyTRY(send_and_receive_over_socket, socket_path, packet);
    free(packet);
    packet = NULL;

    // Incref the result's SHM so the callee's tracker flush won't destroy
    // data we may still need (e.g. forwarded result packets).
    {
        const morloc_packet_header_t* res_header = (const morloc_packet_header_t*)result;
        if (res_header->command.data.source == PACKET_SOURCE_RPTR) {
            // RPTR payload is a relative pointer into shared memory
            size_t relptr = *(size_t*)((uint8_t*)result + res_header->offset + sizeof(morloc_packet_header_t));
            char* resolve_err = NULL;
            void* res_voidstar = rel2abs(relptr, &resolve_err);
            if (resolve_err) { free(resolve_err); resolve_err = NULL; }
            if (res_voidstar) {
                char* incref_err = NULL;
                shincref((absptr_t)res_voidstar, &incref_err);
                if (incref_err) { free(incref_err); }
                shm_tracker_push((absptr_t)res_voidstar, NULL);
            }
        }
    }

    result_length = PyTRY(morloc_packet_size, result);
    PyObject* retval = PyBytes_FromStringAndSize((char*)result, result_length);
    free(result);
    return retval;

error:
    FREE(arg_packets)
    FREE(packet)
    return NULL;
}

// Submit a call to a remote (e.g. cluster) executor and return the response
// packet as bytes, or None when the remote call yields no result.
static PyObject* pybinding__remote_call(PyObject* self, PyObject* args) {
    MAYFAIL
    int midx;
    char* socket_base;
    char* cache_path;
    PyObject* res_struct; // python struct that is converted to a resource_t struct
    PyObject* arg_packets_obj; // python list of bytes types
    const uint8_t** arg_packets = NULL;
    uint8_t* result = NULL;

    if (!PyArg_ParseTuple(args, "issOO", &midx, &socket_base, &cache_path, &res_struct, &arg_packets_obj)) {
        PyRAISE("Failed to parse arguments");
    }
    if (!PyBytes_Check(res_struct)) {
        PyRAISE("res_struct must be a bytes object from struct.pack()");
    }
    // Ensure the resources struct is the right size
    if (PyBytes_Size(res_struct) != sizeof(resources_t)) {
        PyRAISE("Struct size mismatch");
    }
    // Reinterpret the packed Python bytes as the C resources struct
    resources_t* res = (resources_t*)PyBytes_AsString(res_struct);
    PyTRACE(res == NULL)

    Py_ssize_t nargs = PyList_Size(arg_packets_obj);
    arg_packets = calloc(nargs, sizeof(uint8_t*));
    if (arg_packets == NULL) {
        PyRAISE("Memory allocation failed");
    }
    for (Py_ssize_t i = 0; i < nargs; i++) {
        PyObject* packet_obj = PyList_GetItem(arg_packets_obj, i);
        if (!PyBytes_Check(packet_obj)) {
            PyRAISE("Packets must be bytes");
        }
        arg_packets[i] = (uint8_t*)PyBytes_AsString(packet_obj);
    }

    result = PyTRY( remote_call, midx, socket_base, cache_path, res, arg_packets, (size_t)nargs );
    free(arg_packets);

    if (result == NULL) Py_RETURN_NONE;

    size_t result_length = PyTRY(morloc_packet_size, result);
    PyObject* py_result = PyBytes_FromStringAndSize((char*)result, result_length);
    free(result);
    return py_result;

error:
    if (result != NULL){
        free(result);
    }
    if (arg_packets != NULL){
        // The elements are handled by Python and should not be freed
        free(arg_packets);
    }
    return NULL;
}

// Return True iff the packet is a ping packet
static PyObject* pybinding__is_ping(PyObject* self, PyObject* args) {
    MAYFAIL
    char* packet;
    size_t packet_size;
    if (!PyArg_ParseTuple(args, "y#", &packet, &packet_size)) {
        PyRAISE("Failed to parse arguments");
    }
    bool is_ping = PyTRY(packet_is_ping, (uint8_t*)packet);
    PyObject* obj = PyBool_FromLong((long)is_ping);
    return obj;
error:
    return NULL;
}

// Return True iff the packet is a local (same-machine) call packet
static PyObject* pybinding__is_local_call(PyObject* self, PyObject* args) {
    MAYFAIL
    char* packet;
    size_t packet_size;
    if (!PyArg_ParseTuple(args, "y#", &packet, &packet_size)) {
        PyRAISE("Failed to parse arguments");
    }
    bool is_local_call = PyTRY(packet_is_local_call, (uint8_t*)packet);
    PyObject* obj = PyBool_FromLong((long)is_local_call);
    return obj;
error:
    return NULL;
}

// Return True iff the packet is a remote call packet
static PyObject* pybinding__is_remote_call(PyObject* self, PyObject* args) {
    MAYFAIL
    char* packet;
    size_t packet_size;
    if (!PyArg_ParseTuple(args, "y#", &packet, &packet_size)) {
        PyRAISE("Failed to parse arguments");
    }
    bool is_remote_call = PyTRY(packet_is_remote_call, (uint8_t*)packet);
    PyObject* obj = PyBool_FromLong((long)is_remote_call);
    return obj;
error:
    return NULL;
}

// Build and return the pong response for a ping packet (as bytes)
static PyObject* pybinding__pong(PyObject* self, PyObject* args) {
    MAYFAIL
    char* packet;
    size_t packet_size;
    uint8_t* pong = NULL;
    if (!PyArg_ParseTuple(args, "y#", &packet, &packet_size)) {
        PyRAISE("Failed to parse arguments");
    }
    pong = PyTRY(return_ping, (uint8_t*)packet);
    size_t pong_size = PyTRY(morloc_packet_size, pong);
    {
        PyObject* retval = PyBytes_FromStringAndSize((char*)pong, pong_size);
        free(pong);
        return retval;
    }
error:
    FREE(pong)
    return NULL;
}

// Set the directory used for file-backed shared memory when POSIX SHM is
// unavailable.  Returns None.
static PyObject* pybinding__set_fallback_dir(PyObject* self, PyObject* args) {
    const char* dir;
    if (!PyArg_ParseTuple(args, "s", &dir)) {
        return NULL;
    }
    shm_set_fallback_dir(dir);
    Py_RETURN_NONE;
}

// Open (or attach to) the shared memory pool and return it as a PyCapsule
// named "shm_t".  The capsule has no destructor; the pool outlives it.
static PyObject* pybinding__shinit(PyObject* self, PyObject* args) {
    MAYFAIL
    shm_t* shm = NULL;
    const char* shm_basename;
    size_t volume_index;
    size_t shm_default_size;
    if (!PyArg_ParseTuple(args, "skk", &shm_basename, &volume_index, &shm_default_size)) {
        PyRAISE("Failed to parse arguments");
    }
    shm = PyTRY( shinit, shm_basename, volume_index, shm_default_size );
    return PyCapsule_New(shm, "shm_t", NULL);
error:
    FREE(shm)
    return NULL;
}

// Create a fail packet wrapping an error message; returns it as bytes.
static PyObject* pybinding__make_fail_packetg(PyObject* self, PyObject* args) {
    MAYFAIL
    const char* packet_errmsg;
    uint8_t* packet = NULL;
    if (!PyArg_ParseTuple(args, "s", &packet_errmsg)) {
        PyRAISE("Failed to parse arguments");
    }
    packet = make_fail_packet(packet_errmsg);
    size_t packet_size = PyTRY(morloc_packet_size, packet);
    {
        PyObject* retval = PyBytes_FromStringAndSize((char*)packet, packet_size);
        free(packet);
        return retval;
    }
error:
    FREE(packet)
    return NULL;
}

// Hash a Python value under a schema: serialize to SHM voidstar, hash it,
// and return the hex digest as a Python str.  SHM and schema are always
// released before returning.
static PyObject* pybinding__mlc_hash(PyObject* self, PyObject* args) {
    MAYFAIL
    PyObject* obj;
    const char* schema_str;
    Schema* schema = NULL;
    void* voidstar = NULL;
    char* hex = NULL;
    if (!PyArg_ParseTuple(args, "Os", &obj, &schema_str)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    voidstar = to_voidstar(schema, obj);
    PyTRACE(voidstar == NULL)
    hex = PyTRY(mlc_hash, voidstar, schema);
    {
        // best-effort free; any shfree error string is discarded
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    {
        PyObject* retval = PyUnicode_FromString(hex);
        free(hex);
        return retval;
    }
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    FREE(hex)
    return NULL;
}

// Save a Python value to file in msgpack format.  Returns None.
static PyObject* pybinding__mlc_save(PyObject* self, PyObject* args) {
    MAYFAIL
    PyObject* obj;
    const char* schema_str;
    const char* path;
    Schema* schema = NULL;
    void* voidstar = NULL;
    if (!PyArg_ParseTuple(args, "Oss", &obj, &schema_str, &path)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    voidstar = to_voidstar(schema, obj);
    PyTRACE(voidstar == NULL)
    PyTRY(mlc_save, voidstar, schema, path);
    {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    Py_RETURN_NONE;
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    return NULL;
}

// Save a Python value to file in flat voidstar binary format.  Returns None.
static PyObject* pybinding__mlc_save_voidstar(PyObject* self, PyObject* args) {
    MAYFAIL
    PyObject* obj;
    const char* schema_str;
    const char* path;
    Schema* schema = NULL;
    void* voidstar = NULL;
    if (!PyArg_ParseTuple(args, "Oss", &obj, &schema_str, &path)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    voidstar = to_voidstar(schema, obj);
    PyTRACE(voidstar == NULL)
    PyTRY(mlc_save_voidstar, voidstar, schema, path);
    {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    Py_RETURN_NONE;
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    return NULL;
}

// Save a Python value to file in JSON format.  Returns None.
static PyObject* pybinding__mlc_save_json(PyObject* self, PyObject* args) {
    MAYFAIL
    PyObject* obj;
    const char* schema_str;
    const char* path;
    Schema* schema = NULL;
    void* voidstar = NULL;
    if (!PyArg_ParseTuple(args, "Oss", &obj, &schema_str, &path)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    voidstar = to_voidstar(schema, obj);
    PyTRACE(voidstar == NULL)
    PyTRY(mlc_save_json, voidstar, schema, path);
    {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    Py_RETURN_NONE;
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    return NULL;
}

// Serialize a Python value to a JSON string (returned as str).
static PyObject* pybinding__mlc_show(PyObject* self, PyObject* args) {
    MAYFAIL
    PyObject* obj;
    const char* schema_str;
    Schema* schema = NULL;
    void* voidstar = NULL;
    char* json = NULL;
    if (!PyArg_ParseTuple(args, "Os", &obj, &schema_str)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    voidstar = to_voidstar(schema, obj);
    PyTRACE(voidstar == NULL)
    json = PyTRY(mlc_show, voidstar, schema);
    {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    {
        PyObject* retval = PyUnicode_FromString(json);
        free(json);
        return retval;
    }
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    FREE(json)
    return NULL;
}

// Deserialize a JSON string to a Python value under a schema.  Returns None
// when parsing yields no value (mlc_read errors are swallowed here).
static PyObject* pybinding__mlc_read(PyObject* self, PyObject* args) {
    MAYFAIL
    const char* schema_str;
    const char* json_str;
    Schema* schema = NULL;
    void* voidstar = NULL;
    if (!PyArg_ParseTuple(args, "ss", &schema_str, &json_str)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    {
        // NOTE(review): the mlc_read error message is discarded; failures
        // surface only as a None return below.
        char* errmsg = NULL;
        voidstar = mlc_read(json_str, schema, &errmsg);
        if (errmsg != NULL) {
            free(errmsg);
        }
    }
    if (voidstar == NULL) {
        free_schema(schema);
        Py_RETURN_NONE;
    }
    {
        PyObject* obj = fromAnything(schema, voidstar, NULL);
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
        free_schema(schema);
        PyTRACE(obj == NULL)
        return obj;
    }
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    return NULL;
}

// Load a value from file (path, schema) and return it as a Python object;
// returns None if the load yields no value.
static PyObject* pybinding__mlc_load(PyObject* self, PyObject* args) {
    MAYFAIL
    const char* schema_str;
    const char* path;
    Schema* schema = NULL;
    void* voidstar = NULL;
    if (!PyArg_ParseTuple(args, "ss", &schema_str, &path)) {
        PyRAISE("Failed to parse arguments");
    }
    schema = PyTRY(parse_schema, schema_str);
    voidstar = PyTRY(mlc_load, path, schema);
    if (voidstar == NULL) {
        free_schema(schema);
        Py_RETURN_NONE;
    }
    {
        PyObject* obj = fromAnything(schema, voidstar, NULL);
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
        free_schema(schema);
        PyTRACE(obj == NULL)
        return obj;
    }
error:
    if (voidstar) {
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    return NULL;
}

// Method table mapping Python-visible names to the C bindings above.
static PyMethodDef Methods[] = {
    {"set_fallback_dir", pybinding__set_fallback_dir, METH_VARARGS, "Set fallback directory for file-backed shared memory"},
    {"shinit", pybinding__shinit, METH_VARARGS, "Open the shared memory pool"},
    {"start_daemon", pybinding__start_daemon, METH_VARARGS, "Initialize the shared memory and socket for the python daemon"},
    {"close_daemon", pybinding__close_daemon, METH_VARARGS, "Banish the daemon back to the abyss from whence it came"},
    {"wait_for_client", pybinding__wait_for_client, METH_VARARGS, "Listen over a pipe until a client packet arrives"},
    {"read_morloc_call_packet", pybinding__read_morloc_call_packet, METH_VARARGS, "Parse a morloc call packet"},
    {"send_packet_to_foreign_server", pybinding__send_packet_to_foreign_server, METH_VARARGS, "Send data to a foreign server"},
    {"stream_from_client", pybinding__stream_from_client, METH_VARARGS, "Stream data from the client"},
    {"close_socket", pybinding__close_socket, METH_VARARGS, "Close the socket"},
    {"flush_shm_tracker", pybinding__flush_shm_tracker, METH_NOARGS, "Free tracked SHM allocations from put_value calls"},
    {"foreign_call", pybinding__foreign_call, METH_VARARGS, "Send a call packet to a foreign pool"},
    {"get_value", pybinding__get_value, METH_VARARGS, "Convert a packet to a Python value"},
    {"put_value", pybinding__put_value, METH_VARARGS, "Convert a Python value to a packet"},
    {"is_ping", pybinding__is_ping, METH_VARARGS, "Packet is a ping"},
    {"is_local_call", pybinding__is_local_call, METH_VARARGS, "Packet is a local call"},
    {"is_remote_call", pybinding__is_remote_call, METH_VARARGS, "Packet is a remote call"},
    {"pong", pybinding__pong, METH_VARARGS, "Return a ping"},
    {"make_fail_packet", pybinding__make_fail_packetg, METH_VARARGS, "Create a fail packet from an error message"},
    {"remote_call", pybinding__remote_call, METH_VARARGS, "Make a call to a remote cluster"},
    {"mlc_hash", pybinding__mlc_hash, METH_VARARGS, "Hash a value using xxhash"},
    {"mlc_save", pybinding__mlc_save, METH_VARARGS, "Save a value to file in msgpack format"},
    {"mlc_save_voidstar", pybinding__mlc_save_voidstar, METH_VARARGS, "Save a value to file in flat voidstar binary format"},
    {"mlc_save_json", pybinding__mlc_save_json, METH_VARARGS, "Save a value to file in JSON format"},
    {"mlc_load", pybinding__mlc_load, METH_VARARGS, "Load a value from file"},
    {"mlc_show", pybinding__mlc_show, METH_VARARGS, "Serialize a value to JSON string"},
    {"mlc_read", pybinding__mlc_read, METH_VARARGS, "Deserialize a JSON string to a value"},
    {NULL, NULL, 0, NULL} // this is a sentinel value
};

// Module definition and entry point for the `pymorloc` extension module.
static struct PyModuleDef pymorloc = {
    PyModuleDef_HEAD_INIT,
    "pymorloc",
    "Python interface to Morloc binary and MessagePack data",
    -1,
    Methods
};

PyMODINIT_FUNC PyInit_pymorloc(void) {
    return PyModule_Create(&pymorloc);
}


================================================ FILE: data/lang/py/setup.py ================================================
# Build script for the pymorloc CPython extension.  Compiles pymorloc.c
# against the morloc runtime headers/library installed under MORLOC_HOME.
import os
from setuptools import setup, Extension
import numpy as np

# numpy headers are required at build time for the array bindings
try:
    np_include_path = np.get_include()
except AttributeError:
    raise RuntimeError("Numpy is required to build this extension")

# Default install root matches the morloc compiler's data directory
morloc_home = os.environ.get(
    'MORLOC_HOME',
    os.path.expanduser('~/.local/share/morloc')
)

module = Extension(
    'pymorloc',
    sources=['pymorloc.c'],
    include_dirs=[
        os.path.join(morloc_home, 'include'),
        np_include_path
    ],
    library_dirs=[os.path.join(morloc_home, 'lib')],
    # rpath so the extension finds libmorloc without LD_LIBRARY_PATH
    runtime_library_dirs=[os.path.join(morloc_home, 'lib')],
    libraries=['morloc']
)

setup(
    name='pymorloc',
    version='0.1',
    ext_modules=[module],
    extras_require={
        'numpy': ['numpy']
    }
)
================================================ FILE: data/lang/r/init.sh ================================================
#!/bin/bash
# Build librmorloc.so, the C bridge between R and the morloc runtime.
# Usage: init.sh MORLOC_HOME BUILD_DIR SANITIZE_FLAGS
set -e

MORLOC_HOME="$1"
BUILD_DIR="$2"
SANITIZE_FLAGS="$3"

INCLUDE_DIR="$MORLOC_HOME/include"
LIB_DIR="$MORLOC_HOME/lib"

# Write source to include dir (R CMD SHLIB expects it there)
cp "$BUILD_DIR/rmorloc.c" "$INCLUDE_DIR/"

# Compile directly with gcc so SANITIZE_FLAGS can be passed at both compile
# and link time. R CMD SHLIB drops sanitizer flags from the link step, which
# would leave librmorloc.so with unresolved ubsan symbols (R itself is not
# built with ubsan, so dlopen would fail with:
#   "undefined symbol: __ubsan_handle_type_mismatch_v1_abort").
#
# We only link against R's core shared lib (libR). R's full --ldflags include
# libs the R interpreter uses (pcre2, tirpc, icu...) that may not be present
# as separate dev packages on the build machine; R-loadable shared objects
# resolve those symbols through the loaded R interpreter at dlopen time, not
# at link time.
R_CPPFLAGS=$(R CMD config --cppflags)
R_HOME=$(R RHOME)

gcc $R_CPPFLAGS -I"$INCLUDE_DIR" $SANITIZE_FLAGS -fpic -O2 \
    -c "$INCLUDE_DIR/rmorloc.c" -o "$INCLUDE_DIR/rmorloc.o"

gcc -shared $SANITIZE_FLAGS \
    -Wl,-Bsymbolic-functions -Wl,-z,relro \
    -o "$LIB_DIR/librmorloc.so" "$INCLUDE_DIR/rmorloc.o" \
    -L"$LIB_DIR" -Wl,-rpath,"$LIB_DIR" -lmorloc -lpthread \
    -L"$R_HOME/lib" -lR

# Clean up
rm -f "$INCLUDE_DIR/rmorloc.c" "$INCLUDE_DIR/rmorloc.o"


================================================ FILE: data/lang/r/lang.yaml ================================================
# R language descriptor for morloc compiler
# Metadata fields (read by LangRegistry) + descriptor fields (read by generic translator)

# Identity and metadata
name: r
extension: R
aliases: []
is_compiled: false
run_command: ["Rscript"]
serial_type: "character"
cost: 4
# Lines prepended to every generated R pool: load the C bridge and define a
# source helper that resolves paths relative to the generated script.
preamble:
  - 'dyn.load("{{home}}/lib/librmorloc.so")'
  - '.morloc.srcdir <- normalizePath(file.path(dirname(sub("^--file=", "", grep("^--file=", commandArgs(FALSE), value=TRUE)[1])), "..", ".."), mustWork=FALSE)'
  - '.morloc.source <- function(p) source(ifelse(startsWith(p, "/"), p, file.path(.morloc.srcdir, p)), chdir=TRUE)'

# Literals
ldBoolTrue: "TRUE"
ldBoolFalse: "FALSE"
ldNullLiteral: "NULL"

# Constructors
ldListStyle: type_dependent
ldTupleConstructor: "list"
ldRecordConstructor: "list"
ldRecordSeparator: "="

# Access styles
ldIndexStyle: one_double_bracket
ldKeyAccess: "double_bracket"
ldFieldAccess: dollar

# Serialize/deserialize
ldSerializeFn: "morloc_put_value"
ldDeserializeFn: "morloc_get_value"
ldIntrinsicPrefix: "morloc_"

# Foreign call
ldForeignCallFn: "morloc_foreign_call"
ldForeignCallIntSuffix: "L"
ldIntLiteralSuffix: "L"

# Remote call
ldRemoteCallFn: "morloc_remote_call"

# Record handling
ldDictStyleRecords: false
ldQuoteRecordKeys: false

# Import syntax
ldQualifiedImports: false
ldIncludeRelToFile: false

# Template fields
ldAssignOp: "<-"
ldLambdaTemplate: "function({{args}}) { {{body}} }"
ldDoBlockExpr: "(function() {{expr}})"
ldDoBlockBlock: "(function(){\n{{body}}\n})"
ldPartialTemplate: "function({{bound_args}}) { {{fn}}({{all_args}}) }"
ldImportTemplate: ".morloc.source(\"{{path}}\")"
ldSocketPathTemplate: "paste0(global_state$tmpdir, \"/\", {{socket}})"
ldResourcePackTemplate: "list(memory={{mem}}L, time={{time}}L, cpus={{cpus}}L, gpus={{gpus}}L)"
ldReturnTemplate: "return({{expr}})"
ldFuncDefHeader: "{{name}} <- function({{args}})"
ldBlockStyle: braces
ldBlockEnd: "}"
ldErrorWrapOpen: ""
ldErrorWrapClose: []
ldPatternStyle: concat_call
ldConcatFn: "paste0"
ldQuoteTerminator: '"'
ldQuoteTerminatorEsc: '\"'
ldAtomicTypes: ["integer", "numeric", "double", "logical", "character"]
ldAtomicListFn: "c"
ldGenericListFn: "list"
ldMapStyle: apply_callback
ldDispatchLocalHeader: ".dispatch <- list()"
ldDispatchLocalEntry: ".dispatch[[{{mid}}L]] <- {{name}}"
ldDispatchLocalFooter: ""
ldDispatchRemoteHeader: ".remote_dispatch <- list()"
ldDispatchRemoteEntry: ".remote_dispatch[[{{mid}}L]] <- {{name}}_remote"
ldDispatchRemoteFooter: ""

# Pool template (loaded from pool.R, left empty here)
ldPoolTemplate: ""
ldBreakMarker: "# <<>>"
ldCommentMarker: "#"


================================================ FILE: data/lang/r/pool.R ================================================
# R pool script: hosts user functions behind a Unix socket daemon, dispatching
# call packets to forked worker processes via the librmorloc C bridge.

# AUTO include sources start
# <<>>
# AUTO include sources end

# Thin R wrappers over the librmorloc .Call entry points.
morloc_is_ping <- function(...){
  .Call("morloc_is_ping", ...)
}
morloc_pong <- function(...){
  .Call("morloc_pong", ...)
}
morloc_is_local_call <- function(...){
  .Call("morloc_is_local_call", ...)
}
morloc_is_remote_call <- function(...){
  .Call("morloc_is_remote_call", ...)
}
morloc_make_fail_packet <- function(...){
  .Call("morloc_make_fail_packet", ...)
}
morloc_wait_for_client <- function(...){
  .Call("morloc_wait_for_client", ...)
}
morloc_stream_from_client <- function(...){
  .Call("morloc_stream_from_client", ...)
}
morloc_read_morloc_call_packet <- function(...){
  .Call("morloc_read_morloc_call_packet", ...)
}
morloc_send_packet_to_foreign_server <- function(...){
  .Call("morloc_send_packet_to_foreign_server", ...)
}
morloc_close_socket <- function(...){
  .Call("morloc_close_socket", ...)
}
morloc_start_daemon <- function(...){
  .Call("morloc_start_daemon", ...)
}
morloc_shinit <- function(...){
  .Call("morloc_shinit", ...)
}
morloc_foreign_call <- function(...){
  .Call("morloc_foreign_call", ...)
}
morloc_get_value <- function(...){
  .Call("morloc_get_value", ...)
}
morloc_put_value <- function(...){
  .Call("morloc_put_value", ...)
}
morloc_mlc_show <- function(...){
  .Call("morloc_mlc_show", ...)
}
morloc_socketpair <- function(...){
  .Call("morloc_socketpair", ...)
}
morloc_fork <- function(...){
  .Call("morloc_fork", ...)
}
morloc_send_fd <- function(...){
  .Call("morloc_send_fd", ...)
}
morloc_recv_fd <- function(...){
  .Call("morloc_recv_fd", ...)
}
morloc_kill <- function(...){
  .Call("morloc_kill", ...)
}
morloc_waitpid <- function(...){
  .Call("morloc_waitpid", ...)
}
morloc_install_sigterm_handler <- function(...){
  .Call("morloc_install_sigterm_handler", ...)
}
morloc_is_shutting_down <- function(...){
  .Call("morloc_is_shutting_down", ...)
}
morloc_waitpid_blocking <- function(...){
  .Call("morloc_waitpid_blocking", ...)
}
morloc_detach_daemon <- function(...){
  .Call("morloc_detach_daemon", ...)
}
morloc_shared_counter_create <- function(...){
  .Call("morloc_shared_counter_create", ...)
}
morloc_shared_counter_inc <- function(...){
  .Call("morloc_shared_counter_inc", ...)
}
morloc_shared_counter_dec <- function(...){
  .Call("morloc_shared_counter_dec", ...)
}
morloc_shared_counter_read <- function(...){
  .Call("morloc_shared_counter_read", ...)
}
morloc_pipe <- function(...){
  .Call("morloc_pipe", ...)
}
morloc_write_byte <- function(...){
  .Call("morloc_write_byte", ...)
}
morloc_close_fd <- function(...){
  .Call("morloc_close_fd", ...)
}
morloc_worker_loop_c <- function(...){
  .Call("morloc_worker_loop_c", ...)
}
morloc_set_line_buffered <- function(...){
  .Call("morloc_set_line_buffered", ...)
}
morloc_exit <- function(...){
  .Call("morloc_exit", ...)
}

# Mutable per-pool globals (tmpdir is set from the command line below)
global_state <- list()

# Dynamic worker spawning: monkey-patch morloc_foreign_call to track busy workers.
# Workers atomically increment a shared counter before a foreign_call and
# decrement after. When all workers are busy, a byte is written to a wake-up
# pipe to tell the dispatcher to spawn a new worker.
.orig_foreign_call <- morloc_foreign_call
.busy_counter <- NULL
.wakeup_fd <- NULL
.n_workers_total <- 0L

morloc_foreign_call <- function(...) {
  val <- morloc_shared_counter_inc(.busy_counter)
  if (val >= .n_workers_total && !is.null(.wakeup_fd)) {
    # Best-effort wake-up; a dead pipe must not fail the call itself
    tryCatch(morloc_write_byte(.wakeup_fd, as.raw(0x21)), error = function(e) NULL)
  }
  # on.exit guarantees the decrement even if the foreign call errors
  on.exit(morloc_shared_counter_dec(.busy_counter))
  .orig_foreign_call(...)
}

# AUTO include manifolds start
# <<>>
# AUTO include manifolds end

# AUTO include dispatch start
# <<>>
# AUTO include dispatch end

# Worker body: block in the C loop reading job fds and dispatching calls
worker_loop <- function(pipe_fd) {
  morloc_worker_loop_c(pipe_fd, .dispatch, .remote_dispatch)
}

main <- function(socket_path, tmpdir, shm_basename) {
  # Force line-buffered stdout/stderr so output from user functions is not lost
  # when the nexus kills the pool process group.
  morloc_set_line_buffered()
  morloc_install_sigterm_handler()

  daemon <- morloc_start_daemon(socket_path, tmpdir, shm_basename, 0xffff)

  n_workers <- 1L

  # Shared job queue: dispatcher writes fds to fd[1], workers read from fd[2].
  # Only idle workers (blocked in recvmsg) pick up jobs, preventing the
  # round-robin deadlock where a callback gets dispatched to a busy worker.
  job_queue <- morloc_socketpair()

  # Shared counter for dynamic worker spawning
  busy_counter <- morloc_shared_counter_create()
  wakeup <- morloc_pipe()  # c(read_fd, write_fd)

  # Set globals so the monkey-patched morloc_foreign_call can use them.
  # Forked children inherit these values.
  .busy_counter <<- busy_counter
  .wakeup_fd <<- wakeup[2L]
  .n_workers_total <<- n_workers

  pids <- integer(n_workers)
  for (i in seq_len(n_workers)) {
    pid <- morloc_fork()
    if (pid == 0L) {
      # Child: drop fds a worker must not hold, then serve jobs forever
      morloc_detach_daemon(daemon)
      morloc_close_socket(job_queue[1L])  # child doesn't write
      morloc_close_fd(wakeup[1L])  # child doesn't read wakeup pipe
      worker_loop(job_queue[2L])
      morloc_exit(0L)
    }
    pids[i] <- pid
  }

  # Keep job_queue[2L] open so dynamically spawned children can use it
  on.exit({
    tryCatch(morloc_close_socket(job_queue[1L]), error = function(e) NULL)
    tryCatch(morloc_close_socket(job_queue[2L]), error = function(e) NULL)
    tryCatch(morloc_close_fd(wakeup[1L]), error = function(e) NULL)
    tryCatch(morloc_close_fd(wakeup[2L]), error = function(e) NULL)
    for (pid in pids) {
      if (pid > 0L) {
        tryCatch(morloc_kill(pid, 9L), error = function(e) NULL)
        tryCatch(morloc_waitpid_blocking(pid), error = function(e) NULL)
      }
    }
  })

  # Dispatch loop - idle workers pull from shared queue.
  # After each dispatch cycle, check if all workers are busy and spawn more.
  while (!morloc_is_shutting_down()) {
    client_fd <- morloc_wait_for_client(daemon)
    if (client_fd > 0L) {
      tryCatch({
        morloc_send_fd(job_queue[1L], client_fd)
      }, error = function(e) {
        cat(paste("Failed to dispatch job:", e$message, "\n"), file = stderr())
      }, finally = {
        # The worker received its own duplicate of the fd; close ours
        morloc_close_socket(client_fd)
      })
    }
    # Dynamic worker spawning: if all workers are blocked in foreign_call,
    # spawn a new one so incoming callbacks can still be served.
    current_busy <- morloc_shared_counter_read(busy_counter)
    if (current_busy >= n_workers) {
      pid <- morloc_fork()
      if (pid == 0L) {
        morloc_detach_daemon(daemon)
        morloc_close_socket(job_queue[1L])
        morloc_close_fd(wakeup[1L])
        worker_loop(job_queue[2L])
        morloc_exit(0L)
      }
      pids <- c(pids, pid)
      n_workers <- n_workers + 1L
      .n_workers_total <<- n_workers
    }
  }
}

args <- commandArgs(trailingOnly = TRUE)

# Health check: confirm sources loaded and print version
if (length(args) == 1 && args[1] == "--health") {
  cat('{"status":"ok","version":"__MORLOC_VERSION__"}\n')
  quit(status = 0)
}

if (length(args) != 3) {
  cat("Usage: Rscript pool.R \n", file=stderr())
  quit(status = 1)
}

socket_path <- args[1]
tmpdir <- args[2]
shm_basename <- args[3]

global_state$tmpdir <- tmpdir

tryCatch(
  {
    main(socket_path, tmpdir, shm_basename)
  },
  error = function(e) {
    stop(paste("Pool failed:", e$message))
  })

# Use _exit to avoid R cleanup which triggers heap corruption on glibc >= 2.39
# (R's finalizers attempt to free objects in SHM-related C extensions)
morloc_exit(0L)


================================================ FILE: data/lang/r/rmorloc.c ================================================
// C bridge exposing the morloc runtime to R via .Call.
// NOTE(review): the system header names were stripped by extraction; the
// bare #include lines below are artifacts of that, not the real source.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "morloc.h"

// {{{ macros
// Declares the per-function error-message slot used by R_TRY.
#define MAYFAIL char* child_errmsg_ = NULL;

// Call a runtime function with an appended error out-parameter; on error,
// raise an R error (longjmp) with file/line/function context.
#define R_TRY(fun, ...) \
    fun(__VA_ARGS__ __VA_OPT__(,) &child_errmsg_); \
    if(child_errmsg_ != NULL){ \
        error("Error in R pool (%s:%d in %s):\n%s", __FILE__, __LINE__, __func__, child_errmsg_); \
    }

// Like R_TRY, but runs `clean` before raising so resources are released
// (error() does not return).
#define R_TRY_WITH(clean, fun, ...) \
    fun(__VA_ARGS__ __VA_OPT__(,) &child_errmsg_); \
    if(child_errmsg_ != NULL){ \
        clean; \
        error("Error in R pool (%s:%d in %s):\n%s", __FILE__, __LINE__, __func__, child_errmsg_); \
    }

// Raise an R error with file/line/function context and a printf-style message.
#define MORLOC_ERROR(msg, ...)
error("Error in R pool (%s:%d in %s):" msg, __FILE__, __LINE__, __func__, ##__VA_ARGS__); /// }}} // {{{ to_voidstar static size_t get_shm_size(const Schema* schema, SEXP obj) { size_t size = 0; switch (schema->type) { case MORLOC_NIL: case MORLOC_BOOL: case MORLOC_SINT8: case MORLOC_SINT16: case MORLOC_SINT32: case MORLOC_SINT64: case MORLOC_UINT8: case MORLOC_UINT16: case MORLOC_UINT32: case MORLOC_UINT64: case MORLOC_FLOAT32: case MORLOC_FLOAT64: return schema->width; case MORLOC_STRING: case MORLOC_ARRAY: { size_t length = (size_t)LENGTH(obj); size = sizeof(Array); // worst-case cursor alignment padding for element data size += schema_alignment(schema->parameters[0]) - 1; const char* str; switch (TYPEOF(obj)) { case CHARSXP: str = CHAR(obj); size += (size_t)strlen(str); // Do not include null terminator break; case STRSXP: if (LENGTH(obj) == 1) { str = CHAR(STRING_ELT(obj, 0)); size += (size_t)strlen(str); // Do not include null terminator } else { if(schema->parameters[0]->type == MORLOC_STRING){ for(size_t i = 0; i < length; i++){ size += get_shm_size(schema->parameters[0], STRING_ELT(obj, i)); } } else { MORLOC_ERROR("Expected character vector of length 1, but got length %zu", length); } } break; case VECSXP: // This handles lists for (int i = 0; i < length; i++) { size += get_shm_size(schema->parameters[0], VECTOR_ELT(obj, i)); } break; case LGLSXP: case INTSXP: case REALSXP: case RAWSXP: size += length * schema->parameters[0]->width; break; default: MORLOC_ERROR("Unsupported type in get_shm_size array: %s", type2char(TYPEOF(obj))); } return size; } case MORLOC_TUPLE: if (!isVectorList(obj)) { MORLOC_ERROR("Expected list for MORLOC_TUPLE, but got %s", type2char(TYPEOF(obj))); } { size_t array_size = (size_t)xlength(obj); if (array_size != schema->size) { MORLOC_ERROR("Expected tuple of length %zu, but found list of length %zu", schema->size, size); } size = schema->width; for (R_xlen_t i = 0; i < (R_xlen_t)array_size; ++i) { SEXP item = VECTOR_ELT(obj, i); 
size_t elem = get_shm_size(schema->parameters[i], item); if (elem > schema->parameters[i]->width) { size += elem - schema->parameters[i]->width; } } return size; } case MORLOC_MAP: { if (isNewList(obj)) { // Handle named list size = schema->width; SEXP names = getAttrib(obj, R_NamesSymbol); if (names == R_NilValue) { error("List must have names for MORLOC_MAP"); } for (size_t i = 0; i < schema->size; ++i) { SEXP key = PROTECT(mkChar(schema->keys[i])); int index = -1; for (int j = 0; j < length(obj); j++) { if (strcmp(CHAR(STRING_ELT(names, j)), CHAR(key)) == 0) { index = j; break; } } if (index != -1) { SEXP value = VECTOR_ELT(obj, index); size_t elem = get_shm_size(schema->parameters[i], value); if (elem > schema->parameters[i]->width) { size += elem - schema->parameters[i]->width; } } UNPROTECT(1); } return size; } else { error("Expected a named list for MORLOC_MAP"); } } case MORLOC_OPTIONAL: if (obj == R_NilValue) { return schema->width; } { size_t inner_size = get_shm_size(schema->parameters[0], obj); size = schema->width; if (inner_size > schema->parameters[0]->width) { size += inner_size - schema->parameters[0]->width; } return size; } case MORLOC_TENSOR: { size_t ndim = schema_tensor_ndim(schema); size_t elem_width = schema->parameters[0]->width; SEXP dim = getAttrib(obj, R_DimSymbol); size_t total = 1; if (dim != R_NilValue) { for (int i = 0; i < length(dim); i++) total *= (size_t)INTEGER(dim)[i]; } else { total = (size_t)XLENGTH(obj); } size = sizeof(Tensor); size += _Alignof(int64_t) - 1; size += ndim * sizeof(int64_t); size += schema_alignment(schema->parameters[0]) - 1; size += total * elem_width; return size; } default: MORLOC_ERROR("Unhandled schema type"); break; } return size; } #define HANDLE_SINT_TYPE(CTYPE, MIN, MAX) \ do { \ if (!(isInteger(obj) || isReal(obj))) { \ MORLOC_ERROR("Expected integer for %s, but got %s", #CTYPE, type2char(TYPEOF(obj))); \ } \ double value = asReal(obj); \ if (value < MIN || value > MAX) { \ MORLOC_ERROR("Integer 
overflow for %s", #CTYPE); \ } \ *(CTYPE*)dest = (CTYPE)value; \ } while(0) #define HANDLE_UINT_TYPE(CTYPE, MAX) \ do { \ if (!(isInteger(obj) || isReal(obj))) { \ MORLOC_ERROR("Expected integer for %s, but got %s", #CTYPE, type2char(TYPEOF(obj))); \ } \ double value = asReal(obj); \ if (value < 0 || value > MAX) { \ MORLOC_ERROR("Integer overflow for %s", #CTYPE); \ } \ *(CTYPE*)dest = (CTYPE)value; \ } while(0) static void* to_voidstar_r(void* dest, void** cursor, SEXP obj, const Schema* schema){ MAYFAIL switch (schema->type) { case MORLOC_NIL: if (obj != R_NilValue) { MORLOC_ERROR("Expected NULL for MORLOC_NIL, but got %s", type2char(TYPEOF(obj))); } *((int8_t*)dest) = (int8_t)0; break; case MORLOC_BOOL: if (!isLogical(obj)) { MORLOC_ERROR("Expected logical for MORLOC_BOOL, but got %s", type2char(TYPEOF(obj))); } *((uint8_t*)dest) = (uint8_t)((LOGICAL(obj)[0] == TRUE) ? 1 : 0); break; case MORLOC_SINT8: HANDLE_SINT_TYPE(int8_t, INT8_MIN, INT8_MAX); break; case MORLOC_SINT16: HANDLE_SINT_TYPE(int16_t, INT16_MIN, INT16_MAX); break; case MORLOC_SINT32: HANDLE_SINT_TYPE(int32_t, INT32_MIN, INT32_MAX); break; case MORLOC_SINT64: HANDLE_SINT_TYPE(int64_t, INT64_MIN, INT64_MAX); break; case MORLOC_UINT8: HANDLE_UINT_TYPE(uint8_t, UINT8_MAX); break; case MORLOC_UINT16: HANDLE_UINT_TYPE(uint16_t, UINT16_MAX); break; case MORLOC_UINT32: HANDLE_UINT_TYPE(uint32_t, UINT32_MAX); break; case MORLOC_UINT64: HANDLE_UINT_TYPE(uint64_t, UINT64_MAX); break; case MORLOC_FLOAT32: if (!(isReal(obj) || isInteger(obj))) { MORLOC_ERROR("Expected numeric for MORLOC_FLOAT32, but got %s", type2char(TYPEOF(obj))); } *((float*)dest) = (float)asReal(obj); break; case MORLOC_FLOAT64: if (!(isReal(obj) || isInteger(obj))) { MORLOC_ERROR("Expected numeric for MORLOC_FLOAT64, but got %s", type2char(TYPEOF(obj))); } *((double*)dest) = asReal(obj); break; case MORLOC_STRING: { const char* str = NULL; size_t length = 0; switch(TYPEOF(obj)){ case CHARSXP: str = CHAR(obj); length = 
(size_t)strlen(str);
            break;
        case STRSXP:
            if (LENGTH(obj) == 1) {
                str = CHAR(STRING_ELT(obj, 0));
                length = (size_t)strlen(str);
            } else {
                MORLOC_ERROR("Expected character of length 1");
            }
            break;
        case RAWSXP:
            str = RAW(obj);
            length = LENGTH(obj);
            break;
        default:
            MORLOC_ERROR("Expected a character type");
            break;
        }
        Array* array = (Array*)dest;
        array->size = length; // Do not include null terminator
        if(length > 0){
            // align cursor for element data placement
            *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, schema_alignment(schema->parameters[0]));
            array->data = R_TRY(abs2rel, *cursor);
            absptr_t tmp_ptr = R_TRY(rel2abs, array->data);
            memcpy(tmp_ptr, str, array->size);
        } else {
            array->data = RELNULL;
        }
        // move cursor to the location after the copied data
        *cursor = (void*)(*(char**)cursor + array->size);
    }
    break;
    case MORLOC_ARRAY:
        // NOTE(review): declarations directly after a case label with no
        // enclosing braces -- relies on C23/GNU acceptance; confirm the
        // build's C standard if this is ported
        Array* array = (Array*)dest;
        array->size = (size_t)length(obj);
        if(array->size == 0){
            array->data = RELNULL;
            break;
        }
        // align cursor for element data placement
        *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, schema_alignment(schema->parameters[0]));
        array->data = R_TRY(abs2rel, *cursor);
        Schema* element_schema = schema->parameters[0];
        char* start;
        switch (TYPEOF(obj)) {
        case STRSXP: {
            if(element_schema->type == MORLOC_STRING){
                // set the cursor to the location after the array headers
                *cursor = (void*)(*(char**)cursor + array->size * element_schema->width);
                start = R_TRY(rel2abs, array->data);
                for(size_t i = 0; i < array->size; i++){
                    SEXP elem = STRING_ELT(obj, i);
                    to_voidstar_r(start + i * element_schema->width, cursor, elem, element_schema);
                }
            } else {
                MORLOC_ERROR("Expected character vector of length 1, but got length %ld", array->size);
            }
        }
        break;
        case RAWSXP:
            // Raw vectors
            if (element_schema->type != MORLOC_UINT8) {
                MORLOC_ERROR("Expected MORLOC_UINT8 for raw vector");
            }
            absptr_t tmp_ptr = R_TRY(rel2abs, array->data);
            memcpy(tmp_ptr, RAW(obj), array->size * sizeof(uint8_t));
            *cursor = (void*)(*(char**)cursor + array->size * sizeof(uint8_t));
            break;
        case VECSXP:
            // This handles lists
            *cursor = (void*)(*(char**)cursor + array->size * element_schema->width);
            start = R_TRY(rel2abs, array->data);
            for (int i = 0; i < array->size; i++) {
                SEXP elem = VECTOR_ELT(obj, i);
                to_voidstar_r(start + i * element_schema->width, cursor, elem, element_schema);
            }
            break;
        case LGLSXP:
            // Box each logical element as a scalar SEXP and recurse
            *cursor = (void*)(*(char**)cursor + array->size * element_schema->width);
            start = R_TRY(rel2abs, array->data);
            for (int i = 0; i < array->size; i++) {
                SEXP elem = PROTECT(ScalarLogical(LOGICAL(obj)[i]));
                to_voidstar_r(start + i * element_schema->width, cursor, elem, element_schema);
                UNPROTECT(1);
            }
            break;
        case INTSXP:
            *cursor = (void*)(*(char**)cursor + array->size * element_schema->width);
            start = R_TRY(rel2abs, array->data);
            for (int i = 0; i < array->size; i++) {
                SEXP elem = PROTECT(ScalarInteger(INTEGER(obj)[i]));
                to_voidstar_r(start + i * element_schema->width, cursor, elem, element_schema);
                UNPROTECT(1);
            }
            break;
        case REALSXP:
            *cursor = (void*)(*(char**)cursor + array->size * element_schema->width);
            start = R_TRY(rel2abs, array->data);
            for (int i = 0; i < array->size; i++) {
                SEXP elem = PROTECT(ScalarReal(REAL(obj)[i]));
                to_voidstar_r(start + i * element_schema->width, cursor, elem, element_schema);
                UNPROTECT(1);
            }
            break;
        default:
            MORLOC_ERROR("Unsupported type in to_voidstar array: %s", type2char(TYPEOF(obj)));
        }
        break;
    case MORLOC_TUPLE:
        if (!isVectorList(obj)) {
            MORLOC_ERROR("Expected list for MORLOC_TUPLE, but got %s", type2char(TYPEOF(obj)));
        }
        {
            R_xlen_t size = xlength(obj);
            if ((size_t)size != schema->size) {
                MORLOC_ERROR("Expected tuple of length %zu, but found list of length %zu", schema->size, size);
            }
            // Each tuple slot goes at its precomputed offset within dest
            for (R_xlen_t i = 0; i < size; ++i) {
                SEXP item = VECTOR_ELT(obj, i);
                to_voidstar_r(dest + schema->offsets[i], cursor, item, schema->parameters[i]);
            }
        }
        break;
    case MORLOC_MAP: {
        if (isNewList(obj)) {
            // Handle named list
            SEXP names = getAttrib(obj, R_NamesSymbol);
            if (names == R_NilValue) {
                MORLOC_ERROR("List must have names for MORLOC_MAP");
            }
            // Match each schema key against the list names (missing keys are
            // silently skipped, leaving their slot unwritten)
            for (size_t i = 0; i < schema->size; ++i) {
                SEXP key = PROTECT(mkChar(schema->keys[i]));
                int index = -1;
                for (int j = 0; j < length(obj); j++) {
                    if (strcmp(CHAR(STRING_ELT(names, j)), CHAR(key)) == 0) {
                        index = j;
                        break;
                    }
                }
                if (index != -1) {
                    SEXP value = VECTOR_ELT(obj, index);
                    to_voidstar_r(dest + schema->offsets[i], cursor, value, schema->parameters[i]);
                }
                UNPROTECT(1); // key
            }
        } else {
            MORLOC_ERROR("Expected a named list for MORLOC_MAP");
        }
    }
    break;
    case MORLOC_OPTIONAL:
        // Tag byte (0 = absent, 1 = present) followed by the payload slot
        if (obj == R_NilValue) {
            *((uint8_t*)dest) = 0;
            memset((char*)dest + schema->offsets[0], 0, schema->parameters[0]->width);
        } else {
            *((uint8_t*)dest) = 1;
            to_voidstar_r((char*)dest + schema->offsets[0], cursor, obj, schema->parameters[0]);
        }
        break;
    case MORLOC_TENSOR: {
        size_t ndim = schema_tensor_ndim(schema);
        size_t elem_width = schema->parameters[0]->width;
        // Get shape from dim attribute (or length for 1D)
        SEXP dim = getAttrib(obj, R_DimSymbol);
        int64_t shape[5];
        size_t total = 1;
        if (dim != R_NilValue) {
            for (size_t i = 0; i < ndim; i++) {
                shape[i] = (int64_t)INTEGER(dim)[i];
                total *= (size_t)shape[i];
            }
        } else {
            shape[0] = (int64_t)XLENGTH(obj);
            total = (size_t)shape[0];
        }
        Tensor* tensor = (Tensor*)dest;
        tensor->total_elements = total;
        tensor->device_type = 0;
        tensor->device_id = 0;
        if (total == 0) {
            tensor->shape = RELNULL;
            tensor->data = RELNULL;
            break;
        }
        // Write shape
        *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, _Alignof(int64_t));
        tensor->shape = R_TRY(abs2rel, (absptr_t)*cursor);
        int64_t* shape_dst = (int64_t*)*cursor;
        for (size_t i = 0; i < ndim; i++) shape_dst[i] = shape[i];
        *cursor = (char*)*cursor + ndim * sizeof(int64_t);
        // Write data: transpose from column-major (R) to row-major (C)
        size_t data_align = schema_alignment(schema->parameters[0]);
        *cursor = (void*)ALIGN_UP((uintptr_t)*cursor, data_align);
        tensor->data = R_TRY(abs2rel, (absptr_t)*cursor);
        // Coerce R object to match schema element type
        SEXP coerced = obj;
        int need_protect = 0;
        morloc_serial_type etype =
schema->parameters[0]->type;
        // Float schemas get a REALSXP source; other non-bool schemas an INTSXP
        if ((etype == MORLOC_FLOAT64 || etype == MORLOC_FLOAT32) && !isReal(obj)) {
            coerced = PROTECT(coerceVector(obj, REALSXP));
            need_protect = 1;
        } else if (etype != MORLOC_FLOAT64 && etype != MORLOC_FLOAT32 && etype != MORLOC_BOOL && !isInteger(obj)) {
            coerced = PROTECT(coerceVector(obj, INTSXP));
            need_protect = 1;
        }
        if (ndim == 1) {
            // 1D: no transpose needed
            if (isReal(coerced)) {
                memcpy(*cursor, REAL(coerced), total * elem_width);
            } else if (isInteger(coerced)) {
                memcpy(*cursor, INTEGER(coerced), total * elem_width);
            } else if (isLogical(coerced)) {
                int* src = LOGICAL(coerced);
                uint8_t* dst = (uint8_t*)*cursor;
                for (size_t i = 0; i < total; i++) dst[i] = (uint8_t)(src[i] != 0);
            }
        } else if (ndim == 2) {
            // 2D: column-major (R) -> row-major (C) transpose
            size_t nrows = (size_t)shape[0];
            size_t ncols = (size_t)shape[1];
            if (isReal(coerced)) {
                double* src = REAL(coerced);
                double* dst = (double*)*cursor;
                for (size_t r = 0; r < nrows; r++)
                    for (size_t c = 0; c < ncols; c++)
                        dst[r * ncols + c] = src[c * nrows + r];
            } else if (isInteger(coerced)) {
                int* src = INTEGER(coerced);
                int* dst = (int*)*cursor;
                for (size_t r = 0; r < nrows; r++)
                    for (size_t c = 0; c < ncols; c++)
                        dst[r * ncols + c] = src[c * nrows + r];
            }
        } else {
            // General N-D: map each row-major index to its column-major source
            size_t col_strides[5];
            col_strides[0] = 1;
            for (size_t d = 1; d < ndim; d++) col_strides[d] = col_strides[d-1] * (size_t)shape[d-1];
            size_t row_strides[5];
            row_strides[ndim-1] = 1;
            for (size_t d = ndim-1; d > 0; d--) row_strides[d-1] = row_strides[d] * (size_t)shape[d];
            if (isReal(coerced)) {
                double* src = REAL(coerced);
                double* dst = (double*)*cursor;
                for (size_t i = 0; i < total; i++) {
                    size_t rem = i;
                    size_t col_idx = 0;
                    for (size_t d = 0; d < ndim; d++) {
                        size_t coord = rem / row_strides[d];
                        rem %= row_strides[d];
                        col_idx += coord * col_strides[d];
                    }
                    dst[i] = src[col_idx];
                }
            } else if (isInteger(coerced)) {
                int* src = INTEGER(coerced);
                int* dst = (int*)*cursor;
                for (size_t i = 0; i < total; i++) {
                    size_t rem = i;
                    size_t col_idx = 0;
                    for (size_t d = 0; d < ndim; d++) {
                        size_t coord = rem / row_strides[d];
                        rem %= row_strides[d];
                        col_idx += coord * col_strides[d];
                    }
                    dst[i] = src[col_idx];
                }
            }
        }
        if (need_protect) UNPROTECT(1);
        *cursor = (char*)*cursor + total * elem_width;
    }
    break;
    default:
        MORLOC_ERROR("Unhandled schema type");
        break;
    }
    return dest;
}

// NOTE: If to_voidstar_r calls error() (via MORLOC_ERROR or R_TRY), the shared
// memory at dest leaks. This only happens on type mismatches (a development-time
// bug) and the memory is reclaimed when the pool process exits.

// Top-level entry: size the object, allocate shared memory, and serialize.
// The variable-length cursor starts just past the fixed-width header region.
static void* to_voidstar(SEXP obj, const Schema* schema) { MAYFAIL
    size_t total_size = get_shm_size(schema, obj);
    void* dest = R_TRY(shmalloc, total_size);
    void* cursor = (void*)((char*)dest + schema->width);
    return to_voidstar_r(dest, &cursor, obj, schema);
}

// }}} to_voidstar

// {{{ from_voidstar

// Reconstruct an R object from voidstar data laid out per `schema`.
// `base_ptr` is the base used to resolve relative pointers (NULL means the
// default shared-memory resolution inside resolve_relptr).
static SEXP from_voidstar(const void* data, const Schema* schema, const void* base_ptr) { MAYFAIL
    if(data == NULL){
        MORLOC_ERROR("NULL data (%s:%d in %s)", __FILE__, __LINE__, __func__);
    }
    if(schema == NULL){
        MORLOC_ERROR("NULL schema (%s:%d in %s)", __FILE__, __LINE__, __func__);
    }
    SEXP obj = R_NilValue;
    switch (schema->type) {
    case MORLOC_NIL:
        return R_NilValue;
    case MORLOC_BOOL:
        obj = ScalarLogical((bool)*(uint8_t*)data);
        break;
    case MORLOC_SINT8:
        obj = ScalarInteger((int)(*(int8_t*)data));
        break;
    case MORLOC_SINT16:
        obj = ScalarInteger((int)(*(int16_t*)data));
        break;
    case MORLOC_SINT32:
        obj = ScalarInteger(*(int32_t*)data);
        break;
    case MORLOC_SINT64:
        // R integers are 32-bit, so wide integers come back as doubles
        obj = ScalarReal((double)(*(int64_t*)data));
        break;
    case MORLOC_UINT8:
        obj = ScalarInteger((int)(*(uint8_t*)data));
        break;
    case MORLOC_UINT16:
        obj = ScalarInteger((int)(*(uint16_t*)data));
        break;
    case MORLOC_UINT32:
        obj = ScalarReal((double)(*(uint32_t*)data));
        break;
    case MORLOC_UINT64:
        obj = ScalarReal((double)(*(uint64_t*)data));
        break;
    case MORLOC_FLOAT32:
        obj = ScalarReal((double)(*(float*)data));
        break;
    case MORLOC_FLOAT64:
        obj = ScalarReal(*(double*)data);
        break;
    case MORLOC_STRING: {
        if (schema->hint != NULL
&& strcmp(schema->hint, "raw") == 0){
            // "raw" hint: decode the byte array as an R raw vector
            Array* raw_array = (Array*)data;
            if(raw_array->size > 0){
                void* tmp_ptr = R_TRY(resolve_relptr, raw_array->data, base_ptr);
                obj = PROTECT(allocVector(RAWSXP, raw_array->size));
                memcpy(RAW(obj), tmp_ptr, raw_array->size);
            } else {
                obj = PROTECT(allocVector(RAWSXP, 0));
            }
            UNPROTECT(1);
        } else {
            // Default: decode as a length-1 character vector
            Array* str_array = (Array*)data;
            if(str_array->size > 0){
                void* tmp_ptr = R_TRY(resolve_relptr, str_array->data, base_ptr);
                SEXP chr = PROTECT(mkCharLen(tmp_ptr, str_array->size));
                obj = PROTECT(ScalarString(chr));
            } else {
                SEXP chr = PROTECT(mkChar(""));
                obj = PROTECT(ScalarString(chr));
            }
            UNPROTECT(2);
        }
    }
    break;
    case MORLOC_ARRAY: {
        Array* array = (Array*)data;
        Schema* element_schema = schema->parameters[0];
        char* start;
        // Dispatch on the element type to build the natural R vector type
        switch(element_schema->type){
        case MORLOC_BOOL:
            obj = PROTECT(allocVector(LGLSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                LOGICAL(obj)[i] = (bool)*(uint8_t*)(start + i) ? TRUE : FALSE;
            }
            UNPROTECT(1);
            break;
        case MORLOC_SINT8:
            obj = PROTECT(allocVector(INTSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                INTEGER(obj)[i] = (int)(*(int8_t*)(start + i * sizeof(int8_t)));
            }
            UNPROTECT(1);
            break;
        case MORLOC_SINT16:
            obj = PROTECT(allocVector(INTSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                INTEGER(obj)[i] = (int)(*(int16_t*)(start + i * sizeof(int16_t)));
            }
            UNPROTECT(1);
            break;
        case MORLOC_SINT32:
            // Same width as R's int, so a straight memcpy is possible
            obj = PROTECT(allocVector(INTSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            {
                void* tmp_ptr = R_TRY(resolve_relptr, array->data, base_ptr);
                memcpy(INTEGER(obj), tmp_ptr, array->size * sizeof(int32_t));
            }
            UNPROTECT(1);
            break;
        case MORLOC_SINT64:
            obj = PROTECT(allocVector(REALSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                REAL(obj)[i] = (double)(*(int64_t*)(start + i * sizeof(int64_t)));
            }
            UNPROTECT(1);
            break;
        // Interpret the uint8 as a raw vector
        case MORLOC_UINT8:
            obj = PROTECT(allocVector(RAWSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            memcpy(RAW(obj), start, array->size * sizeof(uint8_t));
            UNPROTECT(1);
            break;
        case MORLOC_UINT16:
            obj = PROTECT(allocVector(INTSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                INTEGER(obj)[i] = (int)(*(uint16_t*)(start + i * sizeof(uint16_t)));
            }
            UNPROTECT(1);
            break;
        case MORLOC_UINT32:
            obj = PROTECT(allocVector(REALSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                REAL(obj)[i] = (double)(*(uint32_t*)(start + i * sizeof(uint32_t)));
            }
            UNPROTECT(1);
            break;
        case MORLOC_UINT64:
            // NOTE: the R integer cannot store a 64 bit int
            obj = PROTECT(allocVector(REALSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                REAL(obj)[i] = (double)(*(uint64_t*)(start + i * sizeof(uint64_t)));
            }
            UNPROTECT(1);
            break;
        case MORLOC_FLOAT32:
            obj = PROTECT(allocVector(REALSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            for (size_t i = 0; i < array->size; i++) {
                REAL(obj)[i] = (double)(*(float*)(start + i * sizeof(float)));
            }
            UNPROTECT(1);
            break;
        case MORLOC_FLOAT64:
            obj = PROTECT(allocVector(REALSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            memcpy(REAL(obj), start, array->size * sizeof(double));
            UNPROTECT(1);
            break;
        case MORLOC_STRING: {
            obj = PROTECT(allocVector(STRSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            // NOTE(review): stride is the outer schema's width here, not
            // element_schema->width -- confirm this matches the writer layout
            size_t width = schema->width;
            for (size_t i = 0; i < array->size; i++) {
                Array* str_array = (Array*)(start + i * width);
                SEXP item;
                if(str_array->size == 0){
                    item = PROTECT(mkCharLen("", 0));
                } else {
                    void* str_ptr = R_TRY_WITH(UNPROTECT(1), resolve_relptr, str_array->data, base_ptr);
                    item = PROTECT(mkCharLen(str_ptr, str_array->size));
                }
                UNPROTECT(1);
                SET_STRING_ELT(obj, i, item);
            }
            UNPROTECT(1);
        }
        break;
        default: {
            // Compound elements become elements of an R list
            obj = PROTECT(allocVector(VECSXP, array->size));
            if(array->size == 0) { UNPROTECT(1); break; }
            start = (char*)R_TRY(resolve_relptr, array->data, base_ptr);
            size_t width = element_schema->width;
            for (size_t i = 0; i < array->size; i++) {
                SEXP item = from_voidstar(start + width * i, element_schema, base_ptr);
                if (item ==
R_NilValue) {
                    UNPROTECT(1);
                    obj = R_NilValue;
                    goto error;
                }
                SET_VECTOR_ELT(obj, i, item);
            }
            UNPROTECT(1);
        }
        break;
        }
    }
    break;
    case MORLOC_TUPLE: {
        // Tuples come back as unnamed R lists, one slot per field offset
        obj = PROTECT(allocVector(VECSXP, schema->size));
        for (size_t i = 0; i < schema->size; i++) {
            void* item_ptr = (char*)data + schema->offsets[i];
            SEXP item = from_voidstar(item_ptr, schema->parameters[i], base_ptr);
            if (item == R_NilValue) {
                UNPROTECT(1);
                obj = R_NilValue;
                goto error;
            }
            SET_VECTOR_ELT(obj, i, item);
        }
        UNPROTECT(1);
        break;
    }
    case MORLOC_MAP: {
        // Maps come back as named R lists using the schema's key order
        obj = PROTECT(allocVector(VECSXP, schema->size));
        SEXP names = PROTECT(allocVector(STRSXP, schema->size));
        for (size_t i = 0; i < schema->size; i++) {
            void* item_ptr = (char*)data + schema->offsets[i];
            SEXP value = from_voidstar(item_ptr, schema->parameters[i], base_ptr);
            if (value == R_NilValue) {
                UNPROTECT(2);
                obj = R_NilValue;
                goto error;
            }
            SET_VECTOR_ELT(obj, i, value);
            SET_STRING_ELT(names, i, mkChar(schema->keys[i]));
        }
        setAttrib(obj, R_NamesSymbol, names);
        UNPROTECT(2);
        break;
    }
    case MORLOC_OPTIONAL: {
        // Leading tag byte: 0 = absent (NULL), nonzero = payload follows
        uint8_t tag = *(const uint8_t*)data;
        if (tag == 0) { return R_NilValue; }
        obj = from_voidstar((const char*)data + schema->offsets[0], schema->parameters[0], base_ptr);
        break;
    }
    case MORLOC_TENSOR: {
        const Tensor* tensor = (const Tensor*)data;
        size_t ndim = schema_tensor_ndim(schema);
        size_t total = tensor->total_elements;
        if (total == 0) {
            // NOTE(review): obj is still R_NilValue at this point, so
            // isReal(obj) is always FALSE and empty tensors always come back
            // as INTSXP, even for float schemas -- confirm intended
            if (isReal(obj)) {
                obj = PROTECT(allocVector(REALSXP, 0));
            } else {
                obj = PROTECT(allocVector(INTSXP, 0));
            }
            UNPROTECT(1);
            break;
        }
        const int64_t* shape = (const int64_t*)resolve_relptr(tensor->shape, base_ptr, NULL);
        const void* tdata = resolve_relptr(tensor->data, base_ptr, NULL);
        // Allocate R vector
        int sexptype;
        switch (schema->parameters[0]->type) {
        case MORLOC_FLOAT32:
        case MORLOC_FLOAT64:
            sexptype = REALSXP;
            break;
        case MORLOC_BOOL:
            sexptype = LGLSXP;
            break;
        default:
            sexptype = INTSXP;
            break;
        }
        obj = PROTECT(allocVector(sexptype, (R_xlen_t)total));
        if (ndim == 1) {
            // 1D: no transpose
            if (sexptype == REALSXP) {
                if (schema->parameters[0]->type == MORLOC_FLOAT32) {
                    const float* src = (const float*)tdata;
                    double* dst = REAL(obj);
                    for (size_t i = 0; i < total; i++) dst[i] = (double)src[i];
                } else {
                    memcpy(REAL(obj), tdata, total * sizeof(double));
                }
            } else if (sexptype == INTSXP) {
                size_t elem_w = schema->parameters[0]->width;
                if (elem_w == sizeof(int)) {
                    memcpy(INTEGER(obj), tdata, total * sizeof(int));
                } else {
                    // Widen or narrow to int
                    int* dst = INTEGER(obj);
                    const char* src = (const char*)tdata;
                    for (size_t i = 0; i < total; i++) {
                        int64_t v = 0;
                        memcpy(&v, src + i * elem_w, elem_w);
                        dst[i] = (int)v;
                    }
                }
            } else if (sexptype == LGLSXP) {
                const uint8_t* src = (const uint8_t*)tdata;
                int* dst = LOGICAL(obj);
                for (size_t i = 0; i < total; i++) dst[i] = src[i] ? 1 : 0;
            }
        } else if (ndim == 2) {
            // 2D: row-major to col-major transpose
            size_t nrows = (size_t)shape[0];
            size_t ncols = (size_t)shape[1];
            if (sexptype == REALSXP) {
                const double* src = (const double*)tdata;
                double* dst = REAL(obj);
                for (size_t r = 0; r < nrows; r++)
                    for (size_t c = 0; c < ncols; c++)
                        dst[c * nrows + r] = src[r * ncols + c];
            } else if (sexptype == INTSXP) {
                const int* src = (const int*)tdata;
                int* dst = INTEGER(obj);
                for (size_t r = 0; r < nrows; r++)
                    for (size_t c = 0; c < ncols; c++)
                        dst[c * nrows + r] = src[r * ncols + c];
            }
        } else {
            // General N-D: row-major to col-major
            size_t col_strides[5];
            col_strides[0] = 1;
            for (size_t d = 1; d < ndim; d++) col_strides[d] = col_strides[d-1] * (size_t)shape[d-1];
            size_t row_strides[5];
            row_strides[ndim-1] = 1;
            for (size_t d = ndim-1; d > 0; d--) row_strides[d-1] = row_strides[d] * (size_t)shape[d];
            if (sexptype == REALSXP) {
                const double* src = (const double*)tdata;
                double* dst = REAL(obj);
                for (size_t i = 0; i < total; i++) {
                    // i is row-major index, compute col-major index
                    size_t rem = i;
                    size_t col_idx = 0;
                    for (size_t d = 0; d < ndim; d++) {
                        size_t coord = rem / row_strides[d];
                        rem %= row_strides[d];
                        col_idx += coord * col_strides[d];
                    }
                    dst[col_idx] = src[i];
}
            } else if (sexptype == INTSXP) {
                const int* src = (const int*)tdata;
                int* dst = INTEGER(obj);
                for (size_t i = 0; i < total; i++) {
                    size_t rem = i;
                    size_t col_idx = 0;
                    for (size_t d = 0; d < ndim; d++) {
                        size_t coord = rem / row_strides[d];
                        rem %= row_strides[d];
                        col_idx += coord * col_strides[d];
                    }
                    dst[col_idx] = src[i];
                }
            }
        }
        // Set dim attribute
        SEXP r_dim = PROTECT(allocVector(INTSXP, (R_xlen_t)ndim));
        for (size_t i = 0; i < ndim; i++) INTEGER(r_dim)[i] = (int)shape[i];
        setAttrib(obj, R_DimSymbol, r_dim);
        UNPROTECT(2); // obj, r_dim
        break;
    }
    default:
        MORLOC_ERROR("Unsupported schema type");
        goto error;
    }
    return obj;
error:
    return R_NilValue;
}

// }}} from_voidstar

// {{{ exported morloc API functions

// PID of the process that created the daemon (set in morloc_start_daemon)
static pid_t daemon_creator_pid = 0;

// Close the daemon when the R object dies
static void daemon_finalizer(SEXP ptr) {
    if (!R_ExternalPtrAddr(ptr)) return;
    // Skip cleanup in forked children -- they must not unlink the socket file
    if (daemon_creator_pid != 0 && getpid() != daemon_creator_pid) {
        R_ClearExternalPtr(ptr);
        return;
    }
    language_daemon_t* daemon = (language_daemon_t*)R_ExternalPtrAddr(ptr);
    if(daemon != NULL){
        close_daemon(&daemon);
    }
    R_ClearExternalPtr(ptr);
}

// Release daemon resources in a forked child WITHOUT unlinking the socket file.
// Workers call this after fork so they don't hold the server_fd or accidentally
// destroy the socket when they exit.
SEXP morloc_detach_daemon(SEXP daemon_r) {
    if (!R_ExternalPtrAddr(daemon_r)) return R_NilValue;
    language_daemon_t* daemon = (language_daemon_t*)R_ExternalPtrAddr(daemon_r);
    if (daemon != NULL) {
        close_socket(daemon->server_fd);
        // Close and free every queued client connection
        client_list_t *current = daemon->client_fds;
        while (current) {
            client_list_t *next = current->next;
            close(current->fd);
            free(current);
            current = next;
        }
        free(daemon->socket_path);
        free(daemon->tmpdir);
        free(daemon->shm_basename);
        free(daemon);
    }
    R_ClearExternalPtr(daemon_r);
    return R_NilValue;
}

// Start a language daemon and return it to R as a finalized external pointer
// of class "language_daemon".
SEXP morloc_start_daemon(
    SEXP socket_path_r,
    SEXP tmpdir_r,
    SEXP shm_basename_r,
    SEXP shm_default_size_r
){ MAYFAIL
    const char* socket_path = CHAR(STRING_ELT(socket_path_r, 0));
    const char* tmpdir = CHAR(STRING_ELT(tmpdir_r, 0));
    const char* shm_basename = CHAR(STRING_ELT(shm_basename_r, 0));
    size_t shm_default_size = (size_t)asInteger(shm_default_size_r);
    language_daemon_t* daemon = R_TRY(
        start_daemon,
        socket_path,
        tmpdir,
        shm_basename,
        shm_default_size
    );
    // Wrap pointer in external pointer
    SEXP result = PROTECT(R_MakeExternalPtr(daemon, R_NilValue, R_NilValue));
    // Record which process owns the daemon (for the PID guard in daemon_finalizer)
    daemon_creator_pid = getpid();
    // Register finalizer with wrapper
    R_RegisterCFinalizerEx(result, daemon_finalizer, TRUE);
    // Set class attribute
    SEXP class_name = PROTECT(mkString("language_daemon"));
    SET_CLASS(result, class_name);
    UNPROTECT(2);
    return result;
}

// Initialize a shared-memory volume from R
SEXP morloc_shinit(SEXP shm_basename_r, SEXP volume_index_r, SEXP shm_size_r) { MAYFAIL
    const char* shm_basename = CHAR(STRING_ELT(shm_basename_r, 0));
    size_t volume_index = (size_t)asInteger(volume_index_r);
    size_t shm_size = (size_t)asInteger(shm_size_r);
    R_TRY(shinit, shm_basename, volume_index, shm_size);
    return R_NilValue;
}

// {{{ signal handling for graceful shutdown

static volatile sig_atomic_t r_shutting_down = 0;

static void r_sigterm_handler(int sig) {
    (void)sig;
    r_shutting_down = 1;
}

// Install the SIGTERM handler that flips r_shutting_down
SEXP morloc_install_sigterm_handler(void) {
    struct sigaction sa;
    sa.sa_handler = r_sigterm_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    sigaction(SIGTERM, &sa, NULL);
    return R_NilValue;
}

// Query the shutdown flag from R
SEXP morloc_is_shutting_down(void) {
    return ScalarLogical(r_shutting_down != 0);
}

SEXP morloc_set_line_buffered(void) {
    // Only stderr - stdout is left fully buffered for performance
    // and flushed explicitly after each job.
    setvbuf(stderr, NULL, _IOLBF, 0);
    return R_NilValue;
}

// }}} signal handling

// Wait (up to 100ms) for a client connection or data. Returns -1 on shutdown,
// 0 on timeout/no client, or a ready client fd.
SEXP morloc_wait_for_client(SEXP daemon_r){ MAYFAIL
    if (!R_ExternalPtrAddr(daemon_r)) {
        MORLOC_ERROR("Expected a daemon pointer");
    }
    // Return immediately if shutdown was requested
    if (r_shutting_down) {
        return ScalarInteger(-1);
    }
    language_daemon_t* daemon = (language_daemon_t*)R_ExternalPtrAddr(daemon_r);
    // Use pselect directly (not wait_for_client_with_timeout) so we can
    // return immediately on EINTR from SIGTERM instead of retrying via WAIT
    fd_set read_fds;
    FD_ZERO(&read_fds);
    FD_SET(daemon->server_fd, &read_fds);
    int max_fd = daemon->server_fd;
    for (client_list_t* cl = daemon->client_fds; cl != NULL; cl = cl->next) {
        FD_SET(cl->fd, &read_fds);
        if (cl->fd > max_fd) max_fd = cl->fd;
    }
    // 100ms timeout -- short enough for responsive SIGTERM handling
    struct timespec ts = { .tv_sec = 0, .tv_nsec = 100000000 };
    sigset_t emptymask;
    sigemptyset(&emptymask);
    int ready = pselect(max_fd + 1, &read_fds, NULL, NULL, &ts, &emptymask);
    // Check shutdown after pselect (signal may have arrived during the call)
    if (r_shutting_down) {
        return ScalarInteger(-1);
    }
    // Timeout or interrupted -- return 0 (no client)
    if (ready <= 0) {
        return ScalarInteger(0);
    }
    // Accept new connection if server_fd is ready
    if (FD_ISSET(daemon->server_fd, &read_fds)) {
        int fd = accept(daemon->server_fd, NULL, NULL);
        if (fd >= 0) {
            fcntl(fd, F_SETFL, O_NONBLOCK);
            client_list_t* new_client = (client_list_t*)calloc(1, sizeof(client_list_t));
            if (new_client == NULL) {
                close(fd);
                MORLOC_ERROR("calloc failed");
            }
            new_client->fd = fd;
            new_client->next = NULL;
            if
(daemon->client_fds == NULL) {
                // Append the accepted connection to the client queue
                daemon->client_fds = new_client;
            } else {
                client_list_t* last = daemon->client_fds;
                while (last->next) last = last->next;
                last->next = new_client;
            }
        }
    }
    // Return first ready client fd
    if (daemon->client_fds != NULL) {
        client_list_t* first = daemon->client_fds;
        int client_fd = first->fd;
        daemon->client_fds = first->next;
        free(first);
        return ScalarInteger(client_fd);
    }
    return ScalarInteger(0);
}

// Parse a morloc call packet (raw vector) into list(manifold_id, list_of_raw_args)
SEXP morloc_read_morloc_call_packet(SEXP packet_r) { MAYFAIL
    uint8_t* packet = RAW(packet_r);
    morloc_call_t* call_packet = R_TRY(read_morloc_call_packet, packet);
    // Create two element R list
    // 1: manifold id
    // 2: argument list of raw packets
    SEXP r_list = PROTECT(allocVector(VECSXP, 2));
    // Convert midx to R integer
    SEXP r_mid = PROTECT(ScalarInteger(call_packet->midx));
    // Create arguments list
    SEXP r_args = PROTECT(allocVector(VECSXP, call_packet->nargs));
    for(size_t i = 0; i < call_packet->nargs; i++) {
        size_t arg_packet_size = R_TRY_WITH(UNPROTECT(3), morloc_packet_size, call_packet->args[i]);
        SEXP r_arg = PROTECT(allocVector(RAWSXP, arg_packet_size));
        memcpy(RAW(r_arg), call_packet->args[i], arg_packet_size);
        SET_VECTOR_ELT(r_args, i, r_arg);
        UNPROTECT(1); // r_arg
    }
    // Assemble final list
    SET_VECTOR_ELT(r_list, 0, r_mid);
    SET_VECTOR_ELT(r_list, 1, r_args);
    free_morloc_call(call_packet);
    UNPROTECT(3); // r_list, r_mid, r_args
    return r_list;
}

// Send a raw packet to a foreign server over an open client fd; returns the
// number of bytes sent as an R integer.
SEXP morloc_send_packet_to_foreign_server(SEXP client_fd_r, SEXP packet_r) { MAYFAIL
    if (TYPEOF(client_fd_r) != INTSXP || LENGTH(client_fd_r) != 1) {
        MORLOC_ERROR("client_fd must be a single integer");
    }
    if (TYPEOF(packet_r) != RAWSXP) {
        MORLOC_ERROR("packet must be a raw vector");
    }
    // Extract arguments
    int client_fd = INTEGER(client_fd_r)[0];
    uint8_t* packet = RAW(packet_r);
    size_t packet_size = (size_t)LENGTH(packet_r); // NOTE(review): currently unused
    // Call underlying implementation
    size_t bytes_sent = R_TRY(send_packet_to_foreign_server, client_fd, packet);
    // This could in theory be problematic, since int is smaller than size_t
    // In practice it should not be, since packets are typically small
    // However, if I refactor to send large packets in the future, this could be
    // problematic. Then I would need to convert to a double return.
    return ScalarInteger((int)bytes_sent);
}

// Read from socket returning raw vector of received data
SEXP morloc_stream_from_client(SEXP client_fd_r) { MAYFAIL
    if (TYPEOF(client_fd_r) != INTSXP || LENGTH(client_fd_r) != 1) {
        MORLOC_ERROR("client_fd must be a single integer");
    }
    int client_fd = INTEGER(client_fd_r)[0];
    // Read packet from socket
    uint8_t* packet = R_TRY(stream_from_client, client_fd);
    // Read the packet size from the header (free packet before longjmp on error)
    size_t packet_size = R_TRY_WITH(free(packet), morloc_packet_size, packet);
    // Create raw vector for result
    SEXP result = PROTECT(allocVector(RAWSXP, packet_size));
    memcpy(RAW(result), packet, packet_size);
    free(packet);
    UNPROTECT(1);
    return result;
}

// close_socket
SEXP morloc_close_socket(SEXP socket_id_r) {
    if (TYPEOF(socket_id_r) != INTSXP || LENGTH(socket_id_r) != 1) {
        MORLOC_ERROR("socket_id must be a single integer");
    }
    int socket_id = INTEGER(socket_id_r)[0];
    close_socket(socket_id);
    // Return invisible NULL
    return R_NilValue;
}

// put_value
// Serialize an R value per the schema string and return a morloc data packet
// (raw vector). Values with an "arrow" schema hint go through the Arrow C
// Data Interface instead of the voidstar path.
SEXP morloc_put_value(SEXP obj_r, SEXP schema_str_r) { MAYFAIL
    if (TYPEOF(schema_str_r) != STRSXP || LENGTH(schema_str_r) != 1) {
        MORLOC_ERROR("schema must be a single string");
    }
    const char* schema_cstr = CHAR(STRING_ELT(schema_str_r, 0));
    char* schema_str = strdup(schema_cstr);
    Schema* schema = R_TRY_WITH(free(schema_str), parse_schema, schema_str);
    free(schema_str);
    // Arrow dispatch: if schema hint is "arrow", use Arrow C Data Interface
    if (schema->hint && strcmp(schema->hint, "arrow") == 0) {
        // Export R arrow RecordBatch via C Data Interface -> copy to shm -> packet
        // arrow::ExportRecordBatch(batch, array_ptr, schema_ptr)
        struct ArrowSchema arrow_schema;
        struct ArrowArray arrow_array;
        memset(&arrow_schema, 0, sizeof(arrow_schema));
memset(&arrow_array, 0, sizeof(arrow_array));
        // Look up arrow::ExportRecordBatch dynamically so the arrow package
        // is only required when an arrow schema is actually used
        SEXP arrow_ns = PROTECT(R_FindNamespace(mkString("arrow")));
        SEXP export_fn = PROTECT(findVarInFrame(arrow_ns, install("ExportRecordBatch")));
        if (export_fn == R_UnboundValue) {
            UNPROTECT(2);
            free_schema(schema);
            MORLOC_ERROR("arrow::ExportRecordBatch not found; is the arrow package installed?");
        }
        SEXP array_ptr_r = PROTECT(R_MakeExternalPtr(&arrow_array, R_NilValue, R_NilValue));
        SEXP schema_ptr_r = PROTECT(R_MakeExternalPtr(&arrow_schema, R_NilValue, R_NilValue));
        SEXP call = PROTECT(lang4(export_fn, obj_r, array_ptr_r, schema_ptr_r));
        eval(call, arrow_ns);
        UNPROTECT(5);
        char* errmsg = NULL;
        relptr_t relptr = arrow_to_shm(&arrow_array, &arrow_schema, &errmsg);
        // Release the C Data Interface structures regardless of outcome
        if (arrow_schema.release) arrow_schema.release(&arrow_schema);
        if (arrow_array.release) arrow_array.release(&arrow_array);
        if (errmsg) {
            free_schema(schema);
            MORLOC_ERROR("Arrow export failed: %s", errmsg);
        }
        uint8_t* packet = make_arrow_data_packet(relptr, schema);
        if (!packet) {
            free_schema(schema);
            MORLOC_ERROR("Failed to create arrow data packet");
        }
        size_t packet_size = R_TRY_WITH({free(packet); free_schema(schema);}, morloc_packet_size, packet);
        SEXP result = PROTECT(allocVector(RAWSXP, packet_size));
        memcpy(RAW(result), packet, packet_size);
        free(packet);
        free_schema(schema);
        UNPROTECT(1);
        return result;
    }
    // Voidstar path: serialize into shared memory, then wrap in a packet
    void* voidstar = to_voidstar(obj_r, schema);
    if (!voidstar) {
        free_schema(schema);
        MORLOC_ERROR("Failed to convert R object to internal representation");
    }
    relptr_t relptr = R_TRY_WITH(free_schema(schema), abs2rel, voidstar);
    uint8_t* packet = R_TRY_WITH(free_schema(schema), make_data_packet_auto, voidstar, relptr, schema);
    const morloc_packet_header_t* hdr = (const morloc_packet_header_t*)packet;
    if (hdr->command.data.source != PACKET_SOURCE_RPTR) {
        // Data inlined in packet -- free SHM immediately
        char* free_err = NULL;
        shfree_by_schema((absptr_t)voidstar, schema, &free_err);
        if (free_err) { free(free_err); free_err = NULL; }
        shfree((absptr_t)voidstar, &free_err);
        if (free_err) { free(free_err); }
    }
    size_t packet_size = R_TRY_WITH({free(packet); free_schema(schema);}, morloc_packet_size, packet);
    SEXP result = PROTECT(allocVector(RAWSXP, packet_size));
    memcpy(RAW(result), packet, packet_size);
    free(packet);
    free_schema(schema);
    UNPROTECT(1);
    return result;
}

// mlc_show: serialize a value to a JSON string
SEXP morloc_mlc_show(SEXP obj_r, SEXP schema_str_r) { MAYFAIL
    if (TYPEOF(schema_str_r) != STRSXP || LENGTH(schema_str_r) != 1) {
        MORLOC_ERROR("schema must be a single string");
    }
    char* schema_str = strdup(CHAR(STRING_ELT(schema_str_r, 0)));
    Schema* schema = R_TRY_WITH(free(schema_str), parse_schema, schema_str);
    free(schema_str);
    void* voidstar = to_voidstar(obj_r, schema);
    if (!voidstar) {
        free_schema(schema);
        MORLOC_ERROR("Failed to convert R object to internal representation");
    }
    char* json = R_TRY_WITH(free_schema(schema), mlc_show, voidstar, schema);
    {
        // Best-effort free of the temporary shared-memory value
        char* shfree_errmsg = NULL;
        shfree(voidstar, &shfree_errmsg);
        free(shfree_errmsg);
    }
    free_schema(schema);
    SEXP result = PROTECT(mkString(json));
    free(json);
    UNPROTECT(1);
    return result;
}

// Deserialize a morloc data packet (raw vector) into an R value, dispatching
// on the packet header's source/format (Arrow, inline voidstar, or SHM).
SEXP morloc_get_value(SEXP packet_r, SEXP schema_str_r) { MAYFAIL
    if (TYPEOF(packet_r) != RAWSXP) {
        MORLOC_ERROR("packet must be a raw vector");
    }
    if (TYPEOF(schema_str_r) != STRSXP || LENGTH(schema_str_r) != 1) {
        MORLOC_ERROR("schema must be a single string");
    }
    // Extract arguments
    uint8_t* packet = RAW(packet_r);
    size_t packet_size = (size_t)LENGTH(packet_r); // NOTE(review): currently unused
    const morloc_packet_header_t* header = (const morloc_packet_header_t*)packet;
    uint8_t source = header->command.data.source;
    uint8_t format = header->command.data.format;
    const char* schema_cstr = CHAR(STRING_ELT(schema_str_r, 0));
    char* schema_str = strdup(schema_cstr);
    Schema* schema = R_TRY_WITH(free(schema_str), parse_schema, schema_str);
    free(schema_str);
    // Arrow dispatch: if packet format is Arrow, import via C Data Interface
    if (format == PACKET_FORMAT_ARROW) {
        uint8_t* arrow_ptr = R_TRY_WITH(free_schema(schema), get_morloc_data_packet_value, packet, schema);
        const arrow_shm_header_t* arrow_hdr = (const arrow_shm_header_t*)arrow_ptr;
        struct ArrowSchema arrow_schema;
        struct ArrowArray arrow_array;
        char* arrow_err = NULL;
        arrow_from_shm(arrow_hdr, &arrow_schema, &arrow_array, &arrow_err);
        if (arrow_err) {
            if (arrow_schema.release) arrow_schema.release(&arrow_schema);
            if (arrow_array.release) arrow_array.release(&arrow_array);
            free_schema(schema);
            MORLOC_ERROR("Arrow import failed: %s", arrow_err);
        }
        // Import via R arrow package: arrow::ImportRecordBatch(array_ptr, schema_ptr)
        SEXP arrow_ns = PROTECT(R_FindNamespace(mkString("arrow")));
        SEXP import_fn = PROTECT(findVarInFrame(arrow_ns, install("ImportRecordBatch")));
        if (import_fn == R_UnboundValue) {
            if (arrow_schema.release) arrow_schema.release(&arrow_schema);
            if (arrow_array.release) arrow_array.release(&arrow_array);
            UNPROTECT(2);
            free_schema(schema);
            MORLOC_ERROR("arrow::ImportRecordBatch not found; is the arrow package installed?");
        }
        SEXP array_ptr_r = PROTECT(R_MakeExternalPtr(&arrow_array, R_NilValue, R_NilValue));
        SEXP schema_ptr_r = PROTECT(R_MakeExternalPtr(&arrow_schema, R_NilValue, R_NilValue));
        SEXP call = PROTECT(lang3(import_fn, array_ptr_r, schema_ptr_r));
        SEXP obj_r = PROTECT(eval(call, arrow_ns));
        UNPROTECT(6);
        // Incref shm so data stays alive
        char* incref_err = NULL;
        shincref((absptr_t)arrow_ptr, &incref_err);
        if (incref_err) { free(incref_err); }
        free_schema(schema);
        return obj_r;
    }
    // Fast path: inline voidstar -- read directly from packet, no SHM needed
    if (source == PACKET_SOURCE_MESG && format == PACKET_FORMAT_VOIDSTAR) {
        const uint8_t* payload = packet + sizeof(morloc_packet_header_t) + header->offset;
        SEXP obj_r = from_voidstar((const void*)payload, schema, (const void*)payload);
        free_schema(schema);
        if (obj_r == NULL) {
            MORLOC_ERROR("Failed to convert internal representation to R object");
        }
        return obj_r;
    }
    // SHM paths
    uint8_t* voidstar = R_TRY_WITH(free_schema(schema), get_morloc_data_packet_value, packet, schema);
    SEXP obj_r = from_voidstar(voidstar, schema, NULL);
    if (obj_r == NULL) {
        free_schema(schema);
        MORLOC_ERROR("Failed to convert internal representation to R object");
    }
    free_schema(schema);
    return obj_r;
}

// Make a synchronous call to a foreign-language pool over a Unix socket:
// builds a call packet from raw-vector args, sends it, returns the raw reply.
SEXP morloc_foreign_call(SEXP socket_path_r, SEXP mid_r, SEXP args_r) { MAYFAIL
    // Validate inputs
    if (TYPEOF(socket_path_r) != STRSXP || LENGTH(socket_path_r) != 1) {
        MORLOC_ERROR("socket_path must be a single string");
    }
    if (TYPEOF(mid_r) != INTSXP || LENGTH(mid_r) != 1) {
        MORLOC_ERROR("mid must be a single integer");
    }
    if (TYPEOF(args_r) != VECSXP) {
        MORLOC_ERROR("args must be a list of raw vectors");
    }
    // Extract arguments
    const char* socket_path = CHAR(STRING_ELT(socket_path_r, 0));
    int mid = INTEGER(mid_r)[0];
    size_t nargs = (size_t)LENGTH(args_r);
    // Allocate temporary storage (R_alloc memory is reclaimed by R on return)
    const uint8_t** arg_packets = (const uint8_t**)R_alloc(nargs, sizeof(uint8_t*));
    // Convert R raw vectors to C buffers
    for (size_t i = 0; i < nargs; i++) {
        SEXP arg = VECTOR_ELT(args_r, i);
        if (TYPEOF(arg) != RAWSXP) {
            MORLOC_ERROR("All arguments must be raw vectors (argument %zu)", i+1);
        }
        arg_packets[i] = RAW(arg);
    }
    // Create call packet
    uint8_t* packet = R_TRY(
        make_morloc_local_call_packet,
        (uint32_t)mid,
        arg_packets,
        nargs
    );
    // Send/receive over socket
    uint8_t* result = R_TRY_WITH(free(packet),
        send_and_receive_over_socket,
        socket_path,
        packet
    );
    // Get result size
    size_t result_length = R_TRY_WITH({free(packet); free(result);}, morloc_packet_size, result);
    // Create result raw vector
    SEXP result_r = PROTECT(allocVector(RAWSXP, result_length));
    memcpy(RAW(result_r), result, result_length);
    free(packet);
    free(result);
    // Cleanup
    UNPROTECT(1);
    return result_r;
}

// Test whether a raw packet is a ping packet
SEXP morloc_is_ping(SEXP packet_r) { MAYFAIL
    if (TYPEOF(packet_r) != RAWSXP) {
        MORLOC_ERROR("packet must be a raw vector");
    }
    bool is_ping = R_TRY(packet_is_ping, RAW(packet_r));
    return ScalarLogical(is_ping);
}

SEXP morloc_is_local_call(SEXP
packet_r) { MAYFAIL if (TYPEOF(packet_r) != RAWSXP) { MORLOC_ERROR("packet must be a raw vector"); } bool is_local_call = R_TRY(packet_is_local_call, RAW(packet_r)); return ScalarLogical(is_local_call); } SEXP morloc_is_remote_call(SEXP packet_r) { MAYFAIL if (TYPEOF(packet_r) != RAWSXP) { MORLOC_ERROR("packet must be a raw vector"); } bool is_remote_call = R_TRY(packet_is_remote_call, RAW(packet_r)); return ScalarLogical(is_remote_call); } SEXP morloc_pong(SEXP packet_r) { MAYFAIL if (TYPEOF(packet_r) != RAWSXP) { MORLOC_ERROR("packet must be a raw vector"); } // Generate a response to ping uint8_t* pong = R_TRY(return_ping, RAW(packet_r)); size_t pong_size = R_TRY_WITH(free(pong), morloc_packet_size, pong); SEXP result_r = PROTECT(allocVector(RAWSXP, pong_size)); memcpy(RAW(result_r), pong, pong_size); free(pong); UNPROTECT(1); return result_r; } SEXP morloc_make_fail_packet(SEXP failure_message_r) { MAYFAIL const char* failure_message = CHAR(STRING_ELT(failure_message_r, 0)); uint8_t* fail_packet = make_fail_packet(failure_message); size_t packet_size = R_TRY(morloc_packet_size, fail_packet); SEXP packet_r = PROTECT(allocVector(RAWSXP, packet_size)); memcpy(RAW(packet_r), fail_packet, packet_size); free(fail_packet); UNPROTECT(1); return packet_r; } SEXP extract_element_by_name(SEXP list, const char* key) { // Ensure inputs are correct types if (TYPEOF(list) != VECSXP) MORLOC_ERROR("Input must be a list"); // Get list names attribute SEXP names = Rf_getAttrib(list, R_NamesSymbol); if (names == R_NilValue) MORLOC_ERROR("List must have names"); // Iterate through list elements for (int i = 0; i < Rf_length(list); i++) { const char *current_name = CHAR(STRING_ELT(names, i)); if (strcmp(key, current_name) == 0) { return VECTOR_ELT(list, i); // Return matching element } } return R_NilValue; // Return NULL if name not found } SEXP morloc_remote_call(SEXP midx, SEXP socket_path, SEXP cache_path, SEXP resources, SEXP arg_packets) { MAYFAIL // Protect all R inputs 
immediately PROTECT(socket_path); PROTECT(cache_path); PROTECT(resources); PROTECT(arg_packets = coerceVector(arg_packets, VECSXP)); // Convert basic parameters int c_midx = INTEGER(midx)[0]; const char* c_socket_path = CHAR(STRING_ELT(socket_path, 0)); const char* c_cache_path = CHAR(STRING_ELT(cache_path, 0)); // Extract resources with validation resources_t c_resources; SEXP mem = extract_element_by_name(resources, "memory"); SEXP tim = extract_element_by_name(resources, "time"); SEXP cpu = extract_element_by_name(resources, "cpus"); SEXP gpu = extract_element_by_name(resources, "gpus"); if (mem == R_NilValue || tim == R_NilValue || cpu == R_NilValue || gpu == R_NilValue) { UNPROTECT(4); MORLOC_ERROR("Missing required resource field (memory, time, cpus, or gpus)"); } c_resources.memory = INTEGER(mem)[0]; c_resources.time = INTEGER(tim)[0]; c_resources.cpus = INTEGER(cpu)[0]; c_resources.gpus = INTEGER(gpu)[0]; // Process argument packets with type checking size_t nargs = LENGTH(arg_packets); const uint8_t** c_arg_packets = (const uint8_t**) R_alloc(nargs, sizeof(uint8_t*)); for(size_t i = 0; i < nargs; i++) { SEXP raw_vec = VECTOR_ELT(arg_packets, i); if(TYPEOF(raw_vec) != RAWSXP) { UNPROTECT(4); MORLOC_ERROR("arg_packets must contain only raw vectors"); } c_arg_packets[i] = (uint8_t*)RAW(raw_vec); } // Execute remote call uint8_t* result_packet = R_TRY_WITH(UNPROTECT(4), remote_call, c_midx, c_socket_path, c_cache_path, &c_resources, c_arg_packets, nargs ); // Validate and copy result size_t packet_size = R_TRY_WITH({free(result_packet); UNPROTECT(4);}, morloc_packet_size, result_packet); if(!result_packet || packet_size == 0) { if(result_packet) free(result_packet); UNPROTECT(4); MORLOC_ERROR("Invalid result packet from remote call"); } SEXP result_packet_r = PROTECT(allocVector(RAWSXP, packet_size)); memcpy(RAW(result_packet_r), result_packet, packet_size); free(result_packet); // Cleanup and return UNPROTECT(5); // socket_path, cache_path, resources, 
arg_packets, result_packet_r return result_packet_r; } // {{{ fork and fd-passing functions SEXP morloc_socketpair(void) { int sv[2]; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) { error("socketpair failed: %s", strerror(errno)); } SEXP result = PROTECT(allocVector(INTSXP, 2)); INTEGER(result)[0] = sv[0]; INTEGER(result)[1] = sv[1]; UNPROTECT(1); return result; } SEXP morloc_fork(void) { pid_t pid = fork(); if (pid < 0) { error("fork failed: %s", strerror(errno)); } return ScalarInteger((int)pid); } // Immediately terminate the process without running any cleanup. // Must be used by forked worker children instead of R's quit(). // R's quit() runs finalizers that try to free objects allocated by the // parent process, which corrupts the heap on glibc >= 2.39. SEXP morloc_exit(SEXP status_r) { int status = INTEGER(status_r)[0]; _exit(status); return R_NilValue; // unreachable } SEXP morloc_send_fd(SEXP pipe_fd_r, SEXP client_fd_r) { int pipe_fd = INTEGER(pipe_fd_r)[0]; int client_fd = INTEGER(client_fd_r)[0]; struct msghdr msg = {0}; struct iovec iov; char buf[1] = {0}; char cmsgbuf[CMSG_SPACE(sizeof(int))]; iov.iov_base = buf; iov.iov_len = 1; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(sizeof(int)); memcpy(CMSG_DATA(cmsg), &client_fd, sizeof(int)); ssize_t n = sendmsg(pipe_fd, &msg, 0); if (n < 0) { error("sendmsg SCM_RIGHTS failed: %s", strerror(errno)); } return R_NilValue; } SEXP morloc_recv_fd(SEXP pipe_fd_r) { int pipe_fd = INTEGER(pipe_fd_r)[0]; struct msghdr msg = {0}; struct iovec iov; char buf[1]; char cmsgbuf[CMSG_SPACE(sizeof(int))]; iov.iov_base = buf; iov.iov_len = 1; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); ssize_t n = recvmsg(pipe_fd, &msg, 0); if (n <= 0) { return ScalarInteger(-1); } struct 
cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { return ScalarInteger(-1); } int fd; memcpy(&fd, CMSG_DATA(cmsg), sizeof(int)); return ScalarInteger(fd); } SEXP morloc_kill(SEXP pid_r, SEXP sig_r) { pid_t pid = (pid_t)INTEGER(pid_r)[0]; int sig = INTEGER(sig_r)[0]; int ret = kill(pid, sig); return ScalarInteger(ret); } SEXP morloc_waitpid(SEXP pid_r) { pid_t pid = (pid_t)INTEGER(pid_r)[0]; int status; pid_t result = waitpid(pid, &status, WNOHANG); return ScalarInteger((int)result); } SEXP morloc_waitpid_blocking(SEXP pid_r) { pid_t pid = (pid_t)INTEGER(pid_r)[0]; int status; pid_t result = waitpid(pid, &status, 0); return ScalarInteger((int)result); } // }}} fork and fd-passing functions // {{{ shared counter functions (for dynamic worker spawning) static void shared_counter_finalizer(SEXP ptr) { int* p = (int*)R_ExternalPtrAddr(ptr); if (p != NULL) { munmap(p, sizeof(int)); R_ClearExternalPtr(ptr); } } SEXP morloc_shared_counter_create(void) { int* p = (int*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { error("mmap failed for shared counter: %s", strerror(errno)); } *p = 0; SEXP ptr = PROTECT(R_MakeExternalPtr(p, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ptr, shared_counter_finalizer, TRUE); UNPROTECT(1); return ptr; } SEXP morloc_shared_counter_inc(SEXP ptr_r) { int* p = (int*)R_ExternalPtrAddr(ptr_r); if (p == NULL) error("shared counter is NULL"); int val = __atomic_add_fetch(p, 1, __ATOMIC_RELAXED); return ScalarInteger(val); } SEXP morloc_shared_counter_dec(SEXP ptr_r) { int* p = (int*)R_ExternalPtrAddr(ptr_r); if (p == NULL) error("shared counter is NULL"); int val = __atomic_sub_fetch(p, 1, __ATOMIC_RELAXED); return ScalarInteger(val); } SEXP morloc_shared_counter_read(SEXP ptr_r) { int* p = (int*)R_ExternalPtrAddr(ptr_r); if (p == NULL) error("shared counter is NULL"); int val = __atomic_load_n(p, __ATOMIC_RELAXED); 
return ScalarInteger(val); } SEXP morloc_pipe(void) { int fds[2]; if (pipe(fds) != 0) { error("pipe failed: %s", strerror(errno)); } SEXP result = PROTECT(allocVector(INTSXP, 2)); INTEGER(result)[0] = fds[0]; /* read end */ INTEGER(result)[1] = fds[1]; /* write end */ UNPROTECT(1); return result; } SEXP morloc_write_byte(SEXP fd_r, SEXP byte_r) { int fd = INTEGER(fd_r)[0]; unsigned char b = (unsigned char)RAW(byte_r)[0]; ssize_t n = write(fd, &b, 1); return ScalarInteger((int)n); } SEXP morloc_close_fd(SEXP fd_r) { int fd = INTEGER(fd_r)[0]; close(fd); return R_NilValue; } // }}} shared counter functions // {{{ C-level worker loop // Receive a file descriptor over a Unix domain socket (C-level helper). static int recv_fd_c(int pipe_fd) { struct msghdr msg = {0}; struct iovec iov; char buf[1]; char cmsgbuf[CMSG_SPACE(sizeof(int))]; iov.iov_base = buf; iov.iov_len = 1; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); ssize_t n = recvmsg(pipe_fd, &msg, 0); if (n <= 0) return -1; struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); if (!cmsg || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) return -1; int fd; memcpy(&fd, CMSG_DATA(cmsg), sizeof(int)); return fd; } // Send a fail packet to the client (best-effort, ignores send errors). static void send_fail_to_client(int client_fd, const char* msg) { char* errmsg = NULL; uint8_t* fail = make_fail_packet(msg); send_packet_to_foreign_server(client_fd, fail, &errmsg); free(fail); } // Dispatch a call to a manifold function. All packet handling is in C; // only the manifold evaluation crosses into R via R_tryEval. static void dispatch_manifold_c(int client_fd, const uint8_t* packet, SEXP dispatch, const char* label) { char* errmsg = NULL; morloc_call_t* call = read_morloc_call_packet(packet, &errmsg); if (errmsg) { send_fail_to_client(client_fd, errmsg); return; } int midx = (int)call->midx; SEXP fn = (midx >= 1 && midx <= LENGTH(dispatch)) ? 
VECTOR_ELT(dispatch, midx - 1) : R_NilValue; if (fn == R_NilValue) { char msg[128]; snprintf(msg, sizeof(msg), "%s function not found: m%d", label, midx); send_fail_to_client(client_fd, msg); free_morloc_call(call); return; } // Build R pairlist of raw-vector arguments: fn(arg1, arg2, ...) int nprotect = 0; SEXP pairlist = R_NilValue; for (int i = (int)call->nargs - 1; i >= 0; i--) { size_t arg_size = morloc_packet_size(call->args[i], &errmsg); if (errmsg) { UNPROTECT(nprotect); send_fail_to_client(client_fd, errmsg); free_morloc_call(call); return; } SEXP r_arg = PROTECT(allocVector(RAWSXP, arg_size)); nprotect++; memcpy(RAW(r_arg), call->args[i], arg_size); pairlist = PROTECT(Rf_cons(r_arg, pairlist)); nprotect++; } free_morloc_call(call); SEXP r_call = PROTECT(Rf_lcons(fn, pairlist)); nprotect++; // Single crossing into R: evaluate the manifold int eval_err = 0; SEXP result = R_tryEvalSilent(r_call, R_GlobalEnv, &eval_err); if (eval_err || result == R_NilValue || TYPEOF(result) != RAWSXP) { UNPROTECT(nprotect); send_fail_to_client(client_fd, eval_err ? R_curErrorBuf() : "manifold returned non-raw result"); return; } PROTECT(result); nprotect++; send_packet_to_foreign_server(client_fd, RAW(result), &errmsg); UNPROTECT(nprotect); } // Process one client job entirely in C. Only crosses into R for // the actual manifold evaluation. 
// Process one client job entirely in C. Only crosses into R for
// the actual manifold evaluation.
//
// Reads one packet from the client socket, dispatches it by type
// (local call, remote call, or ping), and always closes the client
// socket before returning. Any error message produced by the packet
// predicates or the ping round-trip is reported back to the client as
// a fail packet and then freed (the original leaked it).
static void run_job_c(int client_fd, SEXP dispatch, SEXP remote_dispatch) {
    char* errmsg = NULL;

    // Read the full request packet; on read failure report and bail out.
    uint8_t* packet = stream_from_client(client_fd, &errmsg);
    if (errmsg) {
        send_fail_to_client(client_fd, errmsg);
        free(errmsg);
        close_socket(client_fd);
        return;
    }

    // Dispatch by packet type. Each predicate may set errmsg; once set,
    // all remaining branches are skipped and the error handler below runs.
    bool is_local = packet_is_local_call(packet, &errmsg);
    if (!errmsg && is_local) {
        dispatch_manifold_c(client_fd, packet, dispatch, "Local");
    } else if (!errmsg) {
        bool is_remote = packet_is_remote_call(packet, &errmsg);
        if (!errmsg && is_remote) {
            dispatch_manifold_c(client_fd, packet, remote_dispatch, "Remote");
        } else if (!errmsg) {
            bool is_ping_pkt = packet_is_ping(packet, &errmsg);
            if (!errmsg && is_ping_pkt) {
                // Ping: echo a pong packet back to the client.
                uint8_t* pong = return_ping(packet, &errmsg);
                if (!errmsg) {
                    send_packet_to_foreign_server(client_fd, pong, &errmsg);
                    free(pong);
                }
            } else if (!errmsg) {
                send_fail_to_client(client_fd, "Unexpected packet type");
            }
        }
    }

    // Report any accumulated error to the client. BUG FIX: the message is
    // heap-allocated by the callee (see the free() on the read path above),
    // so it must be freed here as well; the original code leaked it.
    if (errmsg) {
        send_fail_to_client(client_fd, errmsg);
        free(errmsg);
    }

    free(packet);
    close_socket(client_fd);
}
// Tight C worker loop. Receives fds from the job queue and processes them,
// crossing into R only for manifold evaluation.
SEXP morloc_worker_loop_c(SEXP pipe_fd_r, SEXP dispatch_r, SEXP remote_dispatch_r) { int pipe_fd = INTEGER(pipe_fd_r)[0]; PROTECT(dispatch_r); PROTECT(remote_dispatch_r); while (!r_shutting_down) { int client_fd = recv_fd_c(pipe_fd); if (client_fd < 0) break; run_job_c(client_fd, dispatch_r, remote_dispatch_r); fflush(stdout); } UNPROTECT(2); return R_NilValue; } // }}} C-level worker loop // }}} exported functions void R_init_rmorloc(DllInfo *info) { R_CallMethodDef callMethods[] = { {"morloc_start_daemon", (DL_FUNC) &morloc_start_daemon, 4}, {"morloc_wait_for_client", (DL_FUNC) &morloc_wait_for_client, 1}, {"morloc_read_morloc_call_packet", (DL_FUNC) &morloc_read_morloc_call_packet, 1}, {"morloc_send_packet_to_foreign_server", (DL_FUNC) &morloc_send_packet_to_foreign_server, 2}, {"morloc_stream_from_client", (DL_FUNC) &morloc_stream_from_client, 1}, {"morloc_close_socket", (DL_FUNC) &morloc_close_socket, 1}, {"morloc_foreign_call", (DL_FUNC) &morloc_foreign_call, 3}, {"morloc_get_value", (DL_FUNC) &morloc_get_value, 2}, {"morloc_put_value", (DL_FUNC) &morloc_put_value, 2}, {"morloc_mlc_show", (DL_FUNC) &morloc_mlc_show, 2}, {"morloc_is_ping", (DL_FUNC) &morloc_is_ping, 1}, {"morloc_is_local_call", (DL_FUNC) &morloc_is_local_call, 1}, {"morloc_is_remote_call", (DL_FUNC) &morloc_is_remote_call, 1}, {"morloc_remote_call", (DL_FUNC) &morloc_remote_call, 5}, {"morloc_pong", (DL_FUNC) &morloc_pong, 1}, {"morloc_make_fail_packet", (DL_FUNC) &morloc_make_fail_packet, 1}, {"morloc_shinit", (DL_FUNC) &morloc_shinit, 3}, {"morloc_socketpair", (DL_FUNC) &morloc_socketpair, 0}, {"morloc_fork", (DL_FUNC) &morloc_fork, 0}, {"morloc_exit", (DL_FUNC) &morloc_exit, 1}, {"morloc_send_fd", (DL_FUNC) &morloc_send_fd, 2}, {"morloc_recv_fd", (DL_FUNC) &morloc_recv_fd, 1}, {"morloc_kill", (DL_FUNC) &morloc_kill, 2}, {"morloc_waitpid", (DL_FUNC) &morloc_waitpid, 1}, {"morloc_waitpid_blocking", (DL_FUNC) &morloc_waitpid_blocking, 1}, {"morloc_install_sigterm_handler", (DL_FUNC) 
&morloc_install_sigterm_handler, 0}, {"morloc_set_line_buffered", (DL_FUNC) &morloc_set_line_buffered, 0}, {"morloc_is_shutting_down", (DL_FUNC) &morloc_is_shutting_down, 0}, {"morloc_detach_daemon", (DL_FUNC) &morloc_detach_daemon, 1}, {"morloc_shared_counter_create", (DL_FUNC) &morloc_shared_counter_create, 0}, {"morloc_shared_counter_inc", (DL_FUNC) &morloc_shared_counter_inc, 1}, {"morloc_shared_counter_dec", (DL_FUNC) &morloc_shared_counter_dec, 1}, {"morloc_shared_counter_read", (DL_FUNC) &morloc_shared_counter_read, 1}, {"morloc_pipe", (DL_FUNC) &morloc_pipe, 0}, {"morloc_write_byte", (DL_FUNC) &morloc_write_byte, 2}, {"morloc_close_fd", (DL_FUNC) &morloc_close_fd, 1}, {"morloc_worker_loop_c", (DL_FUNC) &morloc_worker_loop_c, 3}, {NULL, NULL, 0} }; R_registerRoutines(info, NULL, callMethods, NULL, NULL); R_useDynamicSymbols(info, FALSE); } ================================================ FILE: data/misc/mlccpptypes.hpp ================================================ ================================================ FILE: data/morloc/morloc.h ================================================ // morloc.h -- C ABI contract for libmorloc.so // // This is the single public header for consumers of the morloc runtime library. // It defines all types and function declarations exported by libmorloc.so. #ifndef __MORLOC_H__ #define __MORLOC_H__ #ifdef __cplusplus extern "C" { #endif // ======================================================================== // Section 1: System includes and basic typedefs // ======================================================================== #include // va_list, va_start, va_arg, va_end (used by pool templates) #include #include #include #include // FILE* for read_binary_fd #include // fd_set #include #include // pid_t, ssize_t #include // struct sockaddr_un // Error message output parameter: all fallible functions take this as their // last argument. 
On failure the callee sets *errmsg_ to a heap-allocated // string describing the error. On success *errmsg_ is set to NULL. typedef char** errmsg_; #define ERRMSG char** errmsg_ // Exit codes used by several functions. #define EXIT_PASS 0 #define EXIT_FAIL 1 // Convenience macros used by language extensions (pymorloc.c, rmorloc.c, cppmorloc.cpp) #include // free #define FREE(ptr) if(ptr != NULL){ free(ptr); ptr = NULL; } // Size limits shared between library and consumers. #define MAX_FILENAME_SIZE 128 #define MAX_ERRMSG_SIZE 1024 #define MAX_PATH_SIZE 512 #define BUFFER_SIZE 4096 // Opaque JSON builder handle (Rust uses its own struct internally). typedef void* json_buf_t; // ======================================================================== // Section 2: Memory / SHM types // ======================================================================== // Pointer types for the multi-volume shared memory pool. // // relptr_t -- index into the logical (multi-volume) pool; shared between // processes. // volptr_t -- index into a single volume (0 = first block after shm header). // absptr_t -- absolute virtual address in the current process. typedef ssize_t relptr_t; typedef ssize_t volptr_t; typedef void* absptr_t; #define VOLNULL -1 #define RELNULL -1 // Magic numbers for integrity checks. #define SHM_MAGIC 0xFECA0DF0 #define BLK_MAGIC 0x0CB10DF0 #define MAX_VOLUME_NUMBER 32 // Shared memory volume header (lives at the start of each mmap'd region). typedef struct shm_s { unsigned int magic; char volume_name[MAX_FILENAME_SIZE]; int volume_index; size_t volume_size; size_t relative_offset; // Note: pthread_rwlock_t is opaque; consumers should not access it directly. // It is included here so that sizeof(shm_t) is correct for mmap calculations. // On Linux x86_64 this is typically 56 bytes. char _rwlock_storage[56]; // placeholder for pthread_rwlock_t volptr_t cursor; } shm_t; // Block header preceding every allocation inside a shared memory volume. 
// Atomic reference count for thread safety. Layout is stable (no padding). typedef struct block_header_s { unsigned int magic; unsigned int reference_count; // actually _Atomic in the C impl size_t size; } block_header_t; // ======================================================================== // Section 3: Schema types // ======================================================================== typedef enum { MORLOC_NIL, MORLOC_BOOL, MORLOC_SINT8, MORLOC_SINT16, MORLOC_SINT32, MORLOC_SINT64, MORLOC_UINT8, MORLOC_UINT16, MORLOC_UINT32, MORLOC_UINT64, MORLOC_FLOAT32, MORLOC_FLOAT64, MORLOC_TENSOR, MORLOC_STRING, MORLOC_ARRAY, MORLOC_TUPLE, MORLOC_MAP, MORLOC_OPTIONAL } morloc_serial_type; // Single-character schema encoding tokens. #define SCHEMA_NIL 'z' #define SCHEMA_BOOL 'b' #define SCHEMA_SINT 'i' #define SCHEMA_UINT 'u' #define SCHEMA_FLOAT 'f' #define SCHEMA_STRING 's' #define SCHEMA_ARRAY 'a' #define SCHEMA_TENSOR 'T' #define SCHEMA_TUPLE 't' #define SCHEMA_MAP 'm' #define SCHEMA_OPTIONAL '?' // Schema: recursive type descriptor used for serialisation/deserialisation. struct Schema; typedef struct Schema { morloc_serial_type type; size_t size; // number of parameters size_t width; // bytes per element when stored in a fixed-width array size_t* offsets; // field offsets (tuples) or ndim (tensors, in offsets[0]) char* hint; struct Schema** parameters; char** keys; // field names (records only) } Schema; // Variable-length array in voidstar representation. typedef struct Array { size_t size; relptr_t data; } Array; // Dense N-dimensional tensor in voidstar representation (row-major / C order). 
typedef struct Tensor { size_t total_elements; uint32_t device_type; // reserved: 0 = CPU uint32_t device_id; // reserved: 0 relptr_t data; // relptr to contiguous element data relptr_t shape; // relptr to int64_t[ndim] } Tensor; // ======================================================================== // Section 4: Packet types // ======================================================================== #define MORLOC_PACKET_MAGIC 0x0707f86d // Packet type discriminator. typedef uint8_t command_type_t; #define PACKET_TYPE_DATA ((command_type_t)0) #define PACKET_TYPE_CALL ((command_type_t)1) #define PACKET_TYPE_PING ((command_type_t)2) // Packed command sub-structs (all 8 bytes wide). typedef struct __attribute__((packed)) packet_command_type_s { command_type_t type; uint8_t padding[7]; } packet_command_type_t; #define PACKET_ENTRYPOINT_LOCAL 0x00 #define PACKET_ENTRYPOINT_REMOTE_SFS 0x01 typedef struct __attribute__((packed)) packet_command_call_s { command_type_t type; uint8_t entrypoint; uint8_t padding[2]; uint32_t midx; } packet_command_call_t; // Data source, format, compression, encryption, and status constants. 
#define PACKET_SOURCE_MESG 0x00 #define PACKET_SOURCE_FILE 0x01 #define PACKET_SOURCE_RPTR 0x02 #define PACKET_FORMAT_JSON 0x00 #define PACKET_FORMAT_MSGPACK 0x01 #define PACKET_FORMAT_TEXT 0x02 #define PACKET_FORMAT_DATA 0x03 #define PACKET_FORMAT_VOIDSTAR 0x04 #define PACKET_FORMAT_ARROW 0x05 #define PACKET_COMPRESSION_NONE 0x00 #define PACKET_ENCRYPTION_NONE 0x00 #define PACKET_STATUS_PASS 0x00 #define PACKET_STATUS_FAIL 0x01 typedef struct __attribute__((packed)) packet_command_data_s { command_type_t type; uint8_t source; uint8_t format; uint8_t compression; uint8_t encryption; uint8_t status; uint8_t padding[2]; } packet_command_data_t; typedef struct __attribute__((packed)) packet_command_ping_s { command_type_t type; uint8_t padding[7]; } packet_command_ping_t; typedef union __attribute__((packed)) packet_command_u { packet_command_type_t cmd_type; packet_command_call_t call; packet_command_data_t data; packet_command_ping_t ping; } packet_command_t; // 32-byte packet header (stable binary format, packed). typedef struct __attribute__((packed)) morloc_packet_header_s { uint32_t magic; uint16_t plain; uint16_t version; uint16_t flavor; uint16_t mode; packet_command_t command; uint32_t offset; uint64_t length; } morloc_packet_header_t; // Inline threshold: voidstar data <= this size is embedded in packet payload. #define MORLOC_INLINE_THRESHOLD (64 * 1024) // Metadata sub-header in packet metadata sections. 
#define MORLOC_METADATA_TYPE_SCHEMA_STRING 0x01 #define MORLOC_METADATA_TYPE_XXHASH 0x02 #define MORLOC_METADATA_HEADER_MAGIC "mmh" typedef struct __attribute__((packed)) morloc_metadata_header_s { char magic[3]; uint8_t type; uint32_t size; } morloc_metadata_header_t; // ======================================================================== // Section 5: Expression / eval types // ======================================================================== typedef struct argument_s { char* value; char** fields; char** default_fields; size_t size; } argument_t; typedef enum { MORLOC_X_DAT, MORLOC_X_APP, MORLOC_X_LAM, MORLOC_X_BND, MORLOC_X_PAT, MORLOC_X_FMT, MORLOC_X_SHOW, MORLOC_X_READ } morloc_expression_type; typedef enum { APPLY_PATTERN, APPLY_LAMBDA, APPLY_FORMAT } morloc_app_expression_type; typedef enum { SELECT_BY_KEY, SELECT_BY_INDEX, SELECT_END } morloc_pattern_type; // Forward declarations. typedef struct morloc_expression_s morloc_expression_t; typedef struct morloc_app_expression_s morloc_app_expression_t; typedef struct morloc_lam_expression_s morloc_lam_expression_t; typedef struct morloc_data_s morloc_data_t; typedef struct morloc_pattern_s morloc_pattern_t; typedef union primitive_u { char* s; uint8_t z; bool b; int8_t i1; int16_t i2; int32_t i4; int64_t i8; uint8_t u1; uint16_t u2; uint32_t u4; uint64_t u8; float f4; double f8; } primitive_t; typedef struct morloc_data_array_s { Schema* schema; size_t size; morloc_expression_t** values; } morloc_data_array_t; typedef struct morloc_data_s { bool is_voidstar; union { primitive_t lit_val; morloc_expression_t** tuple_val; morloc_data_array_t* array_val; void* voidstar; } data; } morloc_data_t; typedef struct morloc_app_expression_s { morloc_app_expression_type type; union { morloc_pattern_t* pattern; morloc_lam_expression_t* lambda; char** fmt; } function; morloc_expression_t** args; size_t nargs; } morloc_app_expression_t; typedef struct morloc_lam_expression_s { size_t nargs; char** args; 
morloc_expression_t* body;
} morloc_lam_expression_t;

// A pattern selects components of a value, either by positional index or by
// record key; `selectors` holds one sub-pattern per selected field.
// NOTE(review): `morloc_pattern_t` is used inside its own definition, so a
// forward typedef presumably exists earlier in this header -- confirm.
typedef struct morloc_pattern_s {
    morloc_pattern_type type;
    size_t size;              // number of selected fields
    union {
        size_t* indices;      // positional selectors
        char** keys;          // record-key selectors
    } fields;
    morloc_pattern_t** selectors;
} morloc_pattern_t;

// Tagged expression node; `type` says which union member is active.
typedef struct morloc_expression_s {
    morloc_expression_type type;
    Schema* schema;
    union {
        morloc_app_expression_t* app_expr;
        morloc_lam_expression_t* lam_expr;
        char* bnd_expr;
        char** interpolation;
        morloc_pattern_t* pattern_expr;
        morloc_data_t* data_expr;
        morloc_expression_t* unary_expr;
    } expr;
} morloc_expression_t;

// ========================================================================
// Section 6: Manifest types
// ========================================================================

// One language pool: interpreter/launch command plus its socket name.
typedef struct {
    char* lang;
    char** exec;   // NULL-terminated array
    char* socket;  // socket basename
} manifest_pool_t;

// CLI argument kinds: positional, option, flag, or group of sub-arguments.
typedef enum { MARG_POS = 0, MARG_OPT, MARG_FLAG, MARG_GRP } manifest_arg_kind_t;

typedef struct manifest_arg_s manifest_arg_t;

// Named member of an argument group.
typedef struct {
    char* key;
    manifest_arg_t* arg;
} manifest_grp_entry_t;

// Description of a single command-line argument.
struct manifest_arg_s {
    manifest_arg_kind_t kind;
    char** desc;
    char* metavar;
    char* type_desc;
    bool quoted;
    char short_opt;
    char* long_opt;
    char* long_rev;
    char* default_val;
    char grp_short;
    char* grp_long;
    manifest_grp_entry_t* entries;  // only used when kind == MARG_GRP
    size_t n_entries;
};

// Named group of commands (for help output).
typedef struct {
    char* name;
    char** desc;
} manifest_cmd_group_t;

// One exported command: its id, pool placement, schemas and CLI arguments.
typedef struct {
    char* name;
    bool is_pure;
    uint32_t mid;
    size_t pool_index;
    size_t* needed_pools;
    size_t n_needed_pools;
    char** arg_schemas;
    char* return_schema;
    char** desc;
    char* return_type;
    char** return_desc;
    manifest_arg_t* args;
    size_t n_args;
    morloc_expression_t* expr;
    char* group;
} manifest_command_t;

// Optional service endpoint description.
typedef struct {
    char* type;
    char* host;
    int port;
    char* socket;
} manifest_service_t;

// Top-level manifest for a compiled morloc program.
typedef struct {
    int version;
    char* name;
    char* build_dir;
    manifest_pool_t* pools;
    size_t n_pools;
    manifest_command_t* commands;
    size_t n_commands;
    manifest_cmd_group_t* groups;
    size_t n_groups;
    manifest_service_t* service;
} manifest_t;

// ========================================================================
// Section 7: Daemon / HTTP / Router types
// ========================================================================

// -- Call types --

// A decoded call packet: target morloc id plus its argument packets.
typedef struct morloc_call_s {
    uint32_t midx;
    uint8_t** args;
    size_t nargs;
    int owns_args;  // nonzero if free_morloc_call should release args
} morloc_call_t;

// Singly-linked list of connected client file descriptors.
typedef struct client_list_s {
    int fd;
    struct client_list_s* next;
} client_list_t;

// State for a per-language daemon listening on a UNIX socket.
typedef struct language_daemon_s {
    char* socket_path;
    char* tmpdir;
    char* shm_basename;
    shm_t* shm;
    size_t shm_default_size;
    int server_fd;
    fd_set read_fds;
    client_list_t* client_fds;
} language_daemon_t;

// Handle on a launched language pool process.
typedef struct morloc_socket_s {
    char* lang;
    char** syscmd;
    char* socket_filename;
    int pid;
} morloc_socket_t;

// -- Binding store --

// One stored binding: hashed expression plus its artifacts and names.
typedef struct binding_entry_s {
    uint64_t hash;
    char* expr;
    char* artifact_dir;
    char* type_sig;
    char** names;
    size_t n_names;
} binding_entry_t;

// Open-addressed table of bindings persisted under base_dir.
typedef struct binding_store_s {
    binding_entry_t* entries;
    size_t capacity;
    size_t count;
    char* base_dir;
    char* names_path;
} binding_store_t;

// -- Daemon config and request/response --

typedef void (*pool_check_fn_t)(morloc_socket_t* sockets, size_t n_pools);
typedef bool (*pool_alive_fn_t)(size_t pool_index);

// Runtime configuration handed to daemon_run / router_run.
typedef struct daemon_config_s {
    const char* unix_socket_path;
    int tcp_port;
    int http_port;
    pool_check_fn_t pool_check_fn;
    pool_alive_fn_t pool_alive_fn;
    size_t n_pools;
    int eval_timeout;
} daemon_config_t;

// JSON-RPC style methods accepted by the daemon.
typedef enum {
    DAEMON_CALL,
    DAEMON_DISCOVER,
    DAEMON_HEALTH,
    DAEMON_EVAL,
    DAEMON_TYPECHECK,
    DAEMON_BIND,
    DAEMON_BINDINGS,
    DAEMON_UNBIND
} daemon_method_t;

// Parsed daemon request; unused fields are NULL depending on method.
typedef struct daemon_request_s {
    char* id;
    daemon_method_t method;
    char* command;
    char* args_json;
    char* expr;
    char* name;
} daemon_request_t;

// Daemon reply; exactly one of result_json/error is meaningful per `success`.
typedef struct daemon_response_s {
    char* id;
    bool success;
    char* result_json;
    char* error;
} daemon_response_t;

// -- HTTP types --

typedef enum { HTTP_GET, HTTP_POST, HTTP_DELETE, HTTP_OPTIONS } http_method_t;

// Minimal parsed HTTP request (path is truncated to 255 chars + NUL).
typedef struct http_request_s {
    http_method_t method;
    char path[256];
    char* body;
    size_t body_len;
} http_request_t;

// -- Router types --

// One program managed by the router, with its daemon process and socket.
typedef struct router_program_s {
    char* name;
    char* manifest_path;
    manifest_t* manifest;
    pid_t daemon_pid;
    // sized to the platform's sockaddr_un path capacity
    char daemon_socket[sizeof(((struct sockaddr_un*)0)->sun_path)];
} router_program_t;

typedef struct router_s {
    router_program_t* programs;
    size_t n_programs;
    char* fdb_path;
} router_t;

// ========================================================================
// Section 8: Pool types
// ========================================================================

// Dispatch callback: run manifest id `mid` on `args`, returning a packet.
typedef uint8_t* (*pool_dispatch_fn_t)(
    uint32_t mid,
    const uint8_t** args,
    size_t nargs,
    void* ctx
);

typedef enum { POOL_THREADS, POOL_FORK, POOL_SINGLE } pool_concurrency_t;

// Static configuration for a language pool's worker model.
typedef struct {
    pool_dispatch_fn_t local_dispatch;
    pool_dispatch_fn_t remote_dispatch;
    void* dispatch_ctx;
    pool_concurrency_t concurrency;
    int initial_workers;
    bool dynamic_scaling;
    void (*post_fork_child)(void* ctx);
} pool_config_t;

typedef struct pool_state_s pool_state_t;

// ========================================================================
// Section 9: Arrow types
// ========================================================================

// Standard Arrow C data interface structs (see the Arrow spec); guarded so
// they coexist with an external nanoarrow/arrow header.
#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE

struct ArrowSchema {
    const char* format;
    const char* name;
    const char* metadata;
    int64_t flags;
    int64_t n_children;
    struct ArrowSchema** children;
    struct ArrowSchema* dictionary;
    void (*release)(struct ArrowSchema*);
    void* private_data;
};

struct ArrowArray {
    int64_t length;
    int64_t null_count;
    int64_t offset;
    int64_t n_buffers;
    int64_t n_children;
    const void** buffers;
    struct ArrowArray** children;
    struct ArrowArray* dictionary;
    void (*release)(struct ArrowArray*);
    void* private_data;
};

#endif // ARROW_C_DATA_INTERFACE

#define ARROW_SHM_MAGIC 0xA770DA7A
#define ARROW_BUFFER_ALIGN 64
// Round x up to the next multiple of ARROW_BUFFER_ALIGN.
#define ARROW_ALIGN_UP(x) (((x) + ARROW_BUFFER_ALIGN - 1) & ~((size_t)ARROW_BUFFER_ALIGN - 1))

// Per-column descriptor inside the shared-memory Arrow layout.
typedef struct arrow_column_desc {
    morloc_serial_type type;
    uint64_t length;
    uint64_t null_count;
    uint32_t name_offset;
    uint16_t name_length;
    uint64_t data_offset;
    uint64_t data_size;
} arrow_column_desc_t;

// Header preceding columnar data in shared memory; magic = ARROW_SHM_MAGIC.
typedef struct arrow_shm_header {
    uint32_t magic;
    uint32_t n_columns;
    uint64_t n_rows;
    uint64_t total_size;
} arrow_shm_header_t;

// ========================================================================
// Section 10: Slurm / resource types
// ========================================================================

#define MAX_SLURM_COMMAND_LENGTH 1024

// Resource request for a remote (Slurm) call.
typedef struct resources_s {
    int memory;  // in GB
    int time;    // walltime in seconds
    int cpus;
    int gpus;
} resources_t;

// ========================================================================
// Section 11: Function declarations -- Memory / SHM
// ========================================================================

shm_t* shinit(const char* shm_basename, size_t volume_index, size_t shm_size, ERRMSG);
shm_t* shopen(size_t volume_index, ERRMSG);
bool shclose(ERRMSG);
void shm_set_fallback_dir(const char* dir);
void* shmalloc(size_t size, ERRMSG);
void* shmemcpy(void* src, size_t size, ERRMSG);
bool shfree(absptr_t ptr, ERRMSG);
bool shincref(absptr_t ptr, ERRMSG);
bool shfree_by_schema(absptr_t ptr, const Schema* schema, ERRMSG);
void* shcalloc(size_t nmemb, size_t size, ERRMSG);
void* shrealloc(void* ptr, size_t size, ERRMSG);
size_t total_shm_size(void);

// Pointer-space conversions between relative, volume and absolute pointers.
volptr_t rel2vol(relptr_t ptr, ERRMSG);
absptr_t rel2abs(relptr_t ptr, ERRMSG);
// Convenience: resolve a relptr, using base_ptr if available (no SHM lookup needed).
static inline void* resolve_relptr(relptr_t relptr, const void* base_ptr, ERRMSG) {
    if (base_ptr) {
        // Fast path: offset from the caller-supplied base (const is cast away;
        // callers are expected to own the underlying buffer).
        return (char*)base_ptr + relptr;
    }
    return rel2abs(relptr, errmsg_);
}

relptr_t vol2rel(volptr_t ptr, shm_t* shm);
absptr_t vol2abs(volptr_t ptr, shm_t* shm);
relptr_t abs2rel(absptr_t ptr, ERRMSG);
shm_t* abs2shm(absptr_t ptr, ERRMSG);
block_header_t* abs2blk(void* ptr, ERRMSG);

// ========================================================================
// Section 12: Function declarations -- Schema
// ========================================================================

Schema* parse_schema(const char* schema, ERRMSG);
char* schema_to_string(const Schema* schema);
void* get_ptr(const Schema* schema, ERRMSG);
void free_schema(Schema* schema);
bool schema_is_fixed_width(const Schema* schema);
size_t schema_alignment(const Schema* schema);
size_t calculate_voidstar_size(const void* data, const Schema* schema, ERRMSG);

// Inline helpers used by language extensions (pymorloc.c, rmorloc.c)
// Round x up to the next multiple of `align` (align must be a power of two).
#define ALIGN_UP(x, align) (((x) + (align) - 1) & ~((size_t)(align) - 1))

// Number of dimensions of a tensor schema; 0 for NULL/empty schemas.
static inline size_t schema_tensor_ndim(const Schema* schema) {
    if (schema == NULL || schema->size == 0) return 0;
    // ndim is stored in offsets[0] for tensor schemas
    return schema->offsets ? schema->offsets[0] : 0;
}

// ========================================================================
// Section 13: Function declarations -- Serialisation (pack/unpack)
// ========================================================================

int pack(const void* mlc, const char* schema_str, char** mpkptr, size_t* mpk_size, ERRMSG);
int pack_with_schema(const void* mlc, const Schema* schema, char** mpkptr, size_t* mpk_size, ERRMSG);
int unpack_with_schema(const char* mpk, size_t mpk_size, const Schema* schema, void** mlcptr, ERRMSG);

// ========================================================================
// Section 14: Function declarations -- Packets
// ========================================================================

morloc_packet_header_t* read_morloc_packet_header(const uint8_t* msg, ERRMSG);
bool packet_is_ping(const uint8_t* packet, ERRMSG);
bool packet_is_local_call(const uint8_t* packet, ERRMSG);
bool packet_is_remote_call(const uint8_t* packet, ERRMSG);
size_t morloc_packet_size_from_header(const morloc_packet_header_t* header);
size_t morloc_packet_size(const uint8_t* packet, ERRMSG);
uint8_t* return_ping(const uint8_t* packet, ERRMSG);
uint8_t* make_ping_packet(void);

// Packet constructors for the different payload encodings.
uint8_t* make_standard_data_packet(relptr_t ptr, const Schema* schema);
uint8_t* make_arrow_data_packet(relptr_t ptr, const Schema* schema);
uint8_t* make_mpk_data_packet(const char* mpk_filename, const Schema* schema);
uint8_t* make_data_packet_from_mpk(const char* mpk, size_t mpk_size, const Schema* schema);
int get_data_packet_as_mpk(const uint8_t* packet, const Schema* schema, char** mpk_out, size_t* mpk_size_out, ERRMSG);
char* read_schema_from_packet_meta(const uint8_t* packet, ERRMSG);
uint8_t* make_fail_packet(const char* failure_message);
char* get_morloc_data_packet_error_message(const uint8_t* data, ERRMSG);
uint8_t* get_morloc_data_packet_value(const uint8_t* data, const Schema* schema, ERRMSG);
uint8_t* make_morloc_local_call_packet(uint32_t midx, const uint8_t** arg_packets, size_t nargs, ERRMSG);
uint8_t* make_morloc_remote_call_packet(uint32_t midx, const uint8_t** arg_packets, size_t nargs, ERRMSG);
morloc_call_t* read_morloc_call_packet(const uint8_t* packet, ERRMSG);
void free_morloc_call(morloc_call_t* call);
int print_morloc_data_packet(const uint8_t* packet, const Schema* schema, ERRMSG);
int flatten_voidstar_to_buffer(const void* data, const Schema* schema, uint8_t** out_buf, size_t* out_size, ERRMSG);
uint8_t* make_data_packet_auto(void* voidstar, relptr_t relptr, const Schema* schema, ERRMSG);
int adjust_voidstar_relptrs(void* data, const Schema* schema, relptr_t base_rel, ERRMSG);
void* read_voidstar_binary(const uint8_t* blob, size_t blob_size, const Schema* schema, ERRMSG);
bool parse_morloc_call_arguments(uint8_t* packet, uint8_t** args, size_t* nargs, ERRMSG);
bool hash_morloc_packet(const uint8_t* packet, const Schema* schema, uint64_t seed, uint64_t* hash, ERRMSG);

// ========================================================================
// Section 15: Function declarations -- Printing / output
// ========================================================================

char* quoted(const char* input);
bool print_voidstar(const void* voidstar, const Schema* schema, ERRMSG);
bool pretty_print_voidstar(const void* voidstar, const Schema* schema, ERRMSG);
bool print_arrow_as_json(const void* data, ERRMSG);
bool print_arrow_as_table(const void* data, ERRMSG);
bool print_hex_dump(const uint8_t* data, size_t size, ERRMSG);
char* voidstar_to_json_string(const void* voidstar, const Schema* schema, ERRMSG);

// ========================================================================
// Section 16: Function declarations -- Daemon / socket communication
// ========================================================================

void close_socket(int socket_id);
void close_daemon(language_daemon_t** daemon_ptr);
language_daemon_t* start_daemon(
    const char* socket_path,
    const char* tmpdir,
    const char* shm_basename,
    size_t shm_default_size,
    ERRMSG);
uint8_t* stream_from_client_wait(int client_fd, int pselect_timeout_us, int recv_timeout_us, ERRMSG);
uint8_t* stream_from_client(int client_fd, ERRMSG);
uint8_t* send_and_receive_over_socket_wait(
    const char* socket_path,
    const uint8_t* packet,
    int pselect_timeout_us,
    int recv_timeout_us,
    ERRMSG);
uint8_t* send_and_receive_over_socket(const char* socket_path, const uint8_t* packet, ERRMSG);
size_t send_packet_to_foreign_server(int client_fd, uint8_t* packet, ERRMSG);
int wait_for_client_with_timeout(language_daemon_t* daemon, int timeout_us, ERRMSG);
int wait_for_client(language_daemon_t* daemon, ERRMSG);

// Daemon event loop and dispatch.
void daemon_run(daemon_config_t* config, manifest_t* manifest, morloc_socket_t* sockets, size_t n_pools, const char* shm_basename);
daemon_response_t* daemon_dispatch(manifest_t* manifest, daemon_request_t* request, morloc_socket_t* sockets, const char* shm_basename);
daemon_request_t* daemon_parse_request(const char* json, size_t len, ERRMSG);
daemon_response_t* daemon_parse_response(const char* json, size_t len, ERRMSG);
char* daemon_serialize_response(daemon_response_t* response, size_t* out_len);
char* daemon_build_discovery(manifest_t* manifest);
void daemon_set_eval_timeout(int timeout_sec);
void daemon_free_request(daemon_request_t* req);
void daemon_free_response(daemon_response_t* resp);

// Binding store (public types only; internal hash table functions are Rust-side).
binding_store_t* binding_store_init(const char* base_dir);
void binding_store_free(binding_store_t* store);

// ========================================================================
// Section 17: Function declarations -- HTTP
// ========================================================================

http_request_t* http_parse_request(int fd, ERRMSG);
bool http_write_response(int fd, int status, const char* content_type, const char* body, size_t body_len);
daemon_request_t* http_to_daemon_request(http_request_t* req, ERRMSG);
void http_free_request(http_request_t* req);

// ========================================================================
// Section 18: Function declarations -- Router
// ========================================================================

router_t* router_init(const char* fdb_path, ERRMSG);
void router_run(daemon_config_t* config, router_t* router);
bool router_start_program(router_program_t* prog, ERRMSG);
daemon_response_t* router_forward(router_t* router, const char* program, daemon_request_t* request, ERRMSG);
char* router_build_discovery(router_t* router);
void router_free(router_t* router);

// ========================================================================
// Section 19: Function declarations -- Pool
// ========================================================================

int pool_main(int argc, char** argv, pool_config_t* config);
uint8_t* pool_dispatch_packet(
    const uint8_t* packet,
    pool_dispatch_fn_t local_dispatch,
    pool_dispatch_fn_t remote_dispatch,
    void* ctx);
void pool_mark_busy(void);
void pool_mark_idle(void);

// ========================================================================
// Section 20: Function declarations -- Arrow
// ========================================================================

size_t arrow_element_size(morloc_serial_type type);
const char* arrow_format_string(morloc_serial_type type);
morloc_serial_type arrow_format_to_type(const char* format);
relptr_t arrow_to_shm(const struct ArrowArray* array, const struct ArrowSchema* schema, ERRMSG);
int arrow_validate(const arrow_shm_header_t* header, const Schema* schema, ERRMSG);
const void* arrow_column_data(const arrow_shm_header_t* header, uint32_t col_index);
const arrow_column_desc_t* arrow_column_desc(const arrow_shm_header_t* header, uint32_t col_index);
const char* arrow_column_name(const arrow_shm_header_t* header, uint32_t col_index);
int arrow_from_shm(const arrow_shm_header_t* header, struct ArrowSchema* out_schema, struct ArrowArray* out_array, ERRMSG);

// ========================================================================
// Section 21: Function declarations -- Cache
// ========================================================================

char* put_cache_packet(const uint8_t* voidstar, const Schema* schema, uint64_t key, const char* cache_path, ERRMSG);
uint8_t* get_cache_packet(uint64_t key, const char* cache_path, ERRMSG);
bool del_cache_packet(uint64_t key, const char* cache_path, ERRMSG);
char* check_cache_packet(uint64_t key, const char* cache_path, ERRMSG);

// ========================================================================
// Section 22: Function declarations -- CLI / argument parsing
// ========================================================================

argument_t* initialize_positional(char* value);
argument_t* initialize_unrolled(size_t size, char* default_value, char** fields, char** default_fields);
void free_argument_t(argument_t* arg);
uint8_t* parse_cli_data_argument(uint8_t* dest, const argument_t* arg, const Schema* schema, ERRMSG);
uint8_t* make_call_packet_from_cli(
    uint8_t* dest,
    uint32_t mid,
    argument_t** args,
    char** arg_schema_strs,
    ERRMSG);
void* load_morloc_data_file(const char* path, uint8_t* data, size_t data_size, const Schema* schema, ERRMSG);

// ========================================================================
// Section 23: Function declarations -- Expression evaluation
// ========================================================================

morloc_expression_t* make_morloc_bound_var(const char* schema_str, char* varname, ERRMSG);
morloc_expression_t* make_morloc_literal(const char* schema_str, primitive_t lit, ERRMSG);
morloc_expression_t* make_morloc_pattern(const char* schema_str, morloc_pattern_t* pattern, ERRMSG);
morloc_pattern_t* make_morloc_pattern_end(void);
absptr_t morloc_eval(
    morloc_expression_t* expr,
    Schema* return_schema,
    uint8_t** arg_voidstar,
    Schema** arg_schemas,
    size_t nargs,
    ERRMSG);

// ========================================================================
// Section 24: Function declarations -- Manifest
// ========================================================================

manifest_t* parse_manifest(const char* text, ERRMSG);
manifest_t* read_manifest(const char* path, ERRMSG);
void free_manifest(manifest_t* manifest);
morloc_expression_t* build_manifest_expr(const char* json_str, ERRMSG);
char* manifest_to_discovery_json(const manifest_t* manifest);

// ========================================================================
// Section 25: Function declarations -- Intrinsics
// ========================================================================

int mlc_save(const absptr_t data, const Schema* schema, const char* path, ERRMSG);
int mlc_save_json(const absptr_t data, const Schema* schema, const char* path, ERRMSG);
int mlc_save_voidstar(const absptr_t data, const Schema* schema, const char* path, ERRMSG);
void* mlc_load(const char* path, const Schema* schema, ERRMSG);
char* mlc_hash(const absptr_t data, const Schema* schema, ERRMSG);
char* mlc_show(const absptr_t data, const Schema* schema, ERRMSG);
void* mlc_read(const char* json_str, const Schema* schema, ERRMSG);
relptr_t write_voidstar_binary(int fd, const void* data, const Schema* schema, ERRMSG);

// ========================================================================
// Section 26: Function declarations -- Slurm
// ========================================================================

size_t parse_slurm_time(const char* time_str, ERRMSG);
char* write_slurm_time(int seconds);
bool slurm_job_is_complete(uint32_t job_id);
uint32_t submit_morloc_slurm_job(
    const char* nexus_path,
    const char* socket_basename,
    const char* call_packet_filename,
    const char* result_cache_filename,
    const char* output_filename,
    const char* error_filename,
    const resources_t* resources,
    ERRMSG);
uint8_t* remote_call(
    int midx,
    const char* socket_basename,
    const char* cache_path,
    const resources_t* resources,
    const uint8_t** arg_packets,
    size_t nargs,
    ERRMSG);

// ========================================================================
// Section 27: Function declarations -- Utility
// ========================================================================

void hex(const void* ptr, size_t size);
bool file_exists(const char* filename);
int mkdir_p(const char* path, ERRMSG);
void delete_directory(const char* path);
bool has_suffix(const char* x, const char* suffix);
int write_atomic(const char* filename, const uint8_t* data, size_t size, ERRMSG);
int write_binary_fd(int fd, const char* buf, size_t count, ERRMSG);
int print_binary(const char* buf, size_t count, ERRMSG);
uint8_t* read_binary_fd(FILE* file, size_t* file_size, ERRMSG);
uint8_t* read_binary_file(const char* filename, size_t* file_size, ERRMSG);

// ========================================================================
// Section 28: Function declarations -- Hashing
// ========================================================================

uint64_t morloc_xxh64(const void* input, size_t length, uint64_t seed);

// ========================================================================
// Section 29: Function declarations -- JSON reader
// ========================================================================

uint8_t* read_json_with_schema(uint8_t* voidstar, char* json_data, const Schema* schema, ERRMSG);

#ifdef __cplusplus
}
#endif

#endif // 
__MORLOC_H__ ================================================ FILE: data/rust/.gitignore ================================================ target/ ================================================ FILE: data/rust/Cargo.toml ================================================ [workspace] members = ["morloc-manifest", "morloc-runtime", "morloc-nexus", "morloc-manager"] resolver = "2" [workspace.dependencies] libc = "0.2" serde = { version = "1", features = ["derive"] } serde_json = "1" rmp-serde = "1" twox-hash = "2" nix = { version = "0.29", features = ["signal", "socket", "mman", "process", "fs"] } clap = { version = "4", features = ["derive"] } thiserror = "2" [profile.release] opt-level = 2 lto = "thin" ================================================ FILE: data/rust/morloc-manager/Cargo.toml ================================================ [package] name = "morloc-manager" version = "0.23.1" edition = "2021" description = "Container lifecycle manager for Morloc" [[bin]] name = "morloc-manager" path = "src/main.rs" [dependencies] serde = { workspace = true } serde_json = { workspace = true } nix = { version = "0.29", features = ["signal", "socket", "mman", "process", "fs", "user"] } clap = { workspace = true } thiserror = { workspace = true } sha2 = "0.10" chrono = { version = "0.4", features = ["serde"] } dirs = "6" [dev-dependencies] tempfile = "3" ================================================ FILE: data/rust/morloc-manager/src/config.rs ================================================ use std::fs; use std::os::unix::fs::{OpenOptionsExt, PermissionsExt}; use std::path::{Path, PathBuf}; use std::process::Command as StdCommand; use crate::error::{ManagerError, Result}; use crate::types::*; // ====================================================================== // Path utilities // ====================================================================== pub fn config_dir(scope: Scope) -> PathBuf { match scope { Scope::Local => dirs::config_dir() .unwrap_or_else(|| 
PathBuf::from("~/.config"))
            .join("morloc"),
        Scope::System => PathBuf::from("/etc/morloc"),
    }
}

/// Path of the top-level `config.json` for a scope.
pub fn config_path(scope: Scope) -> PathBuf {
    config_dir(scope).join("config.json")
}

/// Data directory for a scope (XDG data dir locally, /usr/local/share system-wide).
pub fn data_dir(scope: Scope) -> PathBuf {
    match scope {
        Scope::Local => dirs::data_dir()
            .unwrap_or_else(|| PathBuf::from("~/.local/share"))
            .join("morloc"),
        Scope::System => PathBuf::from("/usr/local/share/morloc"),
    }
}

// Environment paths

pub fn env_config_dir(scope: Scope, name: &str) -> PathBuf {
    config_dir(scope).join("environments").join(name)
}

pub fn env_config_path(scope: Scope, name: &str) -> PathBuf {
    env_config_dir(scope, name).join("env.json")
}

pub fn env_dockerfile_path(scope: Scope, name: &str) -> PathBuf {
    env_config_dir(scope, name).join("Dockerfile")
}

pub fn env_flags_path(scope: Scope, name: &str) -> PathBuf {
    env_config_dir(scope, name).join("env.flags")
}

pub fn env_data_dir(scope: Scope, name: &str) -> PathBuf {
    data_dir(scope).join("environments").join(name)
}

// ======================================================================
// Reading configuration
// ======================================================================

/// Read and deserialize a JSON config file.
///
/// Maps a permission error to `ConfigPermissionDenied`, any other read
/// failure to `ConfigNotFound`, and a bad JSON body to `ConfigParseError`.
///
/// FIX(review): the extracted source had the generic parameters stripped
/// (`-> Result`, `read_config::()`); the `<T>` form is reconstructed from
/// the call sites -- confirm bounds against upstream.
pub fn read_config<T: serde::de::DeserializeOwned>(path: &Path) -> Result<T> {
    let bytes = fs::read(path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::PermissionDenied {
            ManagerError::ConfigPermissionDenied(path.display().to_string())
        } else {
            ManagerError::ConfigNotFound(path.display().to_string())
        }
    })?;
    serde_json::from_slice(&bytes).map_err(|e| ManagerError::ConfigParseError {
        path: path.display().to_string(),
        msg: e.to_string(),
    })
}

/// Read the active configuration: local scope wins over system scope.
pub fn read_active_config() -> Option<Config> {
    let local_path = config_path(Scope::Local);
    if let Ok(cfg) = read_config::<Config>(&local_path) {
        return Some(cfg);
    }
    let system_path = config_path(Scope::System);
    read_config::<Config>(&system_path).ok()
}

/// Read one environment's `env.json`.
pub fn read_env_config(scope: Scope, name: &str) -> Result<EnvironmentConfig> {
    read_config(&env_config_path(scope, name))
}

// ======================================================================
// Writing configuration
// ======================================================================

/// Serialize `val` as JSON and write it atomically (temp file + rename),
/// guarded by an exclusive lock on `<path>.lock`.
pub fn write_config<T: serde::Serialize>(path: &Path, val: &T) -> Result<()> {
    let dir = path.parent().unwrap();
    fs::create_dir_all(dir).map_err(|e| ManagerError::ConfigParseError {
        path: path.display().to_string(),
        msg: e.to_string(),
    })?;
    best_effort_chmod(dir, 0o755);
    let lock_path = format!("{}.lock", path.display());
    with_file_lock(&lock_path, || {
        // Atomic write: temp file then rename
        let tmp_path = path.with_extension("tmp");
        let json = serde_json::to_vec(val).map_err(|e| ManagerError::ConfigParseError {
            path: path.display().to_string(),
            msg: e.to_string(),
        })?;
        fs::write(&tmp_path, &json).map_err(|e| ManagerError::ConfigParseError {
            path: path.display().to_string(),
            msg: e.to_string(),
        })?;
        fs::rename(&tmp_path, path).map_err(|e| ManagerError::ConfigParseError {
            path: path.display().to_string(),
            msg: e.to_string(),
        })?;
        best_effort_chmod(path, 0o644);
        Ok(())
    })
}

/// Write one environment's `env.json`.
pub fn write_env_config(scope: Scope, name: &str, ec: &EnvironmentConfig) -> Result<()> {
    write_config(&env_config_path(scope, name), ec)
}

// ======================================================================
// Scope utilities
// ======================================================================

/// Find which scope an environment lives in. Checks local first, then system.
pub fn find_env_scope(name: &str) -> Result<Scope> {
    let local_path = env_config_path(Scope::Local, name);
    if local_path.is_file() {
        return Ok(Scope::Local);
    }
    let sys_path = env_config_path(Scope::System, name);
    if sys_path.is_file() {
        return Ok(Scope::System);
    }
    Err(ManagerError::EnvironmentNotFound(name.to_string()))
}

/// List environment names in a given scope.
pub fn list_env_names(scope: Scope) -> Vec<String> {
    let env_dir = config_dir(scope).join("environments");
    if !env_dir.is_dir() {
        return Vec::new();
    }
    let Ok(entries) = fs::read_dir(&env_dir) else {
        return Vec::new();
    };
    entries
        .filter_map(|e| e.ok())
        .filter(|e| e.path().join("env.json").is_file())
        .filter_map(|e| e.file_name().into_string().ok())
        .collect()
}

// ======================================================================
// Flags files
// ======================================================================

/// Read a flags file, skipping blanks/comments and shell-expanding each line.
pub fn read_flags_file(path: &Path) -> Vec<String> {
    let Ok(contents) = fs::read_to_string(path) else {
        return Vec::new();
    };
    contents
        .lines()
        .map(|line| line.trim())
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .flat_map(shell_expand_line)
        .collect()
}

/// Expand a single flagfile line through the shell, getting glob expansion,
/// environment variable expansion, tilde expansion, and quote handling.
/// Falls back to simple whitespace splitting if the shell invocation fails.
fn shell_expand_line(line: &str) -> Vec<String> {
    let output = StdCommand::new("sh")
        .args(["-c", &format!("printf '%s\\0' {}", line)])
        .output();
    match output {
        Ok(out) if out.status.success() => {
            let stdout = String::from_utf8_lossy(&out.stdout);
            let tokens: Vec<String> = stdout
                .split('\0')
                .filter(|s| !s.is_empty())
                .map(|s| s.to_string())
                .collect();
            if tokens.is_empty() {
                line.split_whitespace().map(|s| s.to_string()).collect()
            } else {
                tokens
            }
        }
        _ => line.split_whitespace().map(|s| s.to_string()).collect(),
    }
}

/// Read flags file preserving one line per entry (for display).
pub fn read_flags_file_lines(path: &Path) -> Vec<String> {
    let Ok(contents) = fs::read_to_string(path) else {
        return Vec::new();
    };
    contents
        .lines()
        .map(|line| line.trim())
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .map(|s| s.to_string())
        .collect()
}

// ======================================================================
// File locking
// ======================================================================

/// Run `action` while holding an exclusive flock on `lock_path`.
/// The lock is released when the `Flock` handle is dropped.
fn with_file_lock<T, F>(lock_path: &str, action: F) -> Result<T>
where
    F: FnOnce() -> Result<T>,
{
    if let Some(parent) = Path::new(lock_path).parent() {
        let _ = fs::create_dir_all(parent);
    }
    let file = std::fs::OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(false)
        .mode(0o644)
        .open(lock_path)
        .map_err(|e| {
            if e.kind() == std::io::ErrorKind::PermissionDenied {
                ManagerError::ConfigPermissionDenied(format!(
                    "{}. Use sudo for system-scope operations",
                    lock_path
                ))
            } else {
                ManagerError::ConfigParseError {
                    path: lock_path.to_string(),
                    msg: format!("Failed to open lock file: {e}"),
                }
            }
        })?;
    use nix::fcntl::FlockArg;
    let locked = nix::fcntl::Flock::lock(file, FlockArg::LockExclusive).map_err(
        |(_file, errno)| ManagerError::ConfigParseError {
            path: lock_path.to_string(),
            msg: format!("Failed to acquire lock: {errno}"),
        },
    )?;
    let result = action();
    // Lock is released when Flock is dropped
    drop(locked);
    result
}

// ======================================================================
// Internal
// ======================================================================

/// Chmod that ignores failures (e.g. on foreign filesystems).
fn best_effort_chmod(path: &Path, mode: u32) {
    let _ = fs::set_permissions(path, fs::Permissions::from_mode(mode));
}

================================================
FILE: data/rust/morloc-manager/src/container.rs
================================================
use std::io;
use std::process::{Command, ExitStatus, Stdio};

use crate::types::ContainerEngine;

// ======================================================================
// Configuration records
// 
====================================================================== #[derive(Debug, Clone)] pub struct RunConfig { pub image: String, pub bind_mounts: Vec<(String, String)>, pub ports: Vec<(u16, u16)>, pub env: Vec<(String, String)>, pub read_only: bool, pub interactive: bool, pub remove_after: bool, pub name: Option, pub shm_size: Option, pub command: Option>, pub work_dir: Option, pub selinux_suffix: String, pub extra_flags: Vec, } impl RunConfig { pub fn new(image: &str) -> Self { Self { image: image.to_string(), bind_mounts: Vec::new(), ports: Vec::new(), env: Vec::new(), read_only: false, interactive: false, remove_after: true, name: None, shm_size: None, command: None, work_dir: None, selinux_suffix: String::new(), extra_flags: Vec::new(), } } } #[derive(Debug, Clone)] pub struct BuildConfig { pub dockerfile: String, pub context: String, pub tag: String, pub build_args: Vec<(String, String)>, } // ====================================================================== // Engine detection // ====================================================================== pub fn engine_executable(engine: ContainerEngine) -> &'static str { match engine { ContainerEngine::Docker => "docker", ContainerEngine::Podman => "podman", } } // ====================================================================== // Operations // ====================================================================== pub fn container_run(engine: ContainerEngine, cfg: &RunConfig) -> (ExitStatus, String, String) { let exe = engine_executable(engine); let extra = engine_specific_run_flags_io(engine); let args = build_run_args(engine, &extra, cfg); run_process(exe, &args) } /// Like `container_run` but captures both stdout and stderr (no streaming). 
pub fn container_run_quiet(engine: ContainerEngine, cfg: &RunConfig) -> (ExitStatus, String, String) { let exe = engine_executable(engine); let extra = engine_specific_run_flags_io(engine); let args = build_run_args(engine, &extra, cfg); run_process_quiet(exe, &args) } pub fn container_run_passthrough( engine: ContainerEngine, verbose: bool, shell: bool, cfg: &RunConfig, ) -> ExitStatus { let exe = engine_executable(engine); let extra = engine_specific_run_flags_io(engine); let args = build_run_args(engine, &extra, cfg); if verbose || shell { let quoted: Vec = args .iter() .map(|a| { if a.contains(' ') { format!("'{a}'") } else { a.clone() } }) .collect(); eprintln!("[morloc-manager] {exe} {}", quoted.join(" ")); } Command::new(exe) .args(&args) .stdin(Stdio::inherit()) .stdout(Stdio::inherit()) .stderr(Stdio::inherit()) .status() .unwrap_or_else(|_| std::process::exit(1)) } pub fn container_build(engine: ContainerEngine, cfg: &BuildConfig) -> (ExitStatus, String, String) { let exe = engine_executable(engine); let args = build_build_args(cfg); run_process(exe, &args) } pub fn container_pull(engine: ContainerEngine, image: &str) -> (ExitStatus, String, String) { let exe = engine_executable(engine); run_process(exe, &["pull".to_string(), image.to_string()]) } /// Build a container image with all output (stdout+stderr) redirected to stderr. /// Use for IO () commands where stdout must stay clean. pub fn container_build_visible(engine: ContainerEngine, cfg: &BuildConfig) -> ExitStatus { let exe = engine_executable(engine); let args = build_build_args(cfg); run_process_to_stderr(exe, &args) } /// Pull a container image with all output (stdout+stderr) redirected to stderr. /// Use for IO () commands where stdout must stay clean. 
pub fn container_pull_visible(engine: ContainerEngine, image: &str) -> ExitStatus { let exe = engine_executable(engine); run_process_to_stderr(exe, &["pull".to_string(), image.to_string()]) } pub fn image_exists_locally(engine: ContainerEngine, image: &str) -> bool { let exe = engine_executable(engine); Command::new(exe) .args(["image", "inspect", image]) .stdout(Stdio::null()) .stderr(Stdio::null()) .status() .map(|s| s.success()) .unwrap_or(false) } /// Run `image inspect` and return the stderr if it fails. /// Returns None on success, Some(stderr) on failure. pub fn image_inspect_stderr(engine: ContainerEngine, image: &str) -> Option { let exe = engine_executable(engine); let output = Command::new(exe) .args(["image", "inspect", image]) .stdout(Stdio::null()) .output() .ok()?; if output.status.success() { None } else { Some(String::from_utf8_lossy(&output.stderr).to_string()) } } /// Result of checking whether a remote image exists. pub enum RemoteImageStatus { /// The image exists on the registry. Exists, /// The registry was reached but the image/tag was not found. NotFound, /// The check failed for an unknown reason (network, auth, etc). /// Contains the stderr output from the container engine. Unknown(String), } pub fn check_remote_image(engine: ContainerEngine, image: &str) -> RemoteImageStatus { let exe = engine_executable(engine); let output = Command::new(exe) .args(["manifest", "inspect", image]) .stdout(Stdio::null()) .output(); match output { Ok(o) if o.status.success() => RemoteImageStatus::Exists, Ok(o) => { let stderr = String::from_utf8_lossy(&o.stderr).to_string(); let lower = stderr.to_lowercase(); // "manifest unknown" / "not found" / "name unknown" indicate // the registry was reachable but the image doesn't exist. 
if lower.contains("manifest unknown") || lower.contains("not found") || lower.contains("name unknown") { RemoteImageStatus::NotFound } else { RemoteImageStatus::Unknown(stderr) } } Err(e) => RemoteImageStatus::Unknown(format!("Failed to execute {exe}: {e}")), } } pub fn container_stop(engine: ContainerEngine, name_or_id: &str) -> (ExitStatus, String) { let exe = engine_executable(engine); let (code, _, err) = run_process(exe, &["stop".to_string(), name_or_id.to_string()]); (code, err) } pub fn container_remove(engine: ContainerEngine, name_or_id: &str) -> ExitStatus { let exe = engine_executable(engine); let (code, _, _) = run_process(exe, &["rm".to_string(), "-f".to_string(), name_or_id.to_string()]); code } /// Quiet container removal: suppresses stderr (for pre-emptive cleanup). pub fn container_remove_quiet(engine: ContainerEngine, name_or_id: &str) -> ExitStatus { let exe = engine_executable(engine); let (code, _, _) = run_process_quiet(exe, &["rm".to_string(), "-f".to_string(), name_or_id.to_string()]); code } /// Check whether a container with this name exists (running or stopped). 
pub fn container_exists(engine: ContainerEngine, name: &str) -> bool { let exe = engine_executable(engine); Command::new(exe) .args(["container", "inspect", name]) .stdout(Stdio::null()) .stderr(Stdio::null()) .status() .map(|s| s.success()) .unwrap_or(false) } pub fn remove_image(engine: ContainerEngine, tag: &str) -> bool { let exe = engine_executable(engine); let (status, _, _) = run_process(exe, &["rmi".to_string(), tag.to_string()]); status.success() } // ====================================================================== // CLI argument construction // ====================================================================== pub fn build_run_args( engine: ContainerEngine, extra_engine_flags: &[String], cfg: &RunConfig, ) -> Vec { let mut args = vec!["run".to_string()]; args.extend(extra_engine_flags.iter().cloned()); if cfg.remove_after { args.push("--rm".to_string()); } if cfg.read_only { args.push("--read-only".to_string()); // Docker does not auto-mount a tmpfs at /tmp when --read-only is used // (podman does). Pool daemons need a writable /tmp for temp files. if engine == ContainerEngine::Docker { args.push("--tmpfs".to_string()); args.push("/tmp".to_string()); } } // Always attach stdin so piped input works; only allocate a TTY for // interactive (shell) sessions. 
args.push("-i".to_string()); if cfg.interactive { args.push("-t".to_string()); } if let Some(ref n) = cfg.name { args.push("--name".to_string()); args.push(n.clone()); } if let Some(ref s) = cfg.shm_size { args.push("--shm-size".to_string()); args.push(s.clone()); } if let Some(ref w) = cfg.work_dir { args.push("-w".to_string()); args.push(w.clone()); } for (host, container) in &cfg.bind_mounts { args.push("-v".to_string()); args.push(format!("{host}:{container}{}", cfg.selinux_suffix)); } for (host_port, container_port) in &cfg.ports { args.push("-p".to_string()); args.push(format!("{host_port}:{container_port}")); } for (key, val) in &cfg.env { args.push("-e".to_string()); args.push(format!("{key}={val}")); } args.extend(cfg.extra_flags.iter().cloned()); args.push(cfg.image.clone()); if let Some(ref cmd) = cfg.command { args.extend(cmd.iter().cloned()); } args } pub fn engine_specific_run_flags_io(engine: ContainerEngine) -> Vec { let uid = nix::unistd::getuid(); match engine { ContainerEngine::Podman => { if uid.is_root() { Vec::new() } else { vec!["--userns=keep-id".to_string()] } } ContainerEngine::Docker => { if uid.is_root() { Vec::new() } else { let gid = nix::unistd::getgid(); vec!["--user".to_string(), format!("{}:{}", uid, gid)] } } } } /// Pure version for testing. 
#[cfg(test)] pub fn engine_specific_run_flags(engine: ContainerEngine) -> Vec { match engine { ContainerEngine::Podman => vec!["--userns=keep-id".to_string()], ContainerEngine::Docker => Vec::new(), } } pub fn build_build_args(cfg: &BuildConfig) -> Vec { let mut args = vec![ "build".to_string(), "-f".to_string(), cfg.dockerfile.clone(), "-t".to_string(), cfg.tag.clone(), ]; for (key, val) in &cfg.build_args { args.push("--build-arg".to_string()); args.push(format!("{key}={val}")); } args.push(cfg.context.clone()); args } // ====================================================================== // Process execution // ====================================================================== /// Run a process with both stdout and stderr redirected to our stderr. /// Returns only the exit status. Use for IO () commands where morloc-manager's /// stdout must stay clean but the user should see all container output. fn run_process_to_stderr(exe: &str, args: &[String]) -> ExitStatus { let mut child = Command::new(exe) .args(args) .stdin(Stdio::null()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) .spawn() .unwrap_or_else(|e| { eprintln!("Failed to execute {exe}: {e}"); std::process::exit(1); }); // Pump child stdout -> our stderr if let Some(mut child_stdout) = child.stdout.take() { let stderr = io::stderr(); let _ = io::copy(&mut child_stdout, &mut stderr.lock()); } child.wait().unwrap_or_else(|e| { eprintln!("Failed to wait for {exe}: {e}"); std::process::exit(1); }) } /// Run a process with stderr streamed live to the terminal. /// Returns (exit_status, captured_stdout, ""). 
fn run_process(exe: &str, args: &[String]) -> (ExitStatus, String, String) { let output = Command::new(exe) .args(args) .stdin(Stdio::null()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) .output() .unwrap_or_else(|e| { eprintln!("Failed to execute {exe}: {e}"); std::process::exit(1); }); ( output.status, String::from_utf8_lossy(&output.stdout).to_string(), String::new(), ) } /// Run a process with all output captured (no streaming). /// Used when stderr must be parsed (e.g., for error classification). fn run_process_quiet(exe: &str, args: &[String]) -> (ExitStatus, String, String) { let output = Command::new(exe) .args(args) .stdin(Stdio::null()) .output() .unwrap_or_else(|e| { eprintln!("Failed to execute {exe}: {e}"); std::process::exit(1); }); ( output.status, String::from_utf8_lossy(&output.stdout).to_string(), String::from_utf8_lossy(&output.stderr).to_string(), ) } // ====================================================================== // Helpers // ====================================================================== pub fn exit_code_to_int(status: ExitStatus) -> i32 { status.code().unwrap_or(1) } ================================================ FILE: data/rust/morloc-manager/src/doctor.rs ================================================ use std::fs; use std::path::Path; use std::process::Command; use crate::config as cfg; use crate::container::{container_run_quiet, engine_executable, image_exists_locally, RunConfig}; use crate::environment; use crate::error::Result; use crate::types::*; const MANIFEST_MARKER: &str = "### MANIFEST ###"; #[derive(serde::Serialize)] pub struct CheckResult { pub category: String, pub result: String, pub message: String, } #[derive(serde::Serialize)] pub struct DoctorSummary { pub ok: u32, pub warnings: u32, pub errors: u32, } struct Counts { ok: u32, warn: u32, fail: u32, json_mode: bool, current_category: String, checks: Vec, } impl Counts { fn new(json_mode: bool) -> Self { Self { ok: 0, warn: 0, fail: 0, json_mode, 
current_category: String::new(), checks: Vec::new() } } fn set_category(&mut self, cat: &str) { self.current_category = cat.to_string(); } fn pass(&mut self, msg: &str) { self.ok += 1; if self.json_mode { self.checks.push(CheckResult { category: self.current_category.clone(), result: "ok".to_string(), message: msg.to_string(), }); } else { println!(" [ok] {msg}"); } } fn warn(&mut self, msg: &str) { self.warn += 1; if self.json_mode { self.checks.push(CheckResult { category: self.current_category.clone(), result: "warning".to_string(), message: msg.to_string(), }); } else { println!(" [!!] {msg}"); } } fn fail(&mut self, msg: &str) { self.fail += 1; if self.json_mode { self.checks.push(CheckResult { category: self.current_category.clone(), result: "error".to_string(), message: msg.to_string(), }); } else { println!(" [EE] {msg}"); } } fn skip(&mut self, msg: &str) { if self.json_mode { self.checks.push(CheckResult { category: self.current_category.clone(), result: "skipped".to_string(), message: msg.to_string(), }); } else { println!(" [--] {msg}"); } } } pub fn doctor( engine: ContainerEngine, verbose: bool, env_name: &str, scope: Scope, ec: &EnvironmentConfig, deep: bool, strict: bool, json_mode: bool, ) -> Result<()> { let scope_str = match scope { Scope::Local => "local", Scope::System => "system", }; let engine_str = match engine { ContainerEngine::Docker => "docker", ContainerEngine::Podman => "podman", }; if !json_mode { println!("Environment: {env_name} ({scope_str})"); println!("Engine: {engine_str}"); println!(); } let mut c = Counts::new(json_mode); let data_dir = cfg::env_data_dir(scope, env_name); // ==== Prerequisites ==== if !json_mode { println!("Prerequisites"); } c.set_category("prerequisites"); check_engine(&mut c, engine); check_base_image(&mut c, engine, &ec.base_image); check_built_image(&mut c, engine, ec, scope, env_name); check_data_dirs(&mut c, &data_dir); check_file_readability(&mut c, &data_dir); // ==== Manifests ==== if !json_mode { 
println!("\nManifests"); } c.set_category("manifests"); check_manifests(&mut c, &data_dir, ec.morloc_version.as_ref()); // ==== Deep checks ==== c.set_category("deep"); if deep { if !json_mode { println!("\nDeep checks"); } check_morloc_version(&mut c, engine, ec); check_programs_deep(&mut c, engine, verbose, ec, &data_dir); } else { if !json_mode { println!("\nDeep checks"); } c.skip("Use --deep to run container-side checks"); } let fail_count = c.fail; let warn_count = c.warn; if json_mode { #[derive(serde::Serialize)] struct DoctorOutput { environment: String, scope: String, engine: String, checks: Vec, summary: DoctorSummary, } let output = DoctorOutput { environment: env_name.to_string(), scope: scope_str.to_string(), engine: engine_str.to_string(), checks: c.checks, summary: DoctorSummary { ok: c.ok, warnings: warn_count, errors: fail_count }, }; println!("{}", serde_json::to_string_pretty(&output).unwrap()); } else { // ==== Summary ==== println!(); println!( "{} passed, {} warnings, {} errors", c.ok, warn_count, fail_count ); } if fail_count > 0 { return Err(crate::error::ManagerError::DoctorFailed(fail_count)); } if strict && warn_count > 0 { return Err(crate::error::ManagerError::DoctorFailed(warn_count)); } Ok(()) } // ====================================================================== // Individual checks // ====================================================================== fn check_engine(c: &mut Counts, engine: ContainerEngine) { let exe = engine_executable(engine); let fmt = match engine { ContainerEngine::Podman => "{{.Version.Version}}", ContainerEngine::Docker => "{{.ServerVersion}}", }; let output = Command::new(exe) .args(["info", "--format", fmt]) .output(); match output { Ok(o) if o.status.success() => { let ver = String::from_utf8_lossy(&o.stdout).trim().to_string(); if ver.is_empty() { c.pass(&format!("{exe} engine reachable")); } else { c.pass(&format!("{exe} engine reachable ({ver})")); } } Ok(o) => { let err = 
String::from_utf8_lossy(&o.stderr).trim().to_string(); if err.contains("permission denied") || err.contains("Permission denied") { c.fail(&format!("{exe} permission denied -- add user to {exe} group?")); } else { c.fail(&format!("{exe} not reachable: {err}")); } } Err(e) => { c.fail(&format!("{exe} not found: {e}")); } } } fn check_base_image(c: &mut Counts, engine: ContainerEngine, base_image: &str) { if image_exists_locally(engine, base_image) { c.pass(&format!("Base image {base_image}")); } else { c.fail(&format!( "Base image {base_image} not found locally\n \ Run: morloc-manager run -- morloc --version (triggers pull)" )); } } fn check_built_image(c: &mut Counts, engine: ContainerEngine, ec: &EnvironmentConfig, scope: Scope, env_name: &str) { if ec.dockerfile.is_none() { return; } // Check if the Dockerfile file itself still exists let df_path = cfg::env_dockerfile_path(scope, env_name); if !df_path.exists() { c.warn(&format!( "Dockerfile configured but file is missing: {}\n \ Remove stale config or recreate the file, then run: morloc-manager update", df_path.display() )); return; } match &ec.built_image { Some(img) => { if image_exists_locally(engine, img) { c.pass(&format!("Built image {img}")); } else { c.fail(&format!( "Built image {img} not found locally\n \ Run: morloc-manager update" )); } } None => { c.warn("Dockerfile configured but no image built yet\n \ Run: morloc-manager update"); } } } fn check_data_dirs(c: &mut Counts, data_dir: &Path) { let expected = ["lib", "bin", "opt", "fdb", "src/morloc/plane", "exe"]; let mut missing: Vec<&str> = Vec::new(); for dir in &expected { if !data_dir.join(dir).is_dir() { missing.push(dir); } } if missing.is_empty() { c.pass("Data directories intact"); } else { c.fail(&format!( "Missing directories: {}\n \ Run: morloc-manager run -- morloc init -f", missing.join(", ") )); } } /// Walk exe/ and other data subdirectories, warning about files unreadable /// by the current user (which would cause freeze to fail). 
fn check_file_readability(c: &mut Counts, data_dir: &Path) { let dirs_to_check = ["exe", "bin", "lib"]; let mut unreadable: Vec = Vec::new(); for dir in &dirs_to_check { let dir_path = data_dir.join(dir); if dir_path.is_dir() { collect_unreadable(&dir_path, &mut unreadable); } } if unreadable.is_empty() { c.pass("All data files readable"); } else { let shown: Vec<&str> = unreadable.iter().take(5).map(|s| s.as_str()).collect(); let suffix = if unreadable.len() > 5 { format!(" (and {} more)", unreadable.len() - 5) } else { String::new() }; c.fail(&format!( "Unreadable files (freeze will fail): {}{suffix}\n \ Fix with: chmod -R a+rX ", shown.join(", ") )); } } fn collect_unreadable(dir: &Path, out: &mut Vec) { let Ok(entries) = fs::read_dir(dir) else { out.push(dir.display().to_string()); return; }; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { collect_unreadable(&path, out); } else if fs::File::open(&path).is_err() { out.push(path.display().to_string()); } } } fn check_manifests( c: &mut Counts, data_dir: &Path, expected_version: Option<&Version>, ) { let fdb_dir = data_dir.join("fdb"); if !fdb_dir.is_dir() { c.warn("No fdb/ directory found"); return; } let entries = match fs::read_dir(&fdb_dir) { Ok(e) => e, Err(e) => { c.fail(&format!("Cannot read fdb/: {e}")); return; } }; let mut found_any = false; for entry in entries.flatten() { let name = entry.file_name(); let name_str = name.to_string_lossy(); if !name_str.ends_with(".manifest") { continue; } found_any = true; let prog_name = &name_str[..name_str.len() - ".manifest".len()]; check_one_manifest(c, &entry.path(), prog_name, data_dir, expected_version); } if !found_any { c.warn("No program manifests found in fdb/"); } } fn check_one_manifest( c: &mut Counts, path: &Path, prog_name: &str, data_dir: &Path, expected_version: Option<&Version>, ) { let content = match fs::read_to_string(path) { Ok(s) => s, Err(e) => { c.fail(&format!("{prog_name} -- cannot read manifest: {e}")); return; 
} }; let json_str = if content.starts_with("#!") { if let Some(marker_pos) = content.find(MANIFEST_MARKER) { let after_marker = &content[marker_pos..]; let json_start = after_marker .find('\n') .map(|i| marker_pos + i + 1) .unwrap_or(content.len()); &content[json_start..] } else { c.fail(&format!("{prog_name} -- manifest missing ### MANIFEST ### marker")); return; } } else { content.as_str() }; let manifest: serde_json::Value = match serde_json::from_str(json_str) { Ok(v) => v, Err(e) => { c.fail(&format!("{prog_name} -- invalid manifest JSON: {e}")); return; } }; let mut issues: Vec = Vec::new(); // Check morloc_version let build_ver = manifest .get("build") .and_then(|b| b.get("morloc_version")) .and_then(|v| v.as_str()); if let (Some(build_ver_str), Some(expected)) = (build_ver, expected_version) { let expected_str = expected.show(); if build_ver_str != expected_str { issues.push(format!( "built with {build_ver_str}, expected {expected_str}" )); } } // Check build.path exists let build_path = manifest .get("build") .and_then(|b| b.get("path")) .and_then(|v| v.as_str()); if let Some(bp) = build_path { // Build paths inside containers are /opt/morloc/exe/..., on host they're // under data_dir/exe/... Try the host path first. 
let host_path = data_dir.join("exe").join(prog_name); if !host_path.is_dir() && !Path::new(bp).is_dir() { issues.push("build directory missing".to_string()); } } else { issues.push("no build.path in manifest".to_string()); } // Check pool files exist let pool_count = manifest .get("pools") .and_then(|p| p.as_array()) .map(|a| a.len()) .unwrap_or(0); if pool_count == 0 { issues.push("no pools defined".to_string()); } if issues.is_empty() { let ver_str = build_ver.unwrap_or("unknown"); c.pass(&format!( "{prog_name} -- built with {ver_str}, {pool_count} pools" )); } else { for issue in &issues { c.warn(&format!( "{prog_name} -- {issue}\n \ Recompile: morloc-manager run -- morloc make --install" )); } } } fn check_morloc_version(c: &mut Counts, engine: ContainerEngine, ec: &EnvironmentConfig) { let image = ec.active_image(); match environment::detect_morloc_version(engine, image) { Ok(detected) => { if let Some(ref expected) = ec.morloc_version { if detected == *expected { c.pass(&format!("morloc {} (matches config)", detected.show())); } else { c.warn(&format!( "morloc {} in container, config says {}", detected.show(), expected.show() )); } } else { c.pass(&format!("morloc {} (no version in config to compare)", detected.show())); } } Err(e) => { c.fail(&format!("Cannot run morloc in container: {e}")); } } } fn check_programs_deep( c: &mut Counts, engine: ContainerEngine, verbose: bool, ec: &EnvironmentConfig, data_dir: &Path, ) { let image = ec.active_image(); let mh = "/opt/morloc"; let bind_mounts = vec![(data_dir.to_string_lossy().to_string(), mh.to_string())]; let env = vec![ ("MORLOC_HOME".to_string(), mh.to_string()), ]; // Scan programs from fdb/ to get program names let fdb_dir = format!("{mh}/fdb"); let cfg = RunConfig { command: Some(vec!["ls".to_string(), fdb_dir.clone()]), bind_mounts: bind_mounts.clone(), env: env.clone(), ..RunConfig::new(image) }; let (status, stdout, _) = container_run_quiet(engine, &cfg); if !status.success() { c.fail("Cannot list 
programs in container"); return; } let programs: Vec = stdout .lines() .filter(|l| l.ends_with(".manifest")) .map(|l| { let name = l.strip_suffix(".manifest").unwrap_or(l); ProgramEntry { name: name.to_string(), commands: Vec::new(), } }) .collect(); if programs.is_empty() { c.warn("No programs found in container"); return; } if !c.json_mode { println!("Running smoke tests for {} programs...", programs.len()); } for prog in &programs { let exe_path = format!("{mh}/bin/{}", prog.name); let cfg = RunConfig { command: Some(vec![exe_path.clone(), "--help".to_string()]), bind_mounts: bind_mounts.clone(), env: env.clone(), ..RunConfig::new(image) }; if verbose { let exe = engine_executable(engine); eprintln!("[morloc-manager] {exe} run --rm {image} {exe_path} --help"); } let (status, _, stderr) = container_run_quiet(engine, &cfg); if status.success() { c.pass(&format!("{} -- smoke test passed", prog.name)); } else { let snippet: String = stderr.lines().take(3).collect::>().join("\n "); c.fail(&format!("{} -- smoke test failed: {snippet}", prog.name)); } } } ================================================ FILE: data/rust/morloc-manager/src/environment.rs ================================================ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use serde::Serialize; use sha2::{Digest, Sha256}; use crate::config; use crate::container::{ self, check_remote_image, container_build_visible, container_pull_visible, engine_executable, exit_code_to_int, image_exists_locally, BuildConfig, RemoteImageStatus, }; use crate::error::{ManagerError, Result}; use crate::serve; use crate::types::*; // ====================================================================== // Public types // ====================================================================== /// Options for creating or updating an environment. /// For `new` (is_new=true): all Option fields that are None use defaults. /// For `update` (is_new=false): None means keep the existing value. 
pub struct ApplyOptions {
    pub name: String,
    pub scope: Scope,
    pub is_new: bool,
    // (restored type parameters below: they were stripped during extraction;
    // types inferred from usage in apply_environment -- confirm against the
    // CLI layer)
    pub base_image: Option<String>,
    pub original_image: Option<String>,
    pub morloc_version: Option<Version>,
    // Path to a Dockerfile to copy into the environment's config dir.
    pub dockerfile: Option<String>,
    // Include specs ("path" or "src:dest") copied into the build context.
    pub includes: Vec<String>,
    // Path to a file of extra engine flags, one per line.
    pub flagfile: Option<String>,
    pub engine_args: Vec<String>,
    pub engine: Option<ContainerEngine>,
    pub shm_size: Option<String>,
    pub skip_dockerfile_build: bool,
    pub verbose: bool,
}

/// Info returned by list_environments.
#[derive(Serialize)]
pub struct EnvInfo {
    pub name: String,
    // (restored `Option<Version>`: type parameter lost in extraction;
    // matches EnvironmentConfig.morloc_version -- TODO confirm)
    pub morloc_version: Option<Version>,
    pub active: bool,
}

// ======================================================================
// Image resolution
// ======================================================================

const MORLOC_IMAGE_PREFIX: &str = "ghcr.io/morloc-project/morloc/morloc-full";

/// Recognize engine errors that mean "cannot chdir into the current working
/// directory" and rewrite them into a clearer message. This commonly happens
/// when running `sudo -u morloc-manager ...` from a directory
/// that cannot access (e.g., /root or another user's $HOME).
/// Without this hint, the error bubbles up as "Failed to check registry..."
/// which misleads users toward debugging network/auth problems.
/// (restored `Option<String>` return: type parameter lost in extraction)
fn cwd_access_hint(stderr: &str) -> Option<String> {
    let lower = stderr.to_lowercase();
    let looks_like_cwd_denied = (lower.contains("chdir")
        || lower.contains("getwd")
        || lower.contains("current working directory"))
        && (lower.contains("permission denied") || lower.contains("no such file"));
    if looks_like_cwd_denied {
        Some(format!(
            "Cannot change into the current working directory as the target user. \
             Run morloc-manager from a directory the target user can access \
             (for example /tmp or the user's home directory).\nOriginal error: {}",
            stderr.trim()
        ))
    } else {
        None
    }
}

/// Resolve a morloc version string to a registry image reference.
pub fn version_to_image(ver: &Version) -> String { format!("{MORLOC_IMAGE_PREFIX}:{}", ver.show()) }

/// Pull an image by tag from the morloc registry, detect its version, and
/// return (image_ref, version). The tag can be a semver string ("0.77.0"),
/// a named tag ("edge", "nightly"), or any other valid container tag.
pub fn pull_tagged_image(engine: ContainerEngine, tag: &str) -> Result<(String, Version)> {
    let image_ref = format!("{MORLOC_IMAGE_PREFIX}:{tag}");
    // Only hit the network when the image is not already in the local store.
    if !image_exists_locally(engine, &image_ref) {
        // Probe the registry first so "tag does not exist" and "registry
        // unreachable" produce distinct error messages.
        match check_remote_image(engine, &image_ref) {
            RemoteImageStatus::Exists => {}
            RemoteImageStatus::NotFound => {
                return Err(ManagerError::EnvError(format!(
                    "No container image found for tag '{tag}'"
                )));
            }
            RemoteImageStatus::Unknown(stderr) => {
                // Rewrite misleading cwd-permission errors (see cwd_access_hint).
                if let Some(hint) = cwd_access_hint(&stderr) {
                    return Err(ManagerError::EnvError(hint));
                }
                return Err(ManagerError::EnvError(format!(
                    "Failed to check registry for tag '{tag}': {}",
                    stderr.trim()
                )));
            }
        }
        eprintln!("Pulling {image_ref}...");
        let status = container_pull_visible(engine, &image_ref);
        if !status.success() {
            return Err(ManagerError::EngineError {
                engine,
                code: exit_code_to_int(status),
                stderr: "Pull failed (see output above)".to_string(),
            });
        }
    } else {
        eprintln!("Using local copy of {image_ref}");
    }
    let ver = detect_morloc_version(engine, &image_ref)?;
    // Also tag with the detected version so future --version lookups find it
    let versioned_image = version_to_image(&ver);
    if versioned_image != image_ref {
        let exe = engine_executable(engine);
        // Best-effort: tagging failure is deliberately ignored.
        let _ = Command::new(exe)
            .args(["tag", &image_ref, &versioned_image])
            .output();
    }
    Ok((versioned_image, ver))
}

/// Pull the :edge image. Convenience wrapper around pull_tagged_image.
pub fn resolve_latest(engine: ContainerEngine) -> Result<(String, Version)> { pull_tagged_image(engine, "edge") }

/// Pull a specific version image from the morloc registry.
/// (restored `Result<String>` return: type parameter lost in extraction;
/// pull_tagged_image yields (String, Version) and only the image ref is kept)
pub fn pull_version_image(engine: ContainerEngine, ver: &Version) -> Result<String> {
    let (img, _) = pull_tagged_image(engine, &ver.show())?;
    Ok(img)
}

/// Detect the morloc version by running `morloc --version` inside the image.
/// (restored `Result<Version>` return: type parameter lost in extraction;
/// the body parses the output into a Version)
pub fn detect_morloc_version(engine: ContainerEngine, image: &str) -> Result<Version> {
    let exe = engine_executable(engine);
    let output = Command::new(exe)
        .args(["run", "--rm", image, "morloc", "--version"])
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output()
        .map_err(|e| ManagerError::EnvError(format!("Failed to run container: {e}")))?;
    if !output.status.success() {
        return Err(ManagerError::EnvError(format!(
            "Image '{image}' does not have a working morloc binary: {}",
            String::from_utf8_lossy(&output.stderr).trim()
        )));
    }
    let ver_out = String::from_utf8_lossy(&output.stdout).trim().to_string();
    // Output looks like "morloc X.Y.Z"; take the last whitespace-separated
    // token as the version string.
    let ver_str = ver_out.split_whitespace().last().unwrap_or(&ver_out);
    ver_str.parse().map_err(|_| {
        ManagerError::EnvError(format!(
            "Could not parse morloc version from image '{image}' output: {ver_out}"
        ))
    })
}

/// Pull a custom image (not from morloc registry).
pub fn pull_custom_image(engine: ContainerEngine, image: &str) -> Result<()> {
    if image_exists_locally(engine, image) {
        eprintln!("Using local copy of {image}");
        return Ok(());
    }
    eprintln!("Pulling {image}...");
    let status = container_pull_visible(engine, image);
    if !status.success() {
        return Err(ManagerError::EngineError {
            engine,
            code: exit_code_to_int(status),
            stderr: "Pull failed (see output above)".to_string(),
        });
    }
    Ok(())
}

// ======================================================================
// Core operations
// ======================================================================

/// Create or update an environment.
///
/// When `is_new` is true: validates name uniqueness, creates data directories.
/// Validate that an environment name contains only allowed characters.
pub fn validate_env_name(name: &str) -> Result<()> { if name.is_empty() || !name .chars() .all(|c| c.is_alphanumeric() || c == '-' || c == '_' || c == '.') { return Err(ManagerError::EnvError(format!( "Invalid environment name '{name}': must contain only alphanumeric characters, hyphens, underscores, or dots" ))); } Ok(()) } /// Parse an include spec into (resolved_source, destination). /// /// Supports two forms: /// - `path` — copies to cfg_dir/basename(path) /// - `src:dest` — copies src to cfg_dir/dest /// /// Rules for dest: /// - Must be relative (no leading `/`) /// - Cannot contain `..` /// - If dest ends with `/`, src's basename is appended /// /// Source symlinks are resolved via canonicalize(). fn parse_include_spec(spec: &str, cfg_dir: &Path) -> Result<(PathBuf, PathBuf)> { let (src_str, dest_rel) = if let Some(idx) = spec.find(':') { let s = &spec[..idx]; let d = &spec[idx + 1..]; if s.is_empty() || d.is_empty() { return Err(ManagerError::EnvError(format!( "Invalid include spec: '{spec}'" ))); } (s, d.to_string()) } else { let src_path = Path::new(spec); let fname = src_path.file_name().ok_or_else(|| { ManagerError::EnvError(format!("Invalid include path: {spec}")) })?; (spec.as_ref(), fname.to_string_lossy().to_string()) }; // Validate dest constraints if dest_rel.starts_with('/') { return Err(ManagerError::EnvError(format!( "Include destination must be relative, not absolute: '{dest_rel}'" ))); } if dest_rel.contains("..") { return Err(ManagerError::EnvError(format!( "Include destination cannot contain '..': '{dest_rel}'" ))); } // Resolve src (canonicalize follows symlinks, errors if path doesn't exist) let real_src = Path::new(src_str).canonicalize().map_err(|e| { ManagerError::EnvError(format!("Cannot resolve include path '{src_str}': {e}")) })?; // Compute final dest let dest = cfg_dir.join(&dest_rel); let final_dest = if dest_rel.ends_with('/') { dest.join(real_src.file_name().unwrap_or_default()) } else { dest }; Ok((real_src, final_dest)) } 
/// When `is_new` is false: loads existing config, applies overrides. pub fn apply_environment(opts: &ApplyOptions) -> Result<()> { let scope = opts.scope; let name = &opts.name; validate_env_name(name)?; // Load existing config or start fresh let mut ec = if opts.is_new { let cfg_path = config::env_config_path(scope, name); if cfg_path.is_file() { return Err(ManagerError::EnvError(format!( "Environment '{name}' already exists" ))); } // Create data directories let data_dir = config::env_data_dir(scope, name); for sub in &["bin", "lib", "fdb", "include", "opt", "tmp"] { fs::create_dir_all(data_dir.join(sub)).map_err(|e| { ManagerError::EnvError(format!("Failed to create directory: {e}")) })?; } if scope == Scope::System { use std::os::unix::fs::PermissionsExt; let dirs: Vec<_> = std::iter::once(data_dir.clone()) .chain( ["bin", "lib", "fdb", "include", "opt", "tmp"] .iter() .map(|d| data_dir.join(d)), ) .collect(); for d in dirs { let _ = fs::set_permissions(&d, fs::Permissions::from_mode(0o2775)); } } // Start with required fields from opts; the rest will be applied below EnvironmentConfig { name: name.clone(), base_image: opts.base_image.clone().unwrap_or_default(), original_image: None, dockerfile: None, content_hash: None, built_image: None, engine: opts.engine.unwrap_or(ContainerEngine::Podman), shm_size: "512m".to_string(), morloc_version: None, } } else { config::read_env_config(scope, name) .map_err(|_| ManagerError::EnvironmentNotFound(name.to_string()))? }; // Apply overrides if let Some(ref img) = opts.base_image { ec.base_image = img.clone(); } if let Some(ref img) = opts.original_image { ec.original_image = Some(img.clone()); } if let Some(ref ver) = opts.morloc_version { ec.morloc_version = Some(ver.clone()); } if let Some(engine) = opts.engine { ec.engine = engine; } if let Some(ref shm) = opts.shm_size { if !is_valid_shm_size(shm) { return Err(ManagerError::EnvError(format!( "Invalid --shm-size '{shm}'. 
Use format like: 512m, 1g, 2048k"
        )));
    }
    ec.shm_size = shm.clone();
}

// Copy Dockerfile if a new one was provided
let dockerfile_changed = if let Some(ref src) = opts.dockerfile {
    let dest = config::env_dockerfile_path(scope, name);
    // NOTE(review): unwrap assumes env_dockerfile_path always yields a path
    // with a parent component — confirm against config.rs.
    let dest_dir = dest.parent().unwrap();
    fs::create_dir_all(dest_dir).map_err(|e| {
        ManagerError::EnvError(format!("Failed to create config dir: {e}"))
    })?;
    fs::copy(src, &dest).map_err(|e| {
        ManagerError::EnvError(format!("Failed to copy Dockerfile '{}': {e}", src))
    })?;
    // Only the fixed name "Dockerfile" is recorded; the file lives in the
    // env config dir regardless of the source path.
    ec.dockerfile = Some("Dockerfile".to_string());
    true
} else {
    false
};

// Copy included files/directories into build context.
// Supports src:dest syntax (like Docker volume mounts) for explicit placement.
let cfg_dir = config::env_config_dir(scope, name);
fs::create_dir_all(&cfg_dir).map_err(|e| {
    ManagerError::EnvError(format!("Failed to create config dir: {e}"))
})?;
for spec in &opts.includes {
    let (real_src, final_dest) = parse_include_spec(spec, &cfg_dir)?;
    if let Some(parent) = final_dest.parent() {
        fs::create_dir_all(parent).map_err(|e| {
            ManagerError::EnvError(format!("Failed to create directory: {e}"))
        })?;
    }
    if real_src.is_dir() {
        // Directories are copied via `cp -a` (std::fs has no recursive copy).
        let status = Command::new("cp")
            .args(["-a", &real_src.to_string_lossy(), &final_dest.to_string_lossy()])
            .stdin(std::process::Stdio::null())
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::inherit())
            .status()
            .map_err(|e| ManagerError::EnvError(format!("Failed to copy '{spec}': {e}")))?;
        if !status.success() {
            return Err(ManagerError::EnvError(format!(
                "Failed to copy directory '{spec}'"
            )));
        }
    } else {
        fs::copy(&real_src, &final_dest).map_err(|e| {
            ManagerError::EnvError(format!("Failed to copy '{spec}': {e}"))
        })?;
    }
}

// Write flags file: for new envs or when flagfile is provided, write fresh.
// For updates with only engine_args, append to existing.
let flags_path = config::env_flags_path(scope, name);
if opts.is_new || opts.flagfile.is_some() {
    // NOTE(review): generic parameter appears stripped by extraction here;
    // presumably `Vec<String>` in the true source.
    let mut flag_lines: Vec = Vec::new();
    if let Some(ref src) = opts.flagfile {
        let content = fs::read_to_string(src).map_err(|e| {
            ManagerError::EnvError(format!("Failed to read flagfile '{}': {e}", src))
        })?;
        // Keep only non-empty, non-comment lines from the flagfile.
        flag_lines.extend(
            content
                .lines()
                .map(|l| l.trim().to_string())
                .filter(|l| !l.is_empty() && !l.starts_with('#')),
        );
    }
    flag_lines.extend(opts.engine_args.iter().cloned());
    let flags_content = if flag_lines.is_empty() {
        String::new()
    } else {
        flag_lines.join("\n") + "\n"
    };
    fs::write(&flags_path, &flags_content).map_err(|e| {
        ManagerError::EnvError(format!("Failed to write flags file: {e}"))
    })?;
} else if !opts.engine_args.is_empty() {
    // Append engine_args to existing flags file
    let mut existing = config::read_flags_file_lines(&flags_path);
    existing.extend(opts.engine_args.iter().cloned());
    let flags_content = existing.join("\n") + "\n";
    fs::write(&flags_path, &flags_content).map_err(|e| {
        ManagerError::EnvError(format!("Failed to write flags file: {e}"))
    })?;
}

// Build Dockerfile layer if present and not skipped
let has_dockerfile = ec.dockerfile.is_some();
let should_build = has_dockerfile
    && !opts.skip_dockerfile_build
    && (opts.is_new
        || dockerfile_changed
        || !opts.includes.is_empty()
        || opts.base_image.is_some()
        || opts.engine.is_some()
        // For update with no specific changes, rebuild if Dockerfile exists
        || (!opts.is_new && opts.dockerfile.is_none() && opts.includes.is_empty()));
if should_build {
    let tag = format!("localhost/morloc-env:{name}");
    let df_path = config::env_dockerfile_path(scope, name);
    if df_path.exists() {
        let hash = hash_file(&df_path)?;
        // Skip rebuild when nothing has actually changed: same Dockerfile
        // hash, no new includes, no base-image change, tagged image still
        // present. Without this, `update` with no arguments silently
        // re-runs the full build every time.
        let unchanged = !opts.is_new
            && !dockerfile_changed
            && opts.includes.is_empty()
            && opts.base_image.is_none()
            && ec.content_hash.as_deref() == Some(hash.as_str())
            && ec.built_image.as_ref()
                .map(|img| image_exists_locally(ec.engine, img))
                .unwrap_or(false);
        if unchanged {
            eprintln!("Dockerfile unchanged; skipping rebuild.");
        } else {
            let build_cfg = BuildConfig {
                dockerfile: df_path.to_string_lossy().to_string(),
                context: cfg_dir.to_string_lossy().to_string(),
                tag: tag.clone(),
                build_args: vec![("CONTAINER_BASE".to_string(), ec.base_image.clone())],
            };
            if opts.verbose {
                let exe = engine_executable(ec.engine);
                eprintln!(
                    "[morloc-manager] {exe} build -f {} -t {} {}",
                    build_cfg.dockerfile, build_cfg.tag, build_cfg.context
                );
            }
            let status = container_build_visible(ec.engine, &build_cfg);
            if !status.success() {
                return Err(ManagerError::EngineError {
                    engine: ec.engine,
                    code: exit_code_to_int(status),
                    stderr: "Build failed (see output above)".to_string(),
                });
            }
            ec.built_image = Some(tag);
            ec.content_hash = Some(hash);
        }
    }
}

// Always reconcile the stored morloc version against the actual image.
// - For `new --version 0.77.0-rc.6`, the binary reports "0.77.0" (stack
//   does not expose prerelease tags), so keep the recorded value when
//   major.minor.patch match — the recorded tag is more informative.
// - For `new --image ` or `update --image ...`, nothing was
//   recorded yet, so store the detected version.
// - If the image has no morloc binary (e.g., a bare base image staged
//   for a Dockerfile layer not yet built), silently leave the field
//   unchanged rather than failing the whole operation.
let detect_target = ec.built_image.clone().unwrap_or_else(|| ec.base_image.clone());
if !detect_target.is_empty() {
    if let Ok(detected) = detect_morloc_version(ec.engine, &detect_target) {
        ec.morloc_version = Some(match ec.morloc_version.take() {
            Some(recorded)
                if recorded.major == detected.major
                    && recorded.minor == detected.minor
                    && recorded.patch == detected.patch => recorded,
            _ => detected,
        });
    }
}

// Write environment config
config::write_env_config(scope, name, &ec)?;
Ok(())
}

/// Remove an environment and all its data.
pub fn remove_environment(engine: ContainerEngine, scope: Scope, name: &str) -> Result<()> {
    let ec = config::read_env_config(scope, name)
        .map_err(|_| ManagerError::EnvironmentNotFound(name.to_string()))?;

    // Stop and remove any running serve container for this environment before
    // removing its image. If we skipped this, the serve container would keep
    // running and be unreachable through morloc-manager.
    let serve_name = serve::serve_container_name(name);
    if container::container_exists(engine, &serve_name) {
        // Best-effort: failures here must not block environment removal.
        let _ = container::container_stop(engine, &serve_name);
        let _ = container::container_remove_quiet(engine, &serve_name);
    }

    // Remove built Dockerfile layer image
    if let Some(ref img) = ec.built_image {
        if image_exists_locally(engine, img) {
            container::remove_image(engine, img);
        }
    }

    // Remove config directory
    let cfg_dir = config::env_config_dir(scope, name);
    if cfg_dir.is_dir() {
        let _ = fs::remove_dir_all(&cfg_dir);
    }

    // Remove data directory
    let data_dir = config::env_data_dir(scope, name);
    if data_dir.is_dir() {
        let _ = fs::remove_dir_all(&data_dir);
    }

    // If the active env was this one, clear it in both local and system configs
    for cfg_scope in [Scope::Local, Scope::System] {
        let cfg_path = config::config_path(cfg_scope);
        // NOTE(review): turbofish type stripped by extraction; presumably
        // `read_config::<Config>` in the true source.
        if let Ok(cfg) = config::read_config::(&cfg_path) {
            if cfg.active_env.as_deref() == Some(name) {
                let new_cfg = Config { active_env: None, ..cfg };
                let _ = config::write_config(&cfg_path, &new_cfg);
            }
        }
    }

    Ok(())
}

///
List environments in the given scope.
///
/// One `EnvInfo` is produced per environment whose config can be read;
/// unreadable or corrupt configs are silently skipped. `active_env` marks
/// which entry (if any) is the currently selected environment.
pub fn list_environments(scope: Scope, active_env: Option<&str>) -> Vec<EnvInfo> {
    config::list_env_names(scope)
        .into_iter()
        .filter_map(|env_name| {
            let ec = config::read_env_config(scope, &env_name).ok()?;
            let is_active = active_env == Some(env_name.as_str());
            Some(EnvInfo {
                name: env_name,
                morloc_version: ec.morloc_version,
                active: is_active,
            })
        })
        .collect()
}

/// Select an environment by writing active_env to the given write_scope config.
///
/// Fails if the named environment does not exist in any scope. Existing
/// settings in the target config are preserved; if the target config is
/// unreadable, the system config (or defaults) seeds the write.
pub fn select_environment(name: &str, write_scope: Scope) -> Result<()> {
    // Verify the environment exists somewhere before touching any config.
    config::find_env_scope(name)?;

    let cfg_path = config::config_path(write_scope);
    let base_cfg = match config::read_config::<Config>(&cfg_path) {
        Ok(existing) => existing,
        Err(_) => config::read_config::<Config>(&config::config_path(Scope::System))
            .unwrap_or_default(),
    };
    let updated = Config { active_env: Some(name.to_string()), ..base_cfg };
    config::write_config(&cfg_path, &updated)
}

/// Resolve the active environment. Checks local config first, then system.
/// Returns (name, scope where env config lives, EnvironmentConfig).
pub fn resolve_active_environment() -> Result<(String, Scope, EnvironmentConfig)> {
    let active_name = resolve_active_env_name()?;
    let env_scope = config::find_env_scope(&active_name)?;
    let env_cfg = config::read_env_config(env_scope, &active_name)?;
    Ok((active_name, env_scope, env_cfg))
}

/// Resolve just the active environment name from config.
/// Skips names that don't resolve to an actual environment (e.g., stale
/// entries from old config formats).
fn resolve_active_env_name() -> Result {
    // Local config takes precedence over system config.
    // NOTE(review): turbofish types below appear stripped by extraction;
    // presumably `read_config::<Config>` in the true source.
    if let Ok(cfg) = config::read_config::(&config::config_path(Scope::Local)) {
        if let Some(ref name) = cfg.active_env {
            if config::find_env_scope(name).is_ok() {
                return Ok(name.clone());
            }
        }
    }
    if let Ok(cfg) = config::read_config::(&config::config_path(Scope::System)) {
        if let Some(ref name) = cfg.active_env {
            if config::find_env_scope(name).is_ok() {
                return Ok(name.clone());
            }
        }
    }
    // Check if any environments exist to give a better suggestion
    let local_envs = config::list_env_names(Scope::Local);
    let system_envs = config::list_env_names(Scope::System);
    if local_envs.is_empty() && system_envs.is_empty() {
        Err(ManagerError::NoActiveEnvironment)
    } else {
        // Label each entry with its scope so same-named envs are distinguishable.
        // System envs are flagged with --system to disambiguate in select.
        let mut available: Vec = local_envs
            .iter()
            .map(|n| format!("{n} (local)"))
            .collect();
        available.extend(system_envs.iter().map(|n| format!("{n} (system)")));
        Err(ManagerError::EnvError(format!(
            "No active environment. 
Select one with: morloc-manager select \n\
 Available: {}",
            available.join(", ")
        )))
    }
}

// ======================================================================
// Internal
// ======================================================================

/// Validate a shared-memory size string: plain digits with an optional
/// single b/k/m/g suffix (either case), e.g. "512m", "1g", "2048".
pub fn is_valid_shm_size(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    let (digits, suffix) = if s.ends_with(|c: char| "bkmgBKMG".contains(c)) {
        (&s[..s.len() - 1], true)
    } else {
        (s, false)
    };
    // NOTE(review): the final `(suffix || !digits.is_empty())` clause is
    // always true once the leading `!digits.is_empty()` check has passed —
    // the clause (and the `suffix` binding) look redundant.
    !digits.is_empty()
        && digits.chars().all(|c| c.is_ascii_digit())
        && (suffix || !digits.is_empty())
}

/// SHA-256 of a file's contents, rendered as lowercase hex.
fn hash_file(path: &Path) -> Result {
    let contents = fs::read(path).map_err(|e| {
        ManagerError::EnvError(format!("Failed to read file: {e}"))
    })?;
    let digest = Sha256::digest(&contents);
    Ok(hex_encode(&digest))
}

/// Lowercase hex rendering of a byte slice.
fn hex_encode(bytes: &[u8]) -> String {
    bytes.iter().map(|b| format!("{b:02x}")).collect()
}

================================================
FILE: data/rust/morloc-manager/src/error.rs
================================================
use std::fmt;

use thiserror::Error;

use crate::types::{ContainerEngine, Scope, Version};

/// All error conditions surfaced by morloc-manager subcommands.
/// Messages come from `thiserror`'s `#[error]` attributes.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum ManagerError {
    #[error("Configuration not found: {0}")]
    ConfigNotFound(String),
    #[error("Permission denied: {0}")]
    ConfigPermissionDenied(String),
    #[error("Invalid configuration in {path}: {msg}")]
    ConfigParseError { path: String, msg: String },
    #[error("No active environment. Run: morloc-manager new")]
    NoActiveEnvironment,
    #[error("Environment not found: {0}")]
    EnvironmentNotFound(String),
    #[error("Environment error: {0}")]
    EnvError(String),
    #[error("Invalid version: {0}. Expected format: MAJOR.MINOR.PATCH. For named tags like 'edge', use --tag instead.")]
    InvalidVersion(String),
    #[error("No command specified. Use --shell or provide a command after --.")]
    NoCommand,
    #[error("No container engine found. 
Install podman or docker.")]
    EngineNotFound,
    #[error("Container engine ({engine}) failed with exit code {code}:\n{stderr}")]
    EngineError {
        engine: ContainerEngine,
        code: i32,
        stderr: String,
    },
    #[error("Freeze failed: {0}")]
    FreezeError(String),
    #[error("Unfreeze failed: {0}")]
    UnfreezeError(String),
    #[error("SELinux error: {0}")]
    SELinuxError(String),
    #[error("Doctor found {0} error(s)")]
    DoctorFailed(u32),
    // Message depends on which scope's setup is missing.
    #[error("{}", match .0 {
        Scope::Local => "No local configuration found. Run: morloc-manager new",
        Scope::System => "No system configuration found. Run: sudo morloc-manager new --system",
    })]
    SetupNotComplete(Scope),
}

impl fmt::Display for Version {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}.{}.{}", self.major, self.minor, self.patch)
    }
}

impl fmt::Display for ContainerEngine {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ContainerEngine::Docker => write!(f, "Docker"),
            ContainerEngine::Podman => write!(f, "Podman"),
        }
    }
}

// NOTE(review): generic parameters appear stripped by extraction; presumably
// `pub type Result<T> = std::result::Result<T, ManagerError>;` in the source.
pub type Result = std::result::Result;

================================================
FILE: data/rust/morloc-manager/src/freeze.rs
================================================
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};

use chrono::Utc;
use sha2::{Digest, Sha256};

use crate::config;
use crate::error::{ManagerError, Result};
use crate::types::*;

/// Snapshot the installed state under `v_data_dir` into `output_dir` as a
/// tarball plus a JSON freeze manifest. Fails early if no installed
/// programs are found or any file is unreadable.
pub fn freeze_from_dir(
    scope: Scope,
    ver: Version,
    engine: ContainerEngine,
    image: &str,
    v_data_dir: &str,
    output_dir: &str,
    verbose: bool,
) -> Result<()> {
    fs::create_dir_all(output_dir)
        .map_err(|e| ManagerError::FreezeError(format!("Failed to create output dir: {e}")))?;
    if !Path::new(v_data_dir).is_dir() {
        return Err(ManagerError::FreezeError(format!(
            "Data directory does not exist: {v_data_dir}"
        )));
    }
    // Validate programs exist before writing any files
    let modules = scan_modules(&format!("{v_data_dir}/fdb"));
    let programs = scan_programs(&format!("{v_data_dir}/fdb"));
    if programs.is_empty() {
        return
Err(ManagerError::FreezeError(
            "No morloc programs are installed. Compile and install with 'morloc make --install' before freezing.".to_string()
        ));
    }

    // Validate programs work before freezing
    let mh = "/opt/morloc";
    let bind_mounts = vec![(v_data_dir.to_string(), mh.to_string())];
    crate::serve::validate_programs(engine, image, &programs, bind_mounts, verbose)?;

    eprintln!("Freezing installed state from {v_data_dir}...");
    let tar_path = Path::new(output_dir).join("state.tar.gz");
    let tar_path = tar_path.to_string_lossy();
    // Only archive the subdirectories that actually exist.
    let mut tar_dirs: Vec<&str> = Vec::new();
    for dir in &["lib", "fdb", "bin", "exe", "opt", "src"] {
        if Path::new(&format!("{v_data_dir}/{dir}")).is_dir() {
            tar_dirs.push(dir);
        }
    }

    // Pre-flight: verify all files are readable before invoking tar
    for dir in &tar_dirs {
        check_readable_recursive(&Path::new(v_data_dir).join(dir))?;
    }

    let tar_status = Command::new("tar")
        .args(["-czf", &tar_path, "-C", v_data_dir])
        .args(&tar_dirs)
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::inherit())
        .status()
        .map_err(|e| ManagerError::FreezeError(format!("tar failed: {e}")))?;
    if !tar_status.success() {
        return Err(ManagerError::FreezeError(
            "tar failed (see error output above)".to_string()
        ));
    }
    eprintln!("Created {tar_path}");

    let now = Utc::now();

    // Get base image from the active environment config.
    // Check local config first, then system config for the active env name
    // (mirrors resolve_active_env_name in environment.rs).
    // NOTE(review): generic parameters appear stripped by extraction below;
    // presumably `Option<String>` and `read_config::<Config>` in the source.
    let active_env_name: Option = config::read_active_config()
        .and_then(|c| c.active_env)
        .or_else(|| {
            let sys_path = config::config_path(Scope::System);
            config::read_config::(&sys_path)
                .ok()
                .and_then(|c| c.active_env)
        });
    let (base_img, env_layer) = if let Some(ref env_name) = active_env_name {
        let env_scope = config::find_env_scope(env_name).unwrap_or(scope);
        match config::read_env_config(env_scope, env_name) {
            Ok(ec) => {
                let base = ec.base_image.clone();
                // Capture env layer info if there's a Dockerfile
                let layer = if ec.dockerfile.is_some() {
                    let df_path = config::env_dockerfile_path(env_scope, env_name);
                    if df_path.exists() {
                        let df_contents = fs::read_to_string(&df_path).unwrap_or_default();
                        let content_hash = ec.content_hash.unwrap_or_default();
                        // Use the tagged image reference (not digest) so that
                        // unfreeze can resolve it locally without network access.
                        // Digest references like localhost/morloc-env@sha256:...
                        // cause BuildKit to attempt HTTPS to localhost.
                        let image_tag = ec.built_image.clone();
                        Some(FrozenEnvLayer {
                            name: env_name.to_string(),
                            dockerfile: df_contents,
                            content_hash,
                            image_tag,
                        })
                    } else {
                        None
                    }
                } else {
                    None
                };
                (base, layer)
            }
            // Unreadable env config: record an unknown base, no layer.
            Err(_) => ("unknown".to_string(), None),
        }
    } else {
        ("unknown".to_string(), None)
    };

    let manifest = FreezeManifest {
        morloc_version: ver,
        frozen_at: now,
        modules,
        programs,
        base_image: base_img,
        env_layer,
        env_vars: Vec::new(),
    };
    let manifest_path = Path::new(output_dir).join("freeze-manifest.json");
    let manifest_path = manifest_path.to_string_lossy();
    write_freeze_manifest(&manifest_path, &manifest)?;
    eprintln!("Wrote {manifest_path}");
    eprintln!("Frozen state written to {output_dir}");
    Ok(())
}

/// Serialize a FreezeManifest to JSON at `path`.
pub fn write_freeze_manifest(path: &str, manifest: &FreezeManifest) -> Result<()> {
    let json = serde_json::to_vec(manifest)
        .map_err(|e| ManagerError::FreezeError(format!("JSON encode failed: {e}")))?;
    fs::write(path, json)
        .map_err(|e| ManagerError::FreezeError(format!("Write failed: {e}")))?;
    Ok(())
}

pub fn
read_freeze_manifest(path: &str) -> Result {
    let bytes = fs::read(path).map_err(|e| ManagerError::FreezeError(format!("Read failed: {e}")))?;
    serde_json::from_slice(&bytes)
        .map_err(|e| ManagerError::FreezeError(format!("Invalid manifest: {e}")))
}

// ======================================================================
// Internal: scanning installed state
// ======================================================================

/// Scan `<fdb_dir>` for `*.module` files and build ModuleEntry records
/// (name, optional version, SHA-256 of the raw file). Unreadable or
/// unparsable files are skipped silently.
fn scan_modules(fdb_dir: &str) -> Vec {
    let fdb_path = Path::new(fdb_dir);
    if !fdb_path.is_dir() {
        return Vec::new();
    }
    let Ok(entries) = fs::read_dir(fdb_path) else {
        return Vec::new();
    };
    // Minimal deserialization target: only the fields we record.
    #[derive(serde::Deserialize)]
    struct ModuleStub {
        name: String,
        #[serde(default)]
        version: Option,
    }
    entries
        .flatten()
        .filter(|e| {
            e.file_name()
                .to_string_lossy()
                .ends_with(".module")
        })
        .filter_map(|e| {
            let bytes = fs::read(e.path()).ok()?;
            let stub: ModuleStub = serde_json::from_slice(&bytes).ok()?;
            let digest = Sha256::digest(&bytes);
            let sha256: String = digest.iter().map(|b| format!("{b:02x}")).collect();
            Some(ModuleEntry {
                name: stub.name,
                version: stub.version,
                sha256,
            })
        })
        .collect()
}

/// Scan `<fdb_dir>` for `*.manifest` files; each becomes a ProgramEntry
/// named after the file stem, with its parsed command list.
fn scan_programs(fdb_dir: &str) -> Vec {
    let fdb_path = Path::new(fdb_dir);
    if !fdb_path.is_dir() {
        return Vec::new();
    }
    let Ok(entries) = fs::read_dir(fdb_path) else {
        return Vec::new();
    };
    entries
        .flatten()
        .filter(|e| {
            e.file_name()
                .to_string_lossy()
                .ends_with(".manifest")
        })
        .map(|e| {
            let filename = e.file_name().to_string_lossy().to_string();
            let prog_name = filename.strip_suffix(".manifest").unwrap_or(&filename);
            let commands = parse_manifest_commands(&e.path());
            ProgramEntry {
                name: prog_name.to_string(),
                commands,
            }
        })
        .collect()
}

/// Extract command names from a program manifest JSON; empty on any error.
fn parse_manifest_commands(path: &Path) -> Vec {
    let Ok(bytes) = fs::read(path) else {
        return Vec::new();
    };
    #[derive(serde::Deserialize)]
    struct ManifestStub {
        #[serde(default)]
        commands: Vec,
    }
    #[derive(serde::Deserialize)]
    struct ManifestStubCmd {
        name: String,
    }
    // NOTE(review): turbofish type stripped by extraction; presumably
    // `from_slice::<ManifestStub>` in the true source.
    match serde_json::from_slice::(&bytes) {
        Ok(stub) => stub.commands.into_iter().map(|c| c.name).collect(),
        Err(_) => Vec::new(),
    }
}

/// Walk a directory tree and verify every file is readable by the current user.
fn check_readable_recursive(dir: &Path) -> Result<()> {
    if !dir.is_dir() {
        return Ok(());
    }
    let entries = fs::read_dir(dir).map_err(|e| {
        ManagerError::FreezeError(format!("Cannot read directory {}: {e}", dir.display()))
    })?;
    for entry in entries {
        let entry = entry.map_err(|e| {
            ManagerError::FreezeError(format!(
                "Cannot read entry in {}: {e}",
                dir.display()
            ))
        })?;
        let path = entry.path();
        if path.is_dir() {
            check_readable_recursive(&path)?;
        } else if fs::File::open(&path).is_err() {
            // Readability is probed by actually opening the file.
            return Err(ManagerError::FreezeError(format!(
                "Unreadable file: {}. Fix permissions or remove before freezing.",
                path.display()
            )));
        }
    }
    Ok(())
}

================================================
FILE: data/rust/morloc-manager/src/main.rs
================================================
mod config;
mod container;
mod doctor;
mod environment;
mod error;
mod freeze;
mod selinux;
mod serve;
mod types;

use std::collections::HashSet;
use std::fs;
use std::io::{self, IsTerminal, Write};
use std::process::{Command, ExitCode, Stdio};

use clap::builder::styling::Style;
use clap::{CommandFactory, FromArgMatches, Parser, Subcommand, ValueEnum};

use crate::config as cfg;
use crate::container::{container_run_passthrough, RunConfig};
use crate::error::{ManagerError, Result};
use crate::selinux::{detect_selinux, volume_suffix, SELinuxMode};
use crate::types::*;

// ======================================================================
// CLI types
// ======================================================================

/// Build the clap help template, grouping subcommands into Development and
/// Deployment sections with ANSI bold/underline styling.
fn build_help_template() -> String {
    let b = Style::new().bold().render();
    let bu = Style::new().bold().underline().render();
    let r = "\x1b[0m"; // full ANSI reset
    format!(
        "\
{{name}} - {{about}} {{usage-heading}} {{usage}} {bu}Development{r} {b}setup{r} Configure the default container engine {b}new{r} Build a new morloc
environment {b}run{r} Run a command in the active environment {b}rm{r} Remove a morloc environment {b}ls{r} List morloc environments {b}info{r} Show configuration and installed environments {b}select{r} Select an environment {b}update{r} Rebuild an environment {b}nuke{r} Remove all morloc environments {bu}Deployment{r} {b}start{r} Serve an environment over the network {b}stop{r} Stop a running serve container {b}logs{r} Stream logs from a running serve container {b}freeze{r} Export installed state as a frozen artifact {b}unfreeze{r} Build a portable serve image from frozen state {b}status{r} List running serve containers {b}doctor{r} Check environment health and diagnose issues {bu}Options{r} {{options}}"
    )
}

/// Top-level CLI. `--version` is handled manually (clap's built-in flag is
/// disabled) so the global flags stay clean.
#[derive(Parser)]
#[command(name = "morloc-manager")]
#[command(about = "container lifecycle manager for Morloc")]
#[command(long_about = "Manage containerized Morloc installations, dependency layers, and deployments")]
#[command(disable_version_flag = true)]
#[command(arg_required_else_help = true)]
#[command(hide_possible_values = true)]
struct Cli {
    /// Print container commands to stderr before executing
    #[arg(short, long, global = true)]
    verbose: bool,
    /// Output machine-readable JSON instead of human-readable text
    #[arg(long, global = true)]
    json: bool,
    /// Print version and exit
    #[arg(long)]
    version: bool,
    // NOTE(review): generic stripped by extraction; presumably Option<Cmd>.
    #[command(subcommand)]
    command: Option,
}

#[derive(Subcommand)]
enum Cmd {
    // -- Development --
    /// Configure the default container engine
    #[command(display_order = 0)]
    #[command(after_help = "Examples:\n  morloc-manager setup --engine podman\n  morloc-manager setup --engine docker\n  sudo morloc-manager setup --engine podman --system")]
    Setup {
        /// Container engine: podman or docker
        #[arg(long, value_enum)]
        engine: Option,
        /// Apply to system scope (requires root)
        #[arg(long)]
        system: bool,
    },
    /// Build a new morloc environment
    #[command(display_order = 1)]
    #[command(after_help = "Examples:\n  morloc-manager new\n  morloc-manager new myenv --version
0.73.0\n  morloc-manager new myenv --tag edge\n  morloc-manager new myenv --image ubuntu:22.04 --dockerfile ./Dockerfile\n\nDefault (when --version, --tag, and --image are all omitted): pulls the\n:edge tag from the morloc registry and records the resolved version.\n\nIn non-interactive mode (no TTY), if no name is given, the latest edge\nimage is pulled and the environment is named after the detected morloc\nversion.")]
    New {
        /// Environment name (default: derived from base image version)
        name: Option,
        /// Base image from Docker Hub or a registry
        #[arg(long)]
        image: Option,
        /// Morloc version (MAJOR.MINOR.PATCH, leading 'v' stripped automatically)
        #[arg(long)]
        version: Option,
        /// Container image tag (e.g., 'edge', 'nightly')
        #[arg(long, conflicts_with_all = ["version", "image"])]
        tag: Option,
        /// Dockerfile to layer on top of the base image
        #[arg(long)]
        dockerfile: Option,
        /// Generate a stub Dockerfile for customization
        #[arg(long)]
        dockerfile_stub: bool,
        /// Force overwrite of existing Dockerfile stub
        #[arg(long)]
        force: bool,
        /// Include file/dir in build context; use src:dest for explicit placement (repeatable)
        #[arg(short = 'i', long = "include")]
        include: Vec,
        /// Path to a file with one engine argument per line
        #[arg(long)]
        flagfile: Option,
        /// A single engine flag (may be repeated)
        #[arg(short = 'x', long = "engine-arg", allow_hyphen_values = true)]
        engine_arg: Vec,
        /// Container engine: podman or docker
        #[arg(long, value_enum)]
        engine: Option,
        /// Shared memory size (default: 512m)
        #[arg(long)]
        shm_size: Option,
        /// Create in system scope (requires root)
        #[arg(long)]
        system: bool,
        /// Skip morloc init after creation
        #[arg(long)]
        no_init: bool,
        /// Skip interactive wizard, use defaults for unspecified options
        #[arg(long)]
        non_interactive: bool,
    },
    /// Run a command in the active environment
    #[command(display_order = 2)]
    #[command(after_help = "\
Examples: morloc-manager run -- morloc --version morloc-manager run -- morloc make -o svc svc.loc morloc-manager
run -- morloc install math morloc-manager run --shell Use -- to separate morloc-manager flags from the container command. Without --, flags like --version are interpreted by morloc-manager itself.")]
    Run {
        /// Command to run inside the container
        command: Vec,
        /// Start an interactive shell
        #[arg(long)]
        shell: bool,
        /// Pass environment variable to the container (KEY=VALUE)
        #[arg(short, long = "env")]
        env_vars: Vec,
        /// Read environment variables from a file (one KEY=VALUE per line)
        #[arg(long)]
        env_file: Option,
    },
    /// Remove a morloc environment
    #[command(display_order = 3)]
    #[command(after_help = "Examples:\n  morloc-manager rm myenv\n  sudo morloc-manager rm myenv --system")]
    Rm {
        /// Environment name(s) to remove
        names: Vec,
        /// Remove from system scope (requires root)
        #[arg(long)]
        system: bool,
        /// Remove even if active (deactivates first)
        #[arg(long)]
        force: bool,
    },
    /// Remove all morloc environments
    #[command(display_order = 8)]
    #[command(after_help = "Examples:\n  morloc-manager nuke\n  morloc-manager nuke --yes\n  morloc-manager nuke --images\n  sudo morloc-manager nuke --system\n  sudo morloc-manager nuke --system --images --yes")]
    Nuke {
        /// Remove system-scope environments instead of local (requires root)
        #[arg(long)]
        system: bool,
        /// Also remove base container images
        #[arg(long)]
        images: bool,
        /// Skip confirmation prompt
        #[arg(long)]
        yes: bool,
    },
    /// List morloc environments
    #[command(display_order = 4)]
    #[command(after_help = "Examples:\n  morloc-manager ls\n  morloc-manager ls --system")]
    Ls {
        /// Show only system environments
        #[arg(long)]
        system: bool,
        /// Show only local environments
        #[arg(long)]
        local: bool,
    },
    /// Show configuration and installed environments
    #[command(display_order = 5)]
    #[command(after_help = "Examples:\n  morloc-manager info\n  morloc-manager info myenv")]
    Info {
        /// Environment name (show details for this environment)
        name: Option,
        /// Look up the system-scope environment (when name is shadowed locally)
        #[arg(long)]
        system: bool,
    },
    /// Select an environment
    #[command(display_order = 6)]
    #[command(after_help = "Examples:\n  morloc-manager select myenv\n  sudo morloc-manager select myenv --system")]
    Select {
        /// Environment name
        name: String,
        /// Write to system config instead of local (requires root)
        #[arg(long)]
        system: bool,
    },
    /// Rebuild an environment
    #[command(display_order = 7)]
    #[command(after_help = "Examples:\n  morloc-manager update                    # rebuild active environment\n  morloc-manager update myenv              # rebuild a specific environment\n  morloc-manager update --shm-size 1g\n  morloc-manager update --dockerfile ./new.Dockerfile -i ./data\n  morloc-manager update myenv --reinit     # re-run morloc init in myenv")]
    Update {
        /// Environment name (default: active environment)
        name: Option,
        /// Change the base image
        #[arg(long)]
        image: Option,
        /// Change to a specific morloc version (MAJOR.MINOR.PATCH, leading 'v' stripped)
        #[arg(long)]
        version: Option,
        /// Container image tag (e.g., 'edge', 'nightly')
        #[arg(long, conflicts_with_all = ["version", "image"])]
        tag: Option,
        /// Replace the Dockerfile
        #[arg(long)]
        dockerfile: Option,
        /// Include file/dir in build context; use src:dest for explicit placement (repeatable)
        #[arg(short = 'i', long = "include")]
        include: Vec,
        /// Replace the flags file
        #[arg(long)]
        flagfile: Option,
        /// Add an engine flag (repeatable; appends unless --flagfile replaces)
        #[arg(short = 'x', long = "engine-arg", allow_hyphen_values = true)]
        engine_arg: Vec,
        /// Change the container engine
        #[arg(long, value_enum)]
        engine: Option,
        /// Change shared memory size
        #[arg(long)]
        shm_size: Option,
        /// Generate a stub Dockerfile (fails if one already exists)
        #[arg(long)]
        dockerfile_stub: bool,
        /// Force overwrite of existing Dockerfile stub
        #[arg(long)]
        force: bool,
        /// Skip Dockerfile build
        #[arg(long)]
        no_build: bool,
        /// Re-run morloc init
        #[arg(long)]
        reinit: bool,
        /// Accepted for scripting uniformity with `new` (no effect)
        #[arg(long, hide = true)]
        non_interactive: bool,
    },
    // -- Deployment --
    /// Serve an environment over the network
    #[command(display_order = 20)]
    #[command(after_help = "Examples:\n  morloc-manager start           # serve active environment\n  morloc-manager start myenv -p 9090:8080")]
    Start {
        /// Environment name (default: active environment)
        name: Option,
        /// Port mapping HOST:CONTAINER (default: 8080:8080)
        #[arg(short, long, value_parser = parse_port)]
        port: Vec<(u16, u16)>,
        /// Pass environment variable to the container (KEY=VALUE)
        #[arg(short, long = "env")]
        env_vars: Vec,
        /// Read environment variables from a file (one KEY=VALUE per line)
        #[arg(long)]
        env_file: Option,
        /// Replace an already-running serve container
        #[arg(long)]
        force: bool,
    },
    /// Stop a running serve container
    #[command(display_order = 21)]
    #[command(after_help = "Examples:\n  morloc-manager stop            # stop active environment\n  morloc-manager stop myenv")]
    Stop {
        /// Environment name (default: active environment)
        name: Option,
    },
    /// Stream logs from a running serve container
    #[command(display_order = 22)]
    #[command(after_help = "Examples:\n  morloc-manager logs            # logs from only running serve container\n  morloc-manager logs myenv\n  morloc-manager logs -f myenv   # follow mode")]
    Logs {
        /// Environment name (default: auto-detect running container)
        name: Option,
        /// Follow log output
        #[arg(short, long)]
        follow: bool,
    },
    /// Export installed state as a frozen artifact
    #[command(display_order = 23)]
    #[command(after_help = "Examples:\n  morloc-manager freeze\n  morloc-manager freeze myenv\n  morloc-manager freeze -o ./my-freeze\n\nRequires at least one program compiled with 'morloc make --install'.")]
    Freeze {
        /// Environment name (default: active environment)
        name: Option,
        /// Output directory (default: ./morloc-freeze)
        #[arg(short, long)]
        output: Option,
        /// Overwrite existing output directory
        #[arg(long)]
        force: bool,
    },
    /// Build a serve image from frozen state
    #[command(display_order = 24)]
    #[command(after_help = "Examples:\n  morloc-manager unfreeze --from ./morloc-freeze/state.tar.gz
-t myservice:v1\n  morloc-manager unfreeze --from ./state.tar.gz -t svc:v1 --engine docker")]
    Unfreeze {
        /// Path to state.tar.gz from freeze
        #[arg(long)]
        from: String,
        /// Image tag
        #[arg(short, long)]
        tag: String,
        /// Base image override
        #[arg(long)]
        base: Option,
        /// Container engine override (default: configured engine).
        /// Images frozen with engine-specific flags may not work with a different engine.
        #[arg(long, value_enum)]
        engine: Option,
        /// Rebuild image even if it already exists locally
        #[arg(long)]
        rebuild: bool,
    },
    /// Evaluate a morloc expression against a running serve container
    #[command(display_order = 25)]
    #[command(after_help = "Examples:\n  morloc-manager eval 'add 1 2'\n  morloc-manager eval myenv 'map (add 1) [1,2,3]'\n  morloc-manager eval -p 9090 'greet \"world\"'")]
    Eval {
        /// Expression to evaluate (or environment name if two positional args)
        first: String,
        /// Expression to evaluate (when first arg is environment name)
        second: Option,
        /// Port of the serve container (default: 8080)
        #[arg(short, long, default_value = "8080")]
        port: u16,
    },
    /// List running serve containers
    #[command(display_order = 26)]
    #[command(after_help = "Examples:\n  morloc-manager status")]
    Status,
    /// Check environment health and diagnose issues
    // NOTE(review): display_order = 26 duplicates Status above; clap resolves
    // ties by declaration order, but this looks like an off-by-one (27?).
    #[command(display_order = 26)]
    #[command(after_help = "Examples:\n  morloc-manager doctor\n  morloc-manager doctor myenv\n  morloc-manager doctor --deep")]
    Doctor {
        /// Environment name (default: active)
        name: Option,
        /// Check system-scope environment
        #[arg(long)]
        system: bool,
        /// Run checks inside the container (slower, more thorough)
        #[arg(long)]
        deep: bool,
        /// Treat warnings as errors (non-zero exit on warnings)
        #[arg(long)]
        strict: bool,
    },
}

/// CLI-facing mirror of ContainerEngine for clap's ValueEnum parsing.
#[derive(Clone, ValueEnum)]
enum EngineArg {
    Docker,
    Podman,
}

// NOTE(review): generic stripped by extraction; presumably `From<EngineArg>`.
impl From for ContainerEngine {
    fn from(e: EngineArg) -> Self {
        match e {
            EngineArg::Docker => ContainerEngine::Docker,
            EngineArg::Podman => ContainerEngine::Podman,
        }
    }
}

/// clap value parser for -p/--port: "HOST:CONTAINER" -> (host, container).
fn parse_port(s: &str) -> std::result::Result<(u16,
u16), String> {
    let parts: Vec<&str> = s.splitn(2, ':').collect();
    if parts.len() != 2 {
        return Err(format!("Expected HOST:CONTAINER format, got: {s}"));
    }
    let host: u16 = parts[0]
        .parse()
        .map_err(|_| format!("Invalid host port: {}", parts[0]))?;
    let container: u16 = parts[1]
        .parse()
        .map_err(|_| format!("Invalid container port: {}", parts[1]))?;
    Ok((host, container))
}

/// Parse env vars from --env flags and --env-file, returning (key, value) pairs.
// Flags are appended after file entries, so --env overrides --env-file when
// the consumer keeps the last occurrence of a key.
fn collect_env_vars(
    env_flags: &[String],
    env_file: Option<&str>,
) -> Result> {
    let mut result = Vec::new();
    if let Some(path) = env_file {
        let contents = std::fs::read_to_string(path).map_err(|e| {
            ManagerError::EnvError(format!("Cannot read env file {path}: {e}"))
        })?;
        for line in contents.lines() {
            let trimmed = line.trim();
            if trimmed.is_empty() || trimmed.starts_with('#') {
                continue;
            }
            if let Some((k, v)) = trimmed.split_once('=') {
                result.push((k.to_string(), v.to_string()));
            }
        }
    }
    for entry in env_flags {
        if let Some((k, v)) = entry.split_once('=') {
            result.push((k.to_string(), v.to_string()));
        } else {
            // Bare key — pass through from host environment
            if let Ok(v) = std::env::var(entry) {
                result.push((entry.clone(), v));
            } else {
                eprintln!("Warning: env var '{entry}' not set in host environment, skipping");
            }
        }
    }
    Ok(result)
}

// ======================================================================
// Main
// ======================================================================

fn main() -> ExitCode {
    // Restore default SIGPIPE so piping output to e.g. `head` exits quietly.
    #[cfg(unix)]
    {
        use nix::sys::signal::{signal, SigHandler, Signal};
        unsafe {
            let _ = signal(Signal::SIGPIPE, SigHandler::SigDfl);
        }
    }
    let matches = match Cli::command()
        .help_template(build_help_template())
        .try_get_matches()
    {
        Ok(m) => m,
        Err(e) => {
            // Detect missing -- separator for the run subcommand
            let rendered = e.to_string();
            if rendered.contains("unrecognized") || rendered.contains("unexpected") {
                // NOTE(review): generic stripped; presumably Vec<String>.
                let args: Vec = std::env::args().collect();
                if args.len() > 1 && args[1] == "run" {
                    let inner: Vec<&str>
= args[2..].iter()
                        .filter(|a| *a != "--shell")
                        .map(|a| a.as_str())
                        .collect();
                    if !inner.is_empty() {
                        eprintln!("Error: unrecognized arguments for 'run'.");
                        eprintln!();
                        eprintln!("Use -- to separate morloc-manager flags from the container command:");
                        eprintln!("    morloc-manager run -- {}", inner.join(" "));
                        return ExitCode::from(2);
                    }
                }
            }
            // Any other parse error: let clap print and exit.
            e.exit();
        }
    };
    let cli = Cli::from_arg_matches(&matches).unwrap();
    if cli.version {
        println!("morloc-manager {}", env!("CARGO_PKG_VERSION"));
        return ExitCode::SUCCESS;
    }
    let Some(cmd) = cli.command else {
        // No subcommand: show help and exit with the conventional usage code.
        Cli::command()
            .help_template(build_help_template())
            .print_help()
            .ok();
        return ExitCode::from(2);
    };
    match dispatch(cli.verbose, cli.json, cmd) {
        Ok(()) => ExitCode::SUCCESS,
        Err(err) => {
            if cli.json {
                println!("{}", serde_json::json!({"error": format!("{err}")}));
            } else {
                eprintln!("{err}");
            }
            // Propagate the container engine's own exit code when available.
            if let ManagerError::EngineError { code, .. } = &err {
                ExitCode::from(*code as u8)
            } else {
                ExitCode::FAILURE
            }
        }
    }
}

/// Map the --system flag to a Scope.
fn resolve_scope(system: bool) -> Scope {
    if system { Scope::System } else { Scope::Local }
}

/// Probe write access to the system config dir by creating (and removing)
/// a marker file; returns a permission error suggesting sudo on failure.
fn check_system_write_access() -> Result<()> {
    let sys_dir = cfg::config_dir(Scope::System);
    if sys_dir.exists() {
        let test_path = sys_dir.join(".write-check");
        match fs::write(&test_path, b"") {
            Ok(_) => {
                let _ = fs::remove_file(&test_path);
                Ok(())
            }
            Err(_) => Err(ManagerError::ConfigPermissionDenied(format!(
                "{}. System-scope operations require root. Re-run with sudo",
                sys_dir.display()
            )))
        }
    } else {
        match fs::create_dir_all(&sys_dir) {
            Ok(_) => Ok(()),
            Err(_) => Err(ManagerError::ConfigPermissionDenied(format!(
                "{}. System-scope operations require root. Re-run with sudo",
                sys_dir.display()
            )))
        }
    }
}

/// Resolve an environment by explicit name or fall back to the active environment.
fn resolve_env_or_active(name: Option) -> Result<(String, Scope, EnvironmentConfig)> { match name { Some(n) => { let scope = cfg::find_env_scope(&n)?; let ec = cfg::read_env_config(scope, &n)?; Ok((n, scope, ec)) } None => environment::resolve_active_environment(), } } fn ensure_engine() -> Result { if let Some(cfg) = cfg::read_active_config() { return Ok(cfg.engine); } Err(ManagerError::SetupNotComplete(Scope::Local)) } fn which(name: &str) -> bool { Command::new("which") .arg(name) .stdout(Stdio::null()) .stderr(Stdio::null()) .status() .map(|s| s.success()) .unwrap_or(false) } fn display_engine(engine: ContainerEngine) -> &'static str { match engine { ContainerEngine::Docker => "docker", ContainerEngine::Podman => "podman", } } fn bold_green(msg: &str) -> String { if io::stderr().is_terminal() { format!("\x1b[1;32m{msg}\x1b[0m") } else { msg.to_string() } } fn check_docker_socket(engine: ContainerEngine) { use std::path::Path; if engine != ContainerEngine::Docker { return; } let socket = Path::new("/var/run/docker.sock"); if !socket.exists() { eprintln!("Warning: Docker socket not found at /var/run/docker.sock"); eprintln!(" Docker may not be installed or the daemon may not be running."); } else if nix::unistd::access(socket, nix::unistd::AccessFlags::R_OK).is_err() { eprintln!("Warning: Cannot access Docker socket. You may need to:"); eprintln!(" sudo usermod -aG docker $USER # then log out and back in"); } } /// Returns Err with a clear message if Docker is selected but its socket is unreachable. fn require_docker_socket(engine: ContainerEngine) -> Result<()> { use std::path::Path; if engine != ContainerEngine::Docker { return Ok(()); } let socket = Path::new("/var/run/docker.sock"); if !socket.exists() { return Err(ManagerError::EnvError( "Docker socket not found at /var/run/docker.sock. 
Ensure Docker is installed and the daemon is running.".to_string() )); } if nix::unistd::access(socket, nix::unistd::AccessFlags::R_OK).is_err() { return Err(ManagerError::EnvError( "Cannot access Docker socket. Add your user to the docker group:\n \ sudo usermod -aG docker $USER # then log out and back in".to_string() )); } Ok(()) } /// Check if Podman is configured to see rootful images from rootless contexts. /// Returns true if additionalimagestore is configured (or not needed). fn check_podman_additional_stores(engine: ContainerEngine) -> bool { if engine != ContainerEngine::Podman { return true; } // Root doesn't need additional stores — it owns the store if nix::unistd::getuid().is_root() { return true; } let rootful_store = std::path::Path::new("/var/lib/containers/storage"); if !rootful_store.is_dir() { // No rootful store exists, nothing to configure return true; } // Check system and user storage.conf for additionalimagestores for path in &[ "/etc/containers/storage.conf", &format!( "{}/.config/containers/storage.conf", dirs::home_dir() .unwrap_or_default() .to_string_lossy() ), ] { if let Ok(contents) = fs::read_to_string(path) { if contents.contains("/var/lib/containers/storage") { return true; } } } false } fn warn_podman_additional_stores() { eprintln!("Warning: Podman is not configured to see system (rootful) images."); eprintln!(" Non-root users will not be able to run system environments."); eprintln!(" Option 1 (recommended): Use Docker for system environments."); eprintln!(" Option 2: Add to [storage.options] in /etc/containers/storage.conf:"); eprintln!(); eprintln!(" additionalimagestores = [\"/var/lib/containers/storage\"]"); eprintln!(); eprintln!(" Note: Option 2 may cause storage locking conflicts on Fedora and Debian."); } // ====================================================================== // Dispatch // ====================================================================== fn dispatch(verbose: bool, json: bool, cmd: Cmd) -> 
Result<()> { match cmd { // ---- setup ---- Cmd::Setup { engine, system } => { // With no --engine, show the current engine settings if engine.is_none() { let local = cfg::read_config::(&cfg::config_path(Scope::Local)).ok(); let sys = cfg::read_config::(&cfg::config_path(Scope::System)).ok(); println!("Local engine: {}", local.as_ref().map(|c| display_engine(c.engine)).unwrap_or("unset")); println!("System engine: {}", sys.as_ref().map(|c| display_engine(c.engine)).unwrap_or("unset")); println!(); println!("Set with: morloc-manager setup --engine "); return Ok(()); } if system { check_system_write_access()?; } let scope = resolve_scope(system); let eng: ContainerEngine = engine.unwrap().into(); check_docker_socket(eng); let cfg_path = cfg::config_path(scope); let base_cfg = cfg::read_config::(&cfg_path).unwrap_or_default(); let new_cfg = Config { engine: eng, ..base_cfg }; cfg::write_config(&cfg_path, &new_cfg)?; eprintln!("Engine set to: {}", display_engine(eng)); Ok(()) } // ---- new ---- Cmd::New { name, image, version, tag, dockerfile, dockerfile_stub, force, include, flagfile, engine_arg, engine, shm_size, system, no_init, non_interactive, } => { if system { check_system_write_access()?; } let scope = resolve_scope(system); // Resolve engine: explicit flag > config default > auto-detect single > error // For --system, prefer system config so the env uses the system engine. 
let resolved_engine = if let Some(e) = engine { let eng: ContainerEngine = e.into(); check_docker_socket(eng); eng } else if let Some(cfg) = if system { // System scope: check system config first, then local cfg::read_config::(&cfg::config_path(Scope::System)).ok() .or_else(|| cfg::read_active_config()) } else { cfg::read_active_config() } { cfg.engine } else { // No config — try auto-detection let has_podman = which("podman"); let has_docker = which("docker"); match (has_podman, has_docker) { (true, false) => ContainerEngine::Podman, (false, true) => { check_docker_socket(ContainerEngine::Docker); ContainerEngine::Docker } (true, true) => { let scope_flag = if system { " --system" } else { "" }; return Err(ManagerError::EnvError(format!( "Both podman and docker are installed and no default is set.\n\ Pick one with:\n \ morloc-manager setup --engine podman{scope_flag}\n \ morloc-manager setup --engine docker{scope_flag}\n\ Or pass --engine to this command directly." ))); } (false, false) => return Err(ManagerError::EngineNotFound), } }; // Ensure config exists (write default if first run) if cfg::read_active_config().is_none() { let cfg_path = cfg::config_path(scope); let new_cfg = Config { active_env: None, engine: resolved_engine, }; cfg::write_config(&cfg_path, &new_cfg)?; } let interactive = !non_interactive && io::stdin().is_terminal(); if !non_interactive && !interactive { eprintln!("Note: No TTY detected, running in non-interactive mode."); } // Step 1: Resolve name (ask first so user isn't surprised after a long pull) let env_name = if let Some(n) = name { if cfg::env_config_path(scope, &n).is_file() { return Err(ManagerError::EnvError(format!( "Environment '{n}' already exists" ))); } n } else if interactive { loop { eprint!("Environment name: "); io::stderr().flush().ok(); let mut name_input = String::new(); io::stdin().read_line(&mut name_input).ok(); let n = name_input.trim().to_string(); if n.is_empty() { eprintln!("Name cannot be empty."); continue; } 
if cfg::env_config_path(scope, &n).is_file() { eprintln!("Environment '{n}' already exists. Choose a different name."); continue; } break n; } } else { // Non-interactive without a name: will be filled in after // version resolution below (default to version string) String::new() }; // Validate name early (before potentially slow image pull) if !env_name.is_empty() { environment::validate_env_name(&env_name)?; } if version.is_some() && image.is_some() { return Err(ManagerError::EnvError( "--version and --image are mutually exclusive".to_string() )); } // Validate cheap-to-check parameters before any I/O if let Some(ref shm) = shm_size { if !environment::is_valid_shm_size(shm) { return Err(ManagerError::EnvError(format!( "Invalid --shm-size '{shm}'. Use format like: 512m, 1g, 2048k" ))); } } // Step 2: Resolve base image and version let (base_image, original_image, morloc_ver) = if let Some(ref ver_str) = version { // Strip leading 'v' for convenience (e.g., "v0.77.0" -> "0.77.0") let clean = ver_str.strip_prefix('v').unwrap_or(ver_str); let ver: Version = clean.parse().map_err(|_| { ManagerError::InvalidVersion(ver_str.clone()) })?; let img = environment::pull_version_image(resolved_engine, &ver)?; (img, None, Some(ver)) } else if let Some(ref t) = tag { let (img, ver) = environment::pull_tagged_image(resolved_engine, t)?; (img, None, Some(ver)) } else if let Some(ref img) = image { environment::pull_custom_image(resolved_engine, img)?; (img.clone(), None, None) } else if interactive { eprintln!("Choose a base image:"); eprintln!(" [1] Latest morloc release (recommended)"); eprintln!(" [2] Specific morloc version"); eprintln!(" [3] Custom image"); eprint!("Choose [1]: "); io::stderr().flush().ok(); let mut input = String::new(); io::stdin().read_line(&mut input).ok(); match input.trim() { "2" => { eprint!("Morloc version: "); io::stderr().flush().ok(); let mut ver_input = String::new(); io::stdin().read_line(&mut ver_input).ok(); let ver: Version = 
ver_input.trim().parse().map_err(|_| { ManagerError::InvalidVersion(ver_input.trim().to_string()) })?; let img = environment::pull_version_image(resolved_engine, &ver)?; (img, None, Some(ver)) } "3" => { eprint!("Image reference: "); io::stderr().flush().ok(); let mut img_input = String::new(); io::stdin().read_line(&mut img_input).ok(); let img = img_input.trim().to_string(); if img.is_empty() { return Err(ManagerError::EnvError("No image specified".to_string())); } environment::pull_custom_image(resolved_engine, &img)?; (img, None, None) } _ => { let (img, ver) = environment::resolve_latest(resolved_engine)?; (img.clone(), Some(img), Some(ver)) } } } else { let (img, ver) = environment::resolve_latest(resolved_engine)?; (img.clone(), Some(img), Some(ver)) }; // Fill in name for non-interactive mode if it wasn't provided let env_name = if env_name.is_empty() { if let Some(ref ver) = morloc_ver { let default_name = ver.show(); if cfg::env_config_path(scope, &default_name).is_file() { return Err(ManagerError::EnvError(format!( "Environment '{}' already exists. 
Specify a different name: morloc-manager new ...", default_name ))); } default_name } else { return Err(ManagerError::EnvError( "Environment name required in non-interactive mode".to_string(), )); } } else { env_name }; // Resolve dockerfile: explicit path takes precedence, then stub generation let resolved_dockerfile = if dockerfile.is_some() { if dockerfile_stub { return Err(ManagerError::EnvError( "Cannot use both --dockerfile and --dockerfile-stub".to_string(), )); } dockerfile } else if dockerfile_stub { let df_path = cfg::env_dockerfile_path(scope, &env_name); if df_path.exists() && !force { return Err(ManagerError::EnvError(format!( "Dockerfile already exists: {}\nUse --force to overwrite.", df_path.display() ))); } let stub_dir = cfg::data_dir(scope).join("tmp"); fs::create_dir_all(&stub_dir).map_err(|e| { ManagerError::EnvError(format!("Failed to create tmp dir: {e}")) })?; let stub_path = stub_dir.join(format!("{env_name}.Dockerfile")); let stub_content = format!( "# morloc environment: {env_name}\n\ # Edit this file, then rebuild with: morloc-manager update\n\ \n\ # CONTAINER_BASE is replaced at build time with the environment's base image\n\ ARG CONTAINER_BASE=scratch\n\ FROM ${{CONTAINER_BASE}}\n\ \n\ # Example: install system packages\n\ # RUN apt-get update && apt-get install -y jq && rm -rf /var/lib/apt/lists/*\n\ \n\ # Example: install Python packages\n\ # RUN pip install scikit-learn pandas\n\ \n\ # Example: install R packages\n\ # RUN R -e \"install.packages('ggplot2', repos='https://cloud.r-project.org')\"\n" ); fs::write(&stub_path, &stub_content).map_err(|e| { ManagerError::EnvError(format!("Failed to write stub Dockerfile: {e}")) })?; Some(stub_path.to_string_lossy().to_string()) } else { None }; let opts = environment::ApplyOptions { name: env_name.clone(), scope, is_new: true, base_image: Some(base_image), original_image, morloc_version: morloc_ver, dockerfile: resolved_dockerfile, includes: include, flagfile, engine_args: engine_arg, 
engine: Some(resolved_engine), shm_size: Some(shm_size.unwrap_or_else(|| "512m".to_string())), skip_dockerfile_build: dockerfile_stub, verbose, }; environment::apply_environment(&opts)?; if dockerfile_stub { let df_path = cfg::env_dockerfile_path(scope, &env_name); eprintln!("Stub Dockerfile: {}", df_path.display()); eprintln!("Edit it, then run: morloc-manager update {env_name}"); } eprintln!("Created environment: {env_name}"); // Run morloc init, passing the env explicitly (no active env needed) if !no_init { let ec = cfg::read_env_config(scope, &env_name)?; run_morloc_init_for(Some((env_name.clone(), scope, ec)), verbose)?; } else { eprintln!("Warning: --no-init was used. Run 'morloc-manager run -- morloc init -f' before building morloc programs."); } eprintln!("{}", bold_green(&format!("Environment '{env_name}' is ready."))); eprintln!("Activate it with: morloc-manager select {env_name}"); if system && !check_podman_additional_stores(resolved_engine) { eprintln!(); warn_podman_additional_stores(); } Ok(()) } // ---- run ---- Cmd::Run { command, shell, env_vars, env_file } => { if !shell && command.is_empty() { return Err(ManagerError::NoCommand); } let user_env = collect_env_vars(&env_vars, env_file.as_deref())?; run_in_container(verbose, shell, &command, &user_env).map_err(|e| match e { ManagerError::EnvironmentNotFound(msg) => ManagerError::EnvironmentNotFound( format!("{msg}. 
Run 'morloc-manager new' to create an environment") ), other => other, }) } // ---- rm ---- Cmd::Rm { names, system, force } => { if system { check_system_write_access()?; } if names.is_empty() { return Err(ManagerError::EnvError("No environment names specified".to_string())); } // Capture current active env for post-removal feedback let was_active = cfg::read_active_config().and_then(|c| c.active_env); // Attempt each removal; collect failures, continue past errors let mut failures: Vec = Vec::new(); for name in &names { let result: Result<()> = (|| { let scope = if system { Scope::System } else { cfg::find_env_scope(name)? }; if scope == Scope::System && !system { check_system_write_access()?; } if !force { if let Some(cfg) = cfg::read_active_config() { if cfg.active_env.as_deref() == Some(name.as_str()) { return Err(ManagerError::EnvError(format!( "active environment (use --force)" ))); } } } let ec = cfg::read_env_config(scope, name) .map_err(|_| ManagerError::EnvironmentNotFound(name.to_string()))?; environment::remove_environment(ec.engine, scope, name)?; Ok(()) })(); match result { Ok(()) => { // Check if removed env was active and report new state if was_active.as_deref() == Some(name.as_str()) { match environment::resolve_active_environment() { Ok((new_active, _, _)) => { // Persist the fallback as the new active environment let _ = environment::select_environment(&new_active, Scope::Local); eprintln!("Removed environment: {name}. Active environment is now: {new_active}"); } Err(_) => { eprintln!("Removed environment: {name}. No active environment. 
Use: morloc-manager select "); } } } else { eprintln!("Removed environment: {name}"); } } Err(e) => failures.push(format!("{name}: {e}")), } } if !failures.is_empty() { eprintln!(); eprintln!("Failed to remove {} environment(s):", failures.len()); for f in &failures { eprintln!(" {f}"); } return Err(ManagerError::EnvError(format!( "{} of {} removals failed", failures.len(), names.len() ))); } Ok(()) } // ---- nuke ---- Cmd::Nuke { system, images, yes } => { let scope = if system { Scope::System } else { Scope::Local }; let scope_label = if system { "system" } else { "local" }; if system { check_system_write_access()?; } // Confirm before removing all environments let env_names = cfg::list_env_names(scope); if env_names.is_empty() { eprintln!("No {scope_label} environments found."); return Ok(()); } if !yes { eprintln!("This will remove {} {scope_label} environment(s):", env_names.len()); for n in &env_names { eprintln!(" {n}"); } if io::stdin().is_terminal() { eprint!("Continue? [y/N] "); io::stderr().flush().ok(); let mut answer = String::new(); io::stdin().read_line(&mut answer).ok(); if !matches!(answer.trim(), "y" | "yes" | "Y" | "YES") { eprintln!("Aborted."); return Ok(()); } } else { return Err(ManagerError::EnvError( "nuke requires --yes for non-interactive use".to_string(), )); } } eprintln!("Removing all {scope_label} morloc environments..."); // Collect env info before removal (configs are deleted during removal) let mut env_list: Vec<(String, ContainerEngine)> = Vec::new(); let mut base_images: HashSet = HashSet::new(); for name in cfg::list_env_names(scope) { if let Ok(ec) = cfg::read_env_config(scope, &name) { if images { base_images.insert(ec.base_image.clone()); if let Some(ref orig) = ec.original_image { base_images.insert(orig.clone()); } } env_list.push((name, ec.engine)); } } if env_list.is_empty() { eprintln!("No {scope_label} environments found."); } else { let mut removed = 0usize; let mut failures: Vec = Vec::new(); for (name, engine) in 
&env_list { eprintln!("Removing environment: {name}..."); match environment::remove_environment(*engine, scope, name) { Ok(()) => { eprintln!(" Removed: {name}"); removed += 1; } Err(e) => { eprintln!(" Failed: {name}: {e}"); failures.push(format!("{name}: {e}")); } } } // Clear active_env in the targeted scope's config let cfg_path = cfg::config_path(scope); if let Ok(cfg_data) = cfg::read_config::(&cfg_path) { if cfg_data.active_env.is_some() { let new_cfg = Config { active_env: None, ..cfg_data }; let _ = cfg::write_config(&cfg_path, &new_cfg); eprintln!("Cleared active environment."); } } eprintln!("Removed {removed} environment(s)."); if !failures.is_empty() { eprintln!(); eprintln!("Failed to remove {} environment(s):", failures.len()); for f in &failures { eprintln!(" {f}"); } return Err(ManagerError::EnvError(format!( "{} of {} removals failed", failures.len(), env_list.len() ))); } } // Remove base images if --images if images && !base_images.is_empty() { let engine = ensure_engine().unwrap_or(ContainerEngine::Docker); eprintln!("Removing base images..."); for img in &base_images { if container::image_exists_locally(engine, img) { eprintln!(" Removing image: {img}..."); if container::remove_image(engine, img) { eprintln!(" Removed: {img}"); } else { eprintln!(" Failed to remove: {img}"); } } } } // Hint about the other scope let other_scope = if system { Scope::Local } else { Scope::System }; let other_envs = cfg::list_env_names(other_scope); if !other_envs.is_empty() { if system { eprintln!( "{} local environment(s) remain. Use: morloc-manager nuke", other_envs.len() ); } else { eprintln!( "{} system environment(s) remain. Use: sudo morloc-manager nuke --system", other_envs.len() ); } } Ok(()) } // ---- ls ---- Cmd::Ls { system, local } => { let active_env = cfg::read_active_config() .and_then(|c| c.active_env); let active_str = active_env.as_deref(); // Determine which scope effectively owns the active environment. 
// Local takes priority (same resolution as run/select). let active_in_local = active_str .map(|name| cfg::env_config_path(Scope::Local, name).is_file()) .unwrap_or(false); let show_local = !system || local; let show_system = !local || system; let local_envs = if show_local { let local_active = if active_in_local { active_str } else { None }; environment::list_environments(Scope::Local, local_active) } else { Vec::new() }; let system_envs = if show_system { let system_active = if active_in_local { None } else { active_str }; environment::list_environments(Scope::System, system_active) } else { Vec::new() }; if json { #[derive(serde::Serialize)] struct LsOutput { local: Vec, system: Vec, } let output = LsOutput { local: local_envs, system: system_envs }; println!("{}", serde_json::to_string_pretty(&output).unwrap()); } else { let total = local_envs.len() + system_envs.len(); if !local_envs.is_empty() { println!("Local environments:"); for e in &local_envs { let active_mark = if e.active { " (active)" } else { "" }; let ver_mark = e.morloc_version.as_ref() .map(|v| format!(" [{}]", v.show())) .unwrap_or_default(); println!(" {}{}{}", e.name, ver_mark, active_mark); } } if !system_envs.is_empty() { if !local_envs.is_empty() { println!(); } println!("System environments:"); for e in &system_envs { let active_mark = if e.active { " (active)" } else { "" }; let ver_mark = e.morloc_version.as_ref() .map(|v| format!(" [{}]", v.show())) .unwrap_or_default(); println!(" {}{}{}", e.name, ver_mark, active_mark); } } if total == 0 { println!("No environments found. Create one with: morloc-manager new"); } } Ok(()) } // ---- info ---- Cmd::Info { name, system } => { if let Some(env_name) = name { // Detailed info for a specific environment let scope = if system { if !cfg::env_config_path(Scope::System, &env_name).is_file() { return Err(ManagerError::EnvironmentNotFound(format!( "{env_name} (in system scope)" ))); } Scope::System } else { cfg::find_env_scope(&env_name)? 
}; let ec = cfg::read_env_config(scope, &env_name)?; let data_dir = cfg::env_data_dir(scope, &env_name); let active = cfg::read_active_config() .and_then(|c| c.active_env) .as_deref() == Some(env_name.as_str()); if json { #[derive(serde::Serialize)] struct InfoDetail { name: String, scope: String, active: bool, base_image: String, built_image: Option, morloc_version: Option, engine: String, shm_size: String, dockerfile: Option, flags: Vec, data_dir: String, } let df_str = ec.dockerfile.as_ref().map(|_| { let df_path = cfg::env_dockerfile_path(scope, &env_name); df_path.display().to_string() }); let flags_path = cfg::env_flags_path(scope, &env_name); let flags = cfg::read_flags_file_lines(&flags_path); let output = InfoDetail { name: ec.name.clone(), scope: match scope { Scope::Local => "local", Scope::System => "system" }.to_string(), active, base_image: ec.base_image.clone(), built_image: ec.built_image.clone(), morloc_version: ec.morloc_version.clone(), engine: display_engine(ec.engine).to_string(), shm_size: ec.shm_size.clone(), dockerfile: df_str, flags, data_dir: data_dir.display().to_string(), }; println!("{}", serde_json::to_string_pretty(&output).unwrap()); } else { println!("Name: {}", ec.name); println!("Scope: {}", match scope { Scope::Local => "local", Scope::System => "system" }); println!("Active: {}", if active { "yes" } else { "no" }); println!("Base image: {}", ec.base_image); if let Some(ref img) = ec.built_image { println!("Built image: {img}"); } if let Some(ref ver) = ec.morloc_version { println!("Morloc version: {}", ver.show()); } println!("Engine: {}", display_engine(ec.engine)); println!("SHM size: {}", ec.shm_size); println!("Dockerfile: {}", match ec.dockerfile { Some(_) => { let df_path = cfg::env_dockerfile_path(scope, &env_name); if df_path.exists() { df_path.display().to_string() } else { format!("{} (MISSING)", df_path.display()) } } None => "none".to_string(), }); let flags_path = cfg::env_flags_path(scope, &env_name); 
println!("Flags: {}", flags_path.display()); let flags = cfg::read_flags_file_lines(&flags_path); for flag in &flags { println!(" {flag}"); } println!("Data dir: {}", data_dir.display()); } } else { // Overview let local_cfg = cfg::read_config::(&cfg::config_path(Scope::Local)).ok(); let system_cfg = cfg::read_config::(&cfg::config_path(Scope::System)).ok(); let se_mode = detect_selinux(); let active_env = environment::resolve_active_environment() .map(|(name, _, _)| name) .unwrap_or_else(|_| "none".to_string()); let se_str = match se_mode { SELinuxMode::Enforcing => "enforcing", SELinuxMode::Permissive => "permissive", SELinuxMode::Disabled => "not detected", }; if json { #[derive(serde::Serialize)] struct DirInfo { path: String, exists: bool } #[derive(serde::Serialize)] struct InfoOverview { active: String, local_engine: String, system_engine: String, selinux: String, directories: std::collections::BTreeMap, local: Vec, system: Vec, } let active_str = if active_env == "none" { None } else { Some(active_env.as_str()) }; let mut directories = std::collections::BTreeMap::new(); for (label, path) in [ ("config_local", cfg::config_dir(Scope::Local)), ("data_local", cfg::data_dir(Scope::Local)), ("config_system", cfg::config_dir(Scope::System)), ("data_system", cfg::data_dir(Scope::System)), ] { directories.insert(label.to_string(), DirInfo { path: path.display().to_string(), exists: path.is_dir(), }); } let output = InfoOverview { active: active_env.clone(), local_engine: local_cfg.as_ref().map(|c| display_engine(c.engine)).unwrap_or("unset").to_string(), system_engine: system_cfg.as_ref().map(|c| display_engine(c.engine)).unwrap_or("unset").to_string(), selinux: se_str.to_string(), directories, local: environment::list_environments(Scope::Local, active_str), system: environment::list_environments(Scope::System, active_str), }; println!("{}", serde_json::to_string_pretty(&output).unwrap()); } else { println!("Active: {active_env}"); println!("Local engine: {}", 
local_cfg.as_ref().map(|c| display_engine(c.engine)).unwrap_or("unset")); println!("System engine: {}", system_cfg.as_ref().map(|c| display_engine(c.engine)).unwrap_or("unset")); println!("SELinux: {se_str}"); let dirs = [ ("Config (local)", cfg::config_dir(Scope::Local)), ("Data (local)", cfg::data_dir(Scope::Local)), ("Config (system)", cfg::config_dir(Scope::System)), ("Data (system)", cfg::data_dir(Scope::System)), ]; println!("\nDirectories:"); for (label, path) in &dirs { let status = if path.is_dir() { "exists" } else { "not found" }; println!(" {:<20} {} ({})", label, path.display(), status); } let active_str = if active_env == "none" { None } else { Some(active_env.as_str()) }; // Check if active env lives in local scope (local takes priority) let active_in_local = active_str .map(|name| cfg::env_config_path(Scope::Local, name).is_file()) .unwrap_or(false); let local_envs = environment::list_environments(Scope::Local, active_str); println!("\nLocal environments:"); if local_envs.is_empty() { println!(" (none)"); } else { for e in &local_envs { let active_mark = if e.active { " (active)" } else { "" }; let ver_mark = e.morloc_version.as_ref() .map(|v| format!(" [{}]", v.show())) .unwrap_or_default(); println!(" {}{}{}", e.name, ver_mark, active_mark); } } let system_envs = environment::list_environments(Scope::System, active_str); if !system_envs.is_empty() { println!("\nSystem environments:"); for e in &system_envs { let active_mark = if e.active && active_in_local { " (active - shadowed)" } else if e.active { " (active)" } else { "" }; let ver_mark = e.morloc_version.as_ref() .map(|v| format!(" [{}]", v.show())) .unwrap_or_default(); println!(" {}{}{}", e.name, ver_mark, active_mark); } } } } Ok(()) } // ---- select ---- Cmd::Select { name, system } => { if system { check_system_write_access()?; } let write_scope = resolve_scope(system); environment::select_environment(&name, write_scope)?; if system { eprintln!("Set system default environment: {name}"); 
} else { eprintln!("Selected environment: {name}"); } Ok(()) } // ---- update ---- Cmd::Update { name, image, version, tag, dockerfile, dockerfile_stub, force, include, flagfile, engine_arg, engine, shm_size, no_build, reinit, non_interactive: _, } => { let (env_name, env_scope) = match name { Some(n) => { let scope = cfg::find_env_scope(&n)?; (n, scope) } None => { let (n, s, _) = environment::resolve_active_environment()?; (n, s) } }; if env_scope == Scope::System { check_system_write_access()?; } // Handle --dockerfile-stub: generate stub if no Dockerfile exists let resolved_dockerfile = if dockerfile.is_some() && dockerfile_stub { return Err(ManagerError::EnvError( "Cannot use both --dockerfile and --dockerfile-stub".to_string(), )); } else if dockerfile_stub { let df_path = cfg::env_dockerfile_path(env_scope, &env_name); if df_path.exists() && !force { return Err(ManagerError::EnvError(format!( "Dockerfile already exists: {}\nUse --force to overwrite.", df_path.display() ))); } let stub_dir = cfg::data_dir(env_scope).join("tmp"); fs::create_dir_all(&stub_dir).map_err(|e| { ManagerError::EnvError(format!("Failed to create tmp dir: {e}")) })?; let stub_path = stub_dir.join(format!("{env_name}.Dockerfile")); let stub_content = format!( "# morloc environment: {env_name}\n\ # Edit this file, then rebuild with: morloc-manager update\n\ \n\ # CONTAINER_BASE is replaced at build time with the environment's base image\n\ ARG CONTAINER_BASE=scratch\n\ FROM ${{CONTAINER_BASE}}\n\ \n\ # Example: install system packages\n\ # RUN apt-get update && apt-get install -y jq && rm -rf /var/lib/apt/lists/*\n\ \n\ # Example: install Python packages\n\ # RUN pip install scikit-learn pandas\n\ \n\ # Example: install R packages\n\ # RUN R -e \"install.packages('ggplot2', repos='https://cloud.r-project.org')\"\n" ); fs::write(&stub_path, &stub_content).map_err(|e| { ManagerError::EnvError(format!("Failed to write stub Dockerfile: {e}")) })?; 
Some(stub_path.to_string_lossy().to_string()) } else { dockerfile }; if version.is_some() && image.is_some() { return Err(ManagerError::EnvError( "--version and --image are mutually exclusive".to_string() )); } // Resolve base image if --version, --tag, or --image provided let (base_image, original_image, morloc_ver) = if let Some(ref ver_str) = version { let ec = cfg::read_env_config(env_scope, &env_name)?; let clean = ver_str.strip_prefix('v').unwrap_or(ver_str); let ver: Version = clean.parse().map_err(|_| { ManagerError::InvalidVersion(ver_str.clone()) })?; let img = environment::pull_version_image(ec.engine, &ver)?; (Some(img), None, Some(ver)) } else if let Some(ref t) = tag { let ec = cfg::read_env_config(env_scope, &env_name)?; let (img, ver) = environment::pull_tagged_image(ec.engine, t)?; (Some(img), None, Some(ver)) } else if let Some(ref img) = image { let ec = cfg::read_env_config(env_scope, &env_name)?; environment::pull_custom_image(ec.engine, img)?; // Detect version from the new image so it doesn't stay stale let detected_ver = environment::detect_morloc_version(ec.engine, img).ok(); (Some(img.clone()), None, detected_ver) } else { (None, None, None) }; eprintln!("Updating environment: {env_name}"); let opts = environment::ApplyOptions { name: env_name.clone(), scope: env_scope, is_new: false, base_image, original_image, morloc_version: morloc_ver, dockerfile: resolved_dockerfile, includes: include, flagfile, engine_args: engine_arg, engine: engine.map(|e| e.into()), shm_size, skip_dockerfile_build: no_build || dockerfile_stub, verbose, }; environment::apply_environment(&opts)?; if dockerfile_stub { let df_path = cfg::env_dockerfile_path(env_scope, &env_name); eprintln!("Stub Dockerfile: {}", df_path.display()); eprintln!("Edit it, then run: morloc-manager update {env_name}"); } // --version, --tag, and --image imply --reinit (ABI may have changed) if reinit || version.is_some() || tag.is_some() || image.is_some() { // Re-read the config 
(apply_environment may have updated it) let ec = cfg::read_env_config(env_scope, &env_name)?; // Check for running serve container -- reinit replaces morloc-nexus // which will fail with "Text file busy" if the container has it open. let serve_name = serve::serve_container_name(&env_name); let running = serve::find_running_serve_containers(ec.engine); if running.iter().any(|n| n == &serve_name) { return Err(ManagerError::EnvError(format!( "Cannot reinit environment '{env_name}' while its serve container is running.\n \ Run 'morloc-manager stop {env_name}' first." ))); } run_morloc_init_for(Some((env_name.clone(), env_scope, ec)), verbose)?; } eprintln!("{}", bold_green(&format!("Environment '{env_name}' updated."))); if env_scope == Scope::System && !check_podman_additional_stores( cfg::read_env_config(env_scope, &env_name) .map(|ec| ec.engine) .unwrap_or(ContainerEngine::Podman), ) { eprintln!(); warn_podman_additional_stores(); } Ok(()) } // ---- freeze ---- Cmd::Freeze { name, output, force } => { let output_dir = output.as_deref().unwrap_or("./morloc-freeze"); // Protect against silently overwriting a previous freeze let existing_tar = std::path::Path::new(output_dir).join("state.tar.gz"); if existing_tar.exists() && !force { return Err(ManagerError::FreezeError(format!( "Output directory already contains a freeze: {}\n \ Use --force to overwrite, or specify a different -o path.", existing_tar.display() ))); } let (env_name, env_scope, ec) = resolve_env_or_active(name)?; let engine = ec.engine; // Detect the version from the container binary for sanity check. // The morloc binary can't report prerelease tags (stack limitation), // so if major.minor.patch match, keep the recorded version which has // the full tag from the image. 
eprintln!("Detecting morloc version from image..."); let detected = environment::detect_morloc_version(ec.engine, ec.active_image())?; let ver = if let Some(ref recorded) = ec.morloc_version { if recorded.major == detected.major && recorded.minor == detected.minor && recorded.patch == detected.patch { recorded.clone() } else { eprintln!( "Warning: recorded morloc version ({}) does not match image ({}).", recorded.show(), detected.show() ); detected } } else { detected }; let data_dir = cfg::env_data_dir(env_scope, &env_name); let image = ec.active_image().to_string(); let result = freeze::freeze_from_dir(env_scope, ver.clone(), engine, &image, &data_dir.to_string_lossy(), output_dir, verbose); if result.is_ok() && ec.morloc_version.as_ref() != Some(&ver) { let mut updated = ec.clone(); updated.morloc_version = Some(ver); let _ = cfg::write_env_config(env_scope, &env_name, &updated); } result } // ---- unfreeze ---- Cmd::Unfreeze { from, tag, base, engine: engine_override, rebuild } => { let from = { let p = std::path::Path::new(&from); if p.is_dir() { let tar = p.join("state.tar.gz"); if tar.is_file() { tar.to_string_lossy().to_string() } else { return Err(ManagerError::UnfreezeError(format!( "Directory '{}' does not contain state.tar.gz. \ Pass the path to state.tar.gz directly, or the directory containing it.", from ))); } } else if p.is_file() { from } else { return Err(ManagerError::UnfreezeError(format!( "Input not found: {from}. \ Pass the path to state.tar.gz or the directory containing it." ))); } }; // Read version and engine from the freeze manifest so unfreeze // works on deployment machines with no morloc environments. let tarball_dir = std::path::Path::new(&from) .parent() .unwrap_or(std::path::Path::new(".")); let manifest_path = tarball_dir.join("freeze-manifest.json"); let manifest = freeze::read_freeze_manifest(&manifest_path.to_string_lossy()) .map_err(|_| ManagerError::UnfreezeError(format!( "Cannot read freeze manifest at {}. 
Ensure state.tar.gz and freeze-manifest.json are in the same directory.", manifest_path.display() )))?; let engine = match engine_override { Some(EngineArg::Docker) => ContainerEngine::Docker, Some(EngineArg::Podman) => ContainerEngine::Podman, None => { let e = ensure_engine()?; eprintln!( "Note: using {} engine from global config. Override with --engine if needed.", display_engine(e) ); e } }; serve::build_serve_image(engine, verbose, &from, &tag, manifest.morloc_version, base.as_deref(), rebuild, &manifest.programs) } // ---- start ---- Cmd::Start { name, port, env_vars, env_file, force } => { let (env_name, env_scope, ec) = resolve_env_or_active(name)?; let image = ec.active_image().to_string(); let data_dir = cfg::env_data_dir(env_scope, &env_name); let container_name = serve::serve_container_name(&env_name); // Warn if a Dockerfile is configured but the layered image hasn't been built if ec.dockerfile.is_some() && ec.built_image.is_none() { eprintln!("Warning: Dockerfile is configured but image has not been built. Using base image."); eprintln!(" Run 'morloc-manager update {env_name}' to build the Dockerfile layer."); } // Refuse to replace a running container unless --force is passed if container::container_exists(ec.engine, &container_name) { if !force { return Err(ManagerError::EnvError(format!( "Serve container already running for '{env_name}'. Use --force to replace." 
))); } eprintln!("Warning: replacing existing serve container '{container_name}'"); } let port_mappings = if port.is_empty() { vec![(8080, 8080)] } else { port }; let flags_path = cfg::env_flags_path(env_scope, &env_name); let extra_flags = cfg::read_flags_file(&flags_path); let user_env = collect_env_vars(&env_vars, env_file.as_deref())?; serve::serve_environment( ec.engine, verbose, &image, &data_dir.to_string_lossy(), &container_name, &port_mappings, &extra_flags, &Some(ec.shm_size.clone()), &user_env, ) } // ---- stop ---- Cmd::Stop { name } => { let (env_name, _, ec) = resolve_env_or_active(name)?; let container_name = serve::serve_container_name(&env_name); if crate::container::container_exists(ec.engine, &container_name) { serve::stop_serve_container(ec.engine, verbose, &container_name)?; eprintln!("Stopped serving environment: {env_name}"); } else { return Err(ManagerError::EnvError( format!("No serve container running for environment '{env_name}'") )); } Ok(()) } // ---- logs ---- Cmd::Logs { name, follow } => { let (container_name, engine) = if let Some(ref n) = name { let (_, _, ec) = resolve_env_or_active(Some(n.clone()))?; let cname = serve::serve_container_name(n); if !container::container_exists(ec.engine, &cname) { return Err(ManagerError::EnvError( format!("No serve container running for environment '{n}'") )); } (cname, ec.engine) } else { find_running_serve_container()? }; let exe = match engine { ContainerEngine::Podman => "podman", ContainerEngine::Docker => "docker", }; let mut cmd_args = vec!["logs"]; if follow { cmd_args.push("-f"); } cmd_args.push(&container_name); // Log content is the primary data of this command, so both the // container's original stdout and stderr should go to our stdout. // docker/podman logs preserves the original stream split; we merge // them so that `morloc-manager logs | grep ERROR` works. 
let stdout_handle = std::io::stdout(); let status = std::process::Command::new(exe) .args(&cmd_args) .stdin(Stdio::null()) .stdout(Stdio::inherit()) .stderr(Stdio::from(stdout_handle)) .status() .map_err(|e| ManagerError::EnvError(format!("Failed to run {exe} logs: {e}")))?; if !status.success() { return Err(ManagerError::EngineError { engine, code: status.code().unwrap_or(1), stderr: String::new(), }); } Ok(()) } // ---- eval ---- Cmd::Eval { first, second, port } => { let expr = if let Some(ref expr_arg) = second { // first is env name — validate it exists and its serve container is running let (env_name, _, ec) = resolve_env_or_active(Some(first))?; let container_name = serve::serve_container_name(&env_name); if !container::container_exists(ec.engine, &container_name) { return Err(ManagerError::EnvError(format!( "No serve container running for '{env_name}'. Start with: morloc-manager start {env_name}" ))); } expr_arg.clone() } else { first }; use std::io::{Read as IoRead, Write as IoWrite}; let body = format!("{{\"expr\":{}}}", serde_json::to_string(&expr).unwrap_or_default()); let request = format!( "POST /eval HTTP/1.1\r\nHost: localhost\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", body.len(), body ); let addr = format!("127.0.0.1:{port}"); let mut stream = std::net::TcpStream::connect(&addr).map_err(|e| { ManagerError::EnvError(format!( "Cannot connect to serve container on {addr}: {e}\n Is a serve container running? 
Start with: morloc-manager start" )) })?; stream.write_all(request.as_bytes()).map_err(|e| { ManagerError::EnvError(format!("Failed to send request: {e}")) })?; let mut response = String::new(); stream.read_to_string(&mut response).map_err(|e| { ManagerError::EnvError(format!("Failed to read response: {e}")) })?; // Extract body from HTTP response (after \r\n\r\n) if let Some(pos) = response.find("\r\n\r\n") { let body = &response[pos + 4..]; println!("{body}"); } else { println!("{response}"); } Ok(()) } // ---- status ---- Cmd::Status => { let mut all_containers: Vec = Vec::new(); let mut any_engine = false; for engine in [ContainerEngine::Podman, ContainerEngine::Docker] { let exe = match engine { ContainerEngine::Podman => "podman", ContainerEngine::Docker => "docker", }; if which(exe) { any_engine = true; if let Ok(containers) = serve::query_serve_containers(engine, verbose) { all_containers.extend(containers); } } } if !any_engine { return Err(ManagerError::EngineNotFound); } if json { #[derive(serde::Serialize)] struct StatusOutput { containers: Vec } let output = StatusOutput { containers: all_containers }; println!("{}", serde_json::to_string_pretty(&output).unwrap()); } else if all_containers.is_empty() { println!("No morloc serve containers running."); } else { println!("Running servers:"); for c in &all_containers { println!(" {} {} ({}) [{}]", c.name, c.ports, c.env, c.status); } } Ok(()) } // ---- doctor ---- Cmd::Doctor { name, system, deep, strict } => { let (env_name, env_scope, ec) = if let Some(ref n) = name { let s = if system { Scope::System } else { cfg::find_env_scope(n)? }; let c = cfg::read_env_config(s, n)?; (n.clone(), s, c) } else { resolve_env_or_active(None)? 
}; doctor::doctor(ec.engine, verbose, &env_name, env_scope, &ec, deep, strict, json) } } } // ====================================================================== // Serve container discovery // ====================================================================== /// Find exactly one running morloc-serve-* container across all engines. /// Returns (container_name, engine). Errors if zero or multiple found. fn find_running_serve_container() -> Result<(String, ContainerEngine)> { let mut found: Vec<(String, ContainerEngine)> = Vec::new(); for engine in [ContainerEngine::Podman, ContainerEngine::Docker] { let exe = match engine { ContainerEngine::Podman => "podman", ContainerEngine::Docker => "docker", }; if which(exe) { for name in serve::find_running_serve_containers(engine) { found.push((name, engine)); } } } match found.len() { 0 => Err(ManagerError::EnvError( "No morloc serve containers running".to_string(), )), 1 => Ok(found.into_iter().next().unwrap()), _ => { let names: Vec = found.iter().map(|(n, _)| n.clone()).collect(); Err(ManagerError::EnvError(format!( "Multiple serve containers running. 
Specify one explicitly:\n {}",
                names.join("\n ")
            )))
        }
    }
}

// ======================================================================
// Container run
// ======================================================================

/// Run a command in the currently active environment's container.
///
/// Convenience wrapper around `run_in_container_for` with no explicit
/// target environment.
fn run_in_container(
    verbose: bool,
    shell: bool,
    args: &[String],
    user_env: &[(String, String)],
) -> Result<()> {
    run_in_container_for(None, verbose, shell, args, user_env)
}

/// Run a command (or an interactive shell) inside the container of `target`,
/// or of the active environment when `target` is `None`.
///
/// Pre-flight checks performed before delegating to `run_with_config`:
/// - warn when a Dockerfile is configured but its image layer was never built;
/// - verify the engine socket is reachable and the image exists locally;
/// - refuse to bind-mount the root directory;
/// - apply SELinux-aware handling of the working-directory mount.
fn run_in_container_for(
    target: Option<(String, Scope, EnvironmentConfig)>,
    verbose: bool,
    shell: bool,
    args: &[String],
    user_env: &[(String, String)],
) -> Result<()> {
    let (env_name, env_scope, ec) = match target {
        Some(t) => t,
        None => environment::resolve_active_environment()?,
    };
    let engine = ec.engine;
    let image = ec.active_image().to_string();
    let data_dir = cfg::env_data_dir(env_scope, &env_name);
    let v_data_dir = data_dir.to_string_lossy().to_string();

    // Warn if a Dockerfile is configured but the layered image hasn't been built
    if ec.dockerfile.is_some() && ec.built_image.is_none() {
        eprintln!("Warning: Dockerfile is configured but image has not been built. Using base image.");
        eprintln!(" Run 'morloc-manager update {env_name}' to build the Dockerfile layer.");
    }

    // Fail fast with a clear message if docker socket is unreachable
    require_docker_socket(engine)?;

    // Verify the image is accessible before attempting to run
    if !container::image_exists_locally(engine, &image) {
        // Show the raw container engine error before our hint
        if let Some(raw_err) = container::image_inspect_stderr(engine, &image) {
            let trimmed = raw_err.trim();
            if !trimmed.is_empty() {
                eprintln!("{trimmed}");
            }
        }
        if env_scope == Scope::System && !check_podman_additional_stores(engine) {
            return Err(ManagerError::EnvError(format!(
                "Image '{image}' not found. \n\
                 The environment '{env_name}' is a system environment \
                 but Podman is not configured to see rootful images.\n\
                 Option 1 (recommended): Use Docker for system environments.\n\
                 Option 2: Add to [storage.options] in /etc/containers/storage.conf:\n\n \
                 additionalimagestores = [\"/var/lib/containers/storage\"]\n\n\
                 Note: Option 2 may cause storage locking conflicts on Fedora and Debian.\n"
            )));
        }
        let hint = if env_scope == Scope::System {
            format!("Ask your administrator to run: sudo morloc-manager update {env_name}")
        } else {
            format!("Run 'morloc-manager update {env_name}' to build it.")
        };
        return Err(ManagerError::EnvError(format!(
            "Image '{image}' not found locally. {hint}"
        )));
    }

    let se_mode = detect_selinux();
    let suffix = volume_suffix(se_mode);
    let home = dirs::home_dir()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string();
    let cwd = std::env::current_dir()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string();

    // Refuse to run from the root directory — container engines cannot
    // bind-mount "/" and the resulting error is opaque.
    if !shell && cwd == "/" {
        return Err(ManagerError::EnvError(
            "Cannot run from the root directory (/). \
             Change to a subdirectory first (e.g., cd /tmp).".to_string()
        ));
    }

    // Read flags from the environment's flags file
    let flags_path = cfg::env_flags_path(env_scope, &env_name);
    let extra_flags = cfg::read_flags_file(&flags_path);

    // `morloc init` needs no working-directory mount at all.
    let is_init = matches!(args, [a, b, ..] if a == "morloc" && b == "init");
    let is_home_dir = normalize_trailing(&cwd) == normalize_trailing(&home);

    if !is_init && !suffix.is_empty() && !is_home_dir {
        // SELinux enforcing, normal project directory: validate before relabel.
        selinux::validate_mount_path(&cwd)?;
        run_with_config(
            engine, verbose, &image, &v_data_dir, &home, &cwd, suffix, shell,
            args, false, &ec.shm_size, &extra_flags, user_env,
        )
    } else {
        // Running from $HOME under SELinux would relabel the whole home
        // directory — skip the working-directory mount instead.
        let (cwd_final, skip_work_mount) = if is_home_dir && !suffix.is_empty() && !is_init {
            eprintln!("Warning: running from home directory with SELinux; working directory mount skipped.");
            eprintln!("Workaround: create a project subdirectory and work from there:");
            eprintln!(" mkdir ~/myproject && cd ~/myproject");
            (home.clone(), true)
        } else {
            (cwd, false)
        };
        run_with_config(
            engine, verbose, &image, &v_data_dir, &home, &cwd_final, suffix, shell,
            args, is_init || skip_work_mount, &ec.shm_size, &extra_flags, user_env,
        )
    }
}

/// Assemble a `RunConfig` (mounts, environment, workdir, shm size) and
/// execute it, translating the container exit status into this tool's
/// error conventions.
fn run_with_config(
    engine: ContainerEngine,
    verbose: bool,
    image: &str,
    v_data_dir: &str,
    home: &str,
    cwd: &str,
    suffix: &str,
    shell: bool,
    args: &[String],
    is_init: bool,
    shm_size: &str,
    extra_flags: &[String],
    user_env: &[(String, String)],
) -> Result<()> {
    if shell {
        if !io::stdin().is_terminal() || !io::stdout().is_terminal() {
            eprintln!("Error: --shell requires an interactive terminal (TTY).");
            eprintln!("If connecting over SSH, use: ssh -t morloc-manager run --shell");
            std::process::exit(1);
        }
    }

    // Mount data at /opt/morloc — matching the serve container (start).
    // The compiler reads MORLOC_HOME to resolve all generated paths.
    let mh = "/opt/morloc";
    let base_mounts = vec![
        (v_data_dir.to_string(), mh.to_string()),
    ];
    let work_mount = if is_init {
        Vec::new()
    } else {
        vec![(cwd.to_string(), cwd.to_string())]
    };
    let all_mounts: Vec<(String, String)> =
        base_mounts.into_iter().chain(work_mount).collect();
    let work_dir = if is_init { mh.to_string() } else { cwd.to_string() };
    let mut env_vars = vec![
        ("HOME".to_string(), home.to_string()),
        ("MORLOC_HOME".to_string(), mh.to_string()),
        (
            "PATH".to_string(),
            format!("{mh}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"),
        ),
    ];
    env_vars.extend(user_env.iter().cloned());
    let cmd = if shell {
        Some(vec!["/bin/bash".to_string()])
    } else if args.is_empty() {
        None
    } else {
        Some(args.to_vec())
    };
    let cfg = RunConfig {
        image: image.to_string(),
        bind_mounts: all_mounts,
        env: env_vars,
        interactive: shell,
        shm_size: Some(shm_size.to_string()),
        work_dir: Some(work_dir),
        selinux_suffix: suffix.to_string(),
        command: cmd,
        extra_flags: extra_flags.to_vec(),
        ..RunConfig::new(image)
    };
    let status = container_run_passthrough(engine, verbose, shell, &cfg);
    let code = status.code().unwrap_or(1);
    if status.success() {
        Ok(())
    } else if code >= 125 {
        // Exit 125+ = container engine error (not the user's program)
        Err(ManagerError::EngineError {
            engine,
            code,
            stderr: "Container engine error".to_string(),
        })
    } else {
        // Exit 1-124 = program exited with non-zero, pass through silently
        std::process::exit(code);
    }
}

/// Run `morloc init -f` inside the target environment's container
/// (quiet mode unless `verbose`).
fn run_morloc_init_for(
    target: Option<(String, Scope, EnvironmentConfig)>,
    verbose: bool,
) -> Result<()> {
    let init_args: Vec<String> = if verbose {
        ["morloc", "init", "-f"].iter().map(|s| s.to_string()).collect()
    } else {
        ["morloc", "init", "-f", "-q"].iter().map(|s| s.to_string()).collect()
    };
    eprintln!("Initializing morloc (this may take several minutes)...");
    run_in_container_for(target, verbose, false, &init_args, &[])
}

/// Append a trailing slash when absent so path comparisons ignore it.
fn normalize_trailing(p: &str) -> String {
    let mut s = p.to_string();
    if !s.ends_with('/') {
        s.push('/');
    }
    s
}

// 
====================================================================== // Tests // ====================================================================== #[cfg(test)] mod tests { use super::*; use crate::container::{build_build_args, build_run_args, engine_executable, engine_specific_run_flags, BuildConfig}; // ---- Type tests ---- #[test] fn show_version_formats_correctly() { assert_eq!(Version::new(0, 67, 0).show(), "0.67.0"); } #[test] fn parse_version_round_trips() { assert_eq!("0.67.0".parse::().ok(), Some(Version::new(0, 67, 0))); } #[test] fn parse_version_rejects_invalid() { assert!("abc".parse::().is_err()); } #[test] fn parse_version_rejects_incomplete() { assert!("0.67".parse::().is_err()); } #[test] fn version_ordering_is_semantic() { assert!(Version::new(1, 0, 0) > Version::new(0, 99, 99)); } #[test] fn version_ordering_minor() { assert!(Version::new(0, 2, 0) > Version::new(0, 1, 99)); } #[test] fn version_equality() { assert_eq!(Version::new(0, 67, 0), Version::new(0, 67, 0)); } #[test] fn parse_version_with_prerelease() { for (input, expected_pre) in [ ("0.77.0-rc.1", "rc.1"), ("1.0.0-alpha", "alpha"), ("1.0.0-beta.2", "beta.2"), ("0.1.0-dev.20260414", "dev.20260414"), ] { let ver: Version = input.parse().unwrap(); assert_eq!(ver.prerelease, Some(expected_pre.to_string()), "input: {input}"); assert_eq!(ver.show(), input, "round-trip failed for: {input}"); } } #[test] fn prerelease_sorts_before_release() { let rc: Version = "0.77.0-rc.1".parse().unwrap(); let release = Version::new(0, 77, 0); assert!(rc < release); } // ---- Error message tests ---- #[test] fn invalid_version_renders() { let err = ManagerError::InvalidVersion("abc".to_string()); assert!(err.to_string().contains("Invalid version")); } #[test] fn no_command_renders() { let err = ManagerError::NoCommand; assert!(err.to_string().contains("No command")); } #[test] fn no_active_environment_suggests_new() { let err = ManagerError::NoActiveEnvironment; 
assert!(err.to_string().contains("new")); } #[test] fn config_permission_denied_mentions_permissions() { let err = ManagerError::ConfigPermissionDenied("/etc/morloc/config.json".to_string()); assert!(err.to_string().contains("Permission")); } #[test] fn freeze_error_renders() { let err = ManagerError::FreezeError("tar error".to_string()); assert!(err.to_string().contains("Freeze failed")); } // ---- Config default tests ---- #[test] fn default_config_has_no_active_env() { assert_eq!(Config::default().active_env, None); } #[test] fn default_config_uses_podman() { assert_eq!(Config::default().engine, ContainerEngine::Podman); } // ---- Config JSON round-trip tests ---- #[test] fn config_json_round_trip() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("config.json"); let cfg = Config { active_env: Some("ml".to_string()), engine: ContainerEngine::Docker, }; cfg::write_config(&path, &cfg).unwrap(); let cfg2: Config = cfg::read_config(&path).unwrap(); assert_eq!(cfg2.active_env.as_deref(), Some("ml")); assert_eq!(cfg2.engine, ContainerEngine::Docker); } #[test] fn config_read_missing_returns_not_found() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("nonexistent.json"); let result = cfg::read_config::(&path); assert!(matches!(result, Err(ManagerError::ConfigNotFound(_)))); } #[test] fn config_read_invalid_json_returns_parse_error() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("bad.json"); fs::write(&path, "not json at all").unwrap(); let result = cfg::read_config::(&path); assert!(matches!(result, Err(ManagerError::ConfigParseError { .. 
}))); } #[test] fn env_config_json_round_trip() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("env.json"); let ec = EnvironmentConfig { name: "test".to_string(), base_image: "ghcr.io/morloc-project/morloc/morloc-full:0.67.0".to_string(), original_image: None, dockerfile: None, content_hash: None, built_image: None, engine: ContainerEngine::Podman, shm_size: "1g".to_string(), morloc_version: Some(Version::new(0, 67, 0)), }; cfg::write_config(&path, &ec).unwrap(); let ec2: EnvironmentConfig = cfg::read_config(&path).unwrap(); assert_eq!(ec2.name, "test"); assert_eq!(ec2.shm_size, "1g"); assert_eq!(ec2.morloc_version, Some(Version::new(0, 67, 0))); } #[test] fn freeze_manifest_json_round_trip() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("fm.json"); let fm = FreezeManifest { morloc_version: Version::new(0, 67, 0), frozen_at: chrono::Utc::now(), modules: vec![ModuleEntry { name: "math".to_string(), version: Some("0.3.0".to_string()), sha256: "abc123".to_string(), }], programs: vec![ProgramEntry { name: "svc".to_string(), commands: vec!["hello".to_string(), "compute".to_string()], }], base_image: "morloc-full:0.67.0".to_string(), env_layer: Some(FrozenEnvLayer { name: "ml".to_string(), dockerfile: "FROM scratch".to_string(), content_hash: "abc".to_string(), image_tag: None, }), env_vars: Vec::new(), }; cfg::write_config(&path, &fm).unwrap(); let fm2: FreezeManifest = cfg::read_config(&path).unwrap(); assert_eq!(fm2.morloc_version, Version::new(0, 67, 0)); assert_eq!(fm2.modules.len(), 1); assert_eq!(fm2.programs.len(), 1); assert_eq!(fm2.programs[0].commands, vec!["hello", "compute"]); // env_vars is no longer written but can still be read from old manifests assert!(fm2.env_vars.is_empty()); } #[test] fn freeze_manifest_reads_legacy_env_vars() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("legacy.json"); // Simulate an old manifest that included env_vars let json = r#"{ "morloc_version": 
{"major":0,"minor":67,"patch":0,"pre":null}, "frozen_at": "2025-01-01T00:00:00Z", "modules": [], "programs": [], "base_image": "morloc-full:0.67.0", "env_layer": null, "env_vars": ["API_KEY", "DB_URL"] }"#; std::fs::write(&path, json).unwrap(); let fm: FreezeManifest = cfg::read_config(&path).unwrap(); assert_eq!(fm.env_vars, vec!["API_KEY", "DB_URL"]); } // ---- Config flags tests ---- #[test] fn read_flags_file_parses() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("test.flags"); fs::write( &path, "# This is a comment\n--gpus all\n\n -v /data:/data \n# another comment\n--network host\n", ) .unwrap(); let flags = cfg::read_flags_file(&path); assert_eq!( flags, vec!["--gpus", "all", "-v", "/data:/data", "--network", "host"] ); } #[test] fn read_flags_file_missing() { let dir = tempfile::tempdir().unwrap(); let flags = cfg::read_flags_file(&dir.path().join("nope.flags")); assert!(flags.is_empty()); } #[test] fn read_flags_file_expands_env_vars() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("test.flags"); fs::write(&path, "-v $HOME/data:/data\n").unwrap(); let flags = cfg::read_flags_file(&path); let home = std::env::var("HOME").unwrap(); assert_eq!(flags, vec!["-v", &format!("{home}/data:/data")]); } #[test] fn read_flags_file_expands_tilde() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("test.flags"); fs::write(&path, "-v ~/data:/data\n").unwrap(); let flags = cfg::read_flags_file(&path); let home = std::env::var("HOME").unwrap(); assert_eq!(flags, vec!["-v", &format!("{home}/data:/data")]); } // ---- Container CLI argument tests ---- #[test] fn engine_executable_docker() { assert_eq!(engine_executable(ContainerEngine::Docker), "docker"); } #[test] fn engine_executable_podman() { assert_eq!(engine_executable(ContainerEngine::Podman), "podman"); } #[test] fn build_run_args_minimal() { let cfg = RunConfig::new("myimage:latest"); let args = build_run_args( ContainerEngine::Docker, 
&engine_specific_run_flags(ContainerEngine::Docker), &cfg, ); assert_eq!(args[0], "run"); assert!(args.contains(&"--rm".to_string())); assert!(args.contains(&"myimage:latest".to_string())); assert!(!args.contains(&"-it".to_string())); } #[test] fn build_run_args_podman_userns() { let cfg = RunConfig::new("myimage:latest"); let args = build_run_args( ContainerEngine::Podman, &engine_specific_run_flags(ContainerEngine::Podman), &cfg, ); assert!(args.contains(&"--userns=keep-id".to_string())); } #[test] fn build_run_args_interactive() { let mut cfg = RunConfig::new("img"); cfg.interactive = true; let args = build_run_args( ContainerEngine::Docker, &engine_specific_run_flags(ContainerEngine::Docker), &cfg, ); assert!(args.contains(&"-it".to_string())); } #[test] fn build_run_args_selinux_suffix() { let mut cfg = RunConfig::new("img"); cfg.bind_mounts = vec![("/host".to_string(), "/container".to_string())]; cfg.selinux_suffix = ":z".to_string(); let args = build_run_args( ContainerEngine::Docker, &engine_specific_run_flags(ContainerEngine::Docker), &cfg, ); assert!(args.contains(&"-v".to_string())); assert!(args.contains(&"/host:/container:z".to_string())); } #[test] fn build_run_args_workdir() { let mut cfg = RunConfig::new("img"); cfg.work_dir = Some("/work".to_string()); let args = build_run_args( ContainerEngine::Docker, &engine_specific_run_flags(ContainerEngine::Docker), &cfg, ); assert!(args.contains(&"-w".to_string())); assert!(args.contains(&"/work".to_string())); } #[test] fn build_run_args_read_only() { let mut cfg = RunConfig::new("img"); cfg.read_only = true; let args = build_run_args( ContainerEngine::Docker, &engine_specific_run_flags(ContainerEngine::Docker), &cfg, ); assert!(args.contains(&"--read-only".to_string())); } #[test] fn build_run_args_command_at_end() { let mut cfg = RunConfig::new("img"); cfg.command = Some(vec![ "morloc".to_string(), "make".to_string(), "-o".to_string(), "svc".to_string(), "svc.loc".to_string(), ]); let args = 
build_run_args( ContainerEngine::Docker, &engine_specific_run_flags(ContainerEngine::Docker), &cfg, ); let img_idx = args.iter().position(|a| a == "img").unwrap(); let cmd_idx = args.iter().position(|a| a == "morloc").unwrap(); assert!(img_idx < cmd_idx); } #[test] fn build_build_args_includes_tag_and_dockerfile() { let cfg = BuildConfig { dockerfile: "/tmp/Dockerfile".to_string(), context: "/tmp/ctx".to_string(), tag: "test:v1".to_string(), build_args: vec![("BASE".to_string(), "ubuntu:22.04".to_string())], }; let args = build_build_args(&cfg); assert_eq!(args[0], "build"); assert!(args.contains(&"-f".to_string())); assert!(args.contains(&"-t".to_string())); assert!(args.contains(&"--build-arg".to_string())); assert_eq!(args.last().unwrap(), "/tmp/ctx"); } // ---- SELinux tests ---- #[test] fn root_is_unsafe() { assert!(!selinux::is_safe_to_relabel("/")); } #[test] fn tmp_is_unsafe() { assert!(!selinux::is_safe_to_relabel("/tmp")); } #[test] fn tmp_subdir_is_unsafe() { assert!(!selinux::is_safe_to_relabel("/tmp/foo")); } #[test] fn home_subdir_is_safe() { assert!(selinux::is_safe_to_relabel("/home/user/project")); } #[test] fn var_tmp_is_unsafe() { assert!(!selinux::is_safe_to_relabel("/var/tmp")); } } ================================================ FILE: data/rust/morloc-manager/src/selinux.rs ================================================ use std::path::Path; use std::process::Command; use crate::error::{ManagerError, Result}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SELinuxMode { Enforcing, Permissive, Disabled, } pub fn detect_selinux() -> SELinuxMode { if !Path::new("/usr/sbin/getenforce").exists() { return SELinuxMode::Disabled; } let Ok(output) = Command::new("getenforce").output() else { return SELinuxMode::Disabled; }; if !output.status.success() { return SELinuxMode::Disabled; } let stdout = String::from_utf8_lossy(&output.stdout); let first_line = stdout.lines().next().unwrap_or(""); match first_line { "Enforcing" => 
SELinuxMode::Enforcing,
        "Permissive" => SELinuxMode::Permissive,
        _ => SELinuxMode::Disabled,
    }
}

/// Volume-mount suffix requesting SELinux relabeling (`:z`) when enforcing,
/// empty otherwise.
pub fn volume_suffix(mode: SELinuxMode) -> &'static str {
    match mode {
        SELinuxMode::Enforcing => ":z",
        SELinuxMode::Permissive | SELinuxMode::Disabled => "",
    }
}

/// True when `path` may be relabeled for a bind mount: it must not be a
/// sensitive system path and must not be the home directory root itself.
pub fn is_safe_to_relabel(path: &str) -> bool {
    let home = dirs::home_dir().unwrap_or_default();
    let norm = normalize(path);
    let home_norm = normalize_trailing(&home.to_string_lossy());
    let is_home_root = normalize_trailing(&norm) == home_norm;
    !is_unsafe_system_path(&norm) && !is_home_root
}

/// Validate that `path` is safe to bind-mount with SELinux relabeling,
/// returning a descriptive `SELinuxError` otherwise.
pub fn validate_mount_path(path: &str) -> Result<()> {
    if is_safe_to_relabel(path) {
        Ok(())
    } else {
        Err(ManagerError::SELinuxError(format!(
            "Cannot bind-mount {path} with SELinux relabeling. \
             This path is unsafe to relabel. \
             Use a subdirectory instead (e.g., {path}/project/)."
        )))
    }
}

/// System locations where relabeling would be dangerous: `/`, `/tmp`,
/// `/var/tmp`, and anything beneath the latter two.
fn is_unsafe_system_path(p: &str) -> bool {
    let norm = normalize_trailing(p);
    norm == "/"
        || norm.starts_with("/tmp/")
        || norm == "/tmp/"
        || norm.starts_with("/var/tmp/")
        || norm == "/var/tmp/"
}

fn normalize(p: &str) -> String {
    // NOTE(review): currently a plain round-trip through `Path` — no `.`
    // resolution or trailing-slash removal actually happens here.
    let path = Path::new(p);
    path.to_string_lossy().to_string()
}

fn normalize_trailing(p: &str) -> String {
    let mut s = normalize(p);
    if !s.ends_with('/') {
        s.push('/');
    }
    s
}


================================================
FILE: data/rust/morloc-manager/src/serve.rs
================================================

use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};
use std::thread;
use std::time::Duration;

use crate::container::{
    container_build, container_pull, container_run, container_run_quiet, container_stop,
    container_remove, engine_executable, exit_code_to_int, image_exists_locally, BuildConfig,
    RunConfig,
};
use crate::error::{ManagerError, Result};
use crate::types::*;

/// Build a self-contained serve image from a frozen state tarball.
///
/// Steps: resolve the base image (pulling it if absent), extract the
/// tarball into a temporary build context, rewrite manifest paths for the
/// container filesystem, generate a Dockerfile, build the image, validate
/// the installed programs, and clean up the build context.
pub fn build_serve_image(
    engine: ContainerEngine,
    verbose: bool,
    state_tarball: &str,
    tag: &str,
    ver: Version,
    base_override: Option<&str>,
    rebuild: bool,
    programs: &[ProgramEntry],
) -> Result<()> {
    if !Path::new(state_tarball).exists() {
        return Err(ManagerError::UnfreezeError(format!(
            "Tarball not found: {state_tarball}"
        )));
    }
    if !rebuild && image_exists_locally(engine, tag) {
        eprintln!("Image '{tag}' already exists locally; skipping build (use --rebuild to force)");
        return Ok(());
    }

    let tarball_dir = Path::new(state_tarball)
        .parent()
        .unwrap_or(Path::new("."));
    let manifest_path = tarball_dir.join("freeze-manifest.json");
    let m_manifest = if manifest_path.exists() {
        crate::freeze::read_freeze_manifest(&manifest_path.to_string_lossy()).ok()
    } else {
        None
    };
    let base_image = match base_override {
        Some(b) => b.to_string(),
        None => resolve_base_from_manifest(engine, m_manifest.as_ref(), ver),
    };
    eprintln!("Using base image: {base_image}");

    // Pull the base image if it is not already present locally.
    if !image_exists_locally(engine, &base_image) {
        let exe = engine_executable(engine);
        if verbose {
            eprintln!("[morloc-manager] {exe} pull {base_image}");
        }
        let (pull_status, _, pull_err) = container_pull(engine, &base_image);
        if !pull_status.success() {
            return Err(ManagerError::EngineError {
                engine,
                code: exit_code_to_int(pull_status),
                stderr: pull_err,
            });
        }
    }

    let context_dir = tarball_dir.join("serve-build");
    fs::create_dir_all(&context_dir)
        .map_err(|e| ManagerError::UnfreezeError(format!("mkdir failed: {e}")))?;

    eprintln!("Extracting frozen state...");
    let tar_status = Command::new("tar")
        .args(["-xzf", state_tarball, "-C", &context_dir.to_string_lossy()])
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::inherit())
        .status()
        .map_err(|e| ManagerError::UnfreezeError(format!("tar extract failed: {e}")))?;
    if !tar_status.success() {
        return Err(ManagerError::UnfreezeError(
            "tar extract failed (see error output above)".to_string(),
        ));
    }

    // Rewrite build.path in each manifest so the nexus chdirs to the
    // container-internal path instead of the original host path.
    rewrite_manifest_paths(&context_dir)?;

    let dockerfile_path = context_dir.join("Dockerfile");

    // Only emit COPY lines for directories that exist and are non-empty.
    let has_exe = context_dir.join("exe").is_dir()
        && fs::read_dir(context_dir.join("exe"))
            .map(|mut d| d.next().is_some())
            .unwrap_or(false);
    let has_opt = context_dir.join("opt").is_dir()
        && fs::read_dir(context_dir.join("opt"))
            .map(|mut d| d.next().is_some())
            .unwrap_or(false);
    let has_src = context_dir.join("src").is_dir()
        && fs::read_dir(context_dir.join("src"))
            .map(|mut d| d.next().is_some())
            .unwrap_or(false);

    let mh = CONTAINER_MORLOC_HOME;
    let exe_line = if has_exe { format!("COPY exe/ {mh}/exe/\n") } else { String::new() };
    let opt_line = if has_opt { format!("COPY opt/ {mh}/opt/\n") } else { String::new() };
    let src_line = if has_src { format!("COPY src/ {mh}/src/\n") } else { String::new() };

    // Podman's OCI format drops HEALTHCHECK; omit it to avoid warnings.
    let healthcheck = if engine == ContainerEngine::Docker {
        "# Health check for container orchestrators\n\
         HEALTHCHECK --interval=30s --timeout=5s --retries=3 \\\n\
         CMD curl -sf http://localhost:8080/health || exit 1\n\
         \n"
            .to_string()
    } else {
        String::new()
    };

    let dockerfile_content = format!(
        "# Auto-generated by morloc-manager serve-image\n\
         FROM {base_image}\n\
         \n\
         # Ensure morloc binaries are on PATH\n\
         ENV PATH=\"{mh}/bin:${{PATH}}\"\n\
         \n\
         # Morloc home for pool path resolution\n\
         ENV MORLOC_HOME=\"{mh}\"\n\
         \n\
         # Copy frozen morloc state (modules, manifests, binaries, pools)\n\
         COPY lib/ {mh}/lib/\n\
         COPY fdb/ {mh}/fdb/\n\
         COPY bin/ {mh}/bin/\n\
         {exe_line}\
         {opt_line}\
         {src_line}\
         RUN chmod -R a+rX {mh}\n\
         \n\
         {healthcheck}\
         # Entrypoint: nexus router aggregates all installed programs\n\
         ENTRYPOINT [\"morloc-nexus\", \"--router\", \\\n\
         \"--fdb\", \"{mh}/fdb\", \\\n\
         \"--http-port\", \"8080\"]\n"
    );
    fs::write(&dockerfile_path, &dockerfile_content)
        .map_err(|e| ManagerError::UnfreezeError(format!("Write Dockerfile failed: {e}")))?;

    eprintln!("Building serve image {tag} (base: {base_image})...");
    let build_cfg = BuildConfig {
        dockerfile: dockerfile_path.to_string_lossy().to_string(),
        context: context_dir.to_string_lossy().to_string(),
        tag: tag.to_string(),
        build_args: Vec::new(),
    };
    if verbose {
        let exe = engine_executable(engine);
        eprintln!(
            "[morloc-manager] {exe} build -f {} -t {tag} {}",
            build_cfg.dockerfile, build_cfg.context
        );
    }
    let (status, _, build_err) = container_build(engine, &build_cfg);
    if !status.success() {
        return Err(ManagerError::EngineError {
            engine,
            code: exit_code_to_int(status),
            stderr: build_err,
        });
    }
    eprintln!("Built serve image: {tag}");

    // Validate programs work inside the built image
    validate_programs(engine, tag, programs, Vec::new(), verbose)?;

    // Clean up the temporary build context
    if let Err(e) = fs::remove_dir_all(&context_dir) {
        eprintln!("Warning: failed to clean up {}: {e}", context_dir.display());
    }
    Ok(())
}
#[allow(dead_code)] pub fn run_serve_container( engine: ContainerEngine, verbose: bool, image: &str, name: &str, ports: &[(u16, u16)], ) -> Result<()> { // Clean up any existing dead container with this name (silently) let _ = crate::container::container_remove_quiet(engine, name); let port_str: Vec = ports .iter() .map(|(h, c)| format!("{h}:{c}")) .collect(); eprintln!( "Starting serve container {name} on ports {}...", port_str.join(", ") ); let mut cfg = RunConfig::new(image); cfg.read_only = true; cfg.remove_after = false; cfg.name = Some(name.to_string()); cfg.ports = ports.to_vec(); cfg.extra_flags = vec!["-d".to_string()]; if verbose { let exe = engine_executable(engine); let extra = crate::container::engine_specific_run_flags_io(engine); let args = crate::container::build_run_args(engine, &extra, &cfg); let quoted: Vec = args.iter().map(|a| { if a.contains(' ') { format!("'{a}'") } else { a.clone() } }).collect(); eprintln!("[morloc-manager] {exe} {}", quoted.join(" ")); } let (status, _stdout, run_err) = container_run(engine, &cfg); if !status.success() { let _ = crate::container::container_remove_quiet(engine, name); return Err(ManagerError::EngineError { engine, code: exit_code_to_int(status), stderr: run_err, }); } // Verify container reached running state thread::sleep(Duration::from_secs(1)); let exe = engine_executable(engine); let insp_output = Command::new(exe) .args(["inspect", "--format", "{{.State.Status}}", name]) .output(); match insp_output { Ok(o) if o.status.success() => { let state = String::from_utf8_lossy(&o.stdout).trim().to_string(); if state == "running" { eprintln!("Container {name} started"); eprintln!(" Logs: morloc-manager logs"); eprintln!(" Stop: morloc-manager stop {name}"); eprintln!(" Status: morloc-manager status"); Ok(()) } else { let log_output = Command::new(exe).args(["logs", name]).output(); let logs = log_output .map(|o| { let stdout = String::from_utf8_lossy(&o.stdout); let stderr = String::from_utf8_lossy(&o.stderr); 
format!("{stdout}{stderr}") }) .unwrap_or_default(); // Clean up the dead container to prevent name conflicts on retry let _ = container_remove(engine, name); Err(ManagerError::EngineError { engine, code: 1, stderr: format!("Container failed to start (state: {state}):\n{logs}"), }) } } _ => Err(ManagerError::EngineError { engine, code: 1, stderr: "Failed to inspect container state".to_string(), }), } } /// Serve an environment by bind-mounting its data directory into the container. pub fn serve_environment( engine: ContainerEngine, verbose: bool, image: &str, data_dir: &str, container_name: &str, ports: &[(u16, u16)], extra_flags: &[String], shm_size: &Option, user_env: &[(String, String)], ) -> Result<()> { // Clean up any existing dead container with this name (silently) let _ = crate::container::container_remove_quiet(engine, container_name); let port_str: Vec = ports .iter() .map(|(h, c)| format!("{h}:{c}")) .collect(); eprintln!( "Starting serve container {container_name} on ports {}...", port_str.join(", ") ); let mut cfg = RunConfig::new(image); cfg.read_only = true; cfg.remove_after = false; cfg.name = Some(container_name.to_string()); cfg.ports = ports.to_vec(); let mh = CONTAINER_MORLOC_HOME; cfg.bind_mounts = vec![(data_dir.to_string(), mh.to_string())]; cfg.env = vec![ ("PATH".to_string(), format!("{mh}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")), ("MORLOC_HOME".to_string(), mh.to_string()), ]; cfg.env.extend(user_env.iter().cloned()); cfg.command = Some(vec![ "morloc-nexus".to_string(), "--router".to_string(), "--fdb".to_string(), format!("{mh}/fdb"), "--http-port".to_string(), "8080".to_string(), ]); cfg.shm_size = shm_size.clone(); cfg.extra_flags = vec!["-d".to_string()]; cfg.extra_flags.extend(extra_flags.iter().cloned()); if verbose { let exe = engine_executable(engine); let extra = crate::container::engine_specific_run_flags_io(engine); let args = crate::container::build_run_args(engine, &extra, &cfg); let quoted: Vec = 
args.iter().map(|a| { if a.contains(' ') { format!("'{a}'") } else { a.clone() } }).collect(); eprintln!("[morloc-manager] {exe} {}", quoted.join(" ")); } let (status, _stdout, run_err) = container_run(engine, &cfg); if !status.success() { // `container_run` may have left a partially-created container behind // (e.g., port conflict after container creation). Clean it up so the // next `start` doesn't fail on a name collision. let _ = crate::container::container_remove_quiet(engine, container_name); // Detect port conflict and provide a friendlier error message let lower = run_err.to_lowercase(); if lower.contains("address already in use") || lower.contains("port is already allocated") || lower.contains("pasta failed") { // Try to extract the port number from the error let port_hint = ports.first() .map(|(h, _)| format!(" Port {h} is already in use.")) .unwrap_or_default(); return Err(ManagerError::EnvError(format!( "{port_hint}\n \ Another container or process is using this port.\n \ Use '-p :8080' to choose a different host port, or\n \ check running containers with 'morloc-manager status'." 
))); } return Err(ManagerError::EngineError { engine, code: exit_code_to_int(status), stderr: run_err, }); } // Verify container reached running state thread::sleep(Duration::from_secs(1)); let exe = engine_executable(engine); let insp_output = Command::new(exe) .args(["inspect", "--format", "{{.State.Status}}", container_name]) .output(); match insp_output { Ok(o) if o.status.success() => { let state = String::from_utf8_lossy(&o.stdout).trim().to_string(); if state == "running" { eprintln!("Container {container_name} started"); eprintln!(" Logs: morloc-manager logs"); eprintln!(" Stop: morloc-manager stop"); eprintln!(" Status: morloc-manager status"); Ok(()) } else { let log_output = Command::new(exe).args(["logs", container_name]).output(); let logs = log_output .map(|o| { let stdout = String::from_utf8_lossy(&o.stdout); let stderr = String::from_utf8_lossy(&o.stderr); format!("{stdout}{stderr}") }) .unwrap_or_default(); let _ = container_remove(engine, container_name); Err(ManagerError::EngineError { engine, code: 1, stderr: format!("Container failed to start (state: {state}):\n{logs}"), }) } } _ => Err(ManagerError::EngineError { engine, code: 1, stderr: "Failed to inspect container state".to_string(), }), } } pub fn stop_serve_container(engine: ContainerEngine, verbose: bool, name: &str) -> Result<()> { if !crate::container::container_exists(engine, name) { return Err(ManagerError::EnvError(format!( "No serve container running for '{name}'" ))); } if verbose { let exe = engine_executable(engine); eprintln!("[morloc-manager] {exe} stop {name}"); } let (status, err) = container_stop(engine, name); let _ = crate::container::container_remove_quiet(engine, name); if !status.success() { return Err(ManagerError::EngineError { engine, code: exit_code_to_int(status), stderr: err, }); } Ok(()) } /// Build the serve container name for an environment. 
/// Format: morloc-serve-- pub fn serve_container_name(env_name: &str) -> String { let user = std::env::var("USER") .or_else(|_| std::env::var("LOGNAME")) .unwrap_or_else(|_| "unknown".to_string()); format!("morloc-serve-{user}-{env_name}") } /// The prefix used to filter all serve containers for the current user. pub fn serve_container_prefix() -> String { let user = std::env::var("USER") .or_else(|_| std::env::var("LOGNAME")) .unwrap_or_else(|_| "unknown".to_string()); format!("morloc-serve-{user}-") } /// Extract the environment name from a serve container name. pub fn env_name_from_container(container_name: &str) -> &str { let prefix = serve_container_prefix(); container_name.strip_prefix(&prefix).unwrap_or(container_name) } #[derive(serde::Serialize)] pub struct ServeContainerInfo { pub name: String, pub env: String, pub ports: String, pub status: String, } /// Query running serve containers and return structured info. pub fn query_serve_containers(engine: ContainerEngine, verbose: bool) -> Result> { let exe = engine_executable(engine); let fmt = "{{.Names}}\t{{.Status}}\t{{.Ports}}"; let prefix = serve_container_prefix(); let filter = format!("name={prefix}"); if verbose { eprintln!("[morloc-manager] {exe} ps -a --filter {filter} --format '{fmt}'"); } let output = Command::new(exe) .args([ "ps", "-a", "--filter", &filter, "--format", fmt, ]) // Use /tmp as cwd to avoid podman "cannot chdir" failures when the // current directory is inaccessible (e.g. another user's home). 
.current_dir("/tmp") .output() .map_err(|e| ManagerError::EngineError { engine, code: 1, stderr: format!("Failed to list containers: {e}"), })?; if !output.status.success() { return Err(ManagerError::EngineError { engine, code: exit_code_to_int(output.status), stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); } let text = String::from_utf8_lossy(&output.stdout).trim().to_string(); let mut result = Vec::new(); for line in text.lines() { let parts: Vec<&str> = line.split('\t').collect(); if parts.len() >= 3 { let name = parts[0]; let status = parts[1]; let ports = parts[2]; let env = env_name_from_container(name); result.push(ServeContainerInfo { name: name.to_string(), env: env.to_string(), ports: if ports.is_empty() { "-".to_string() } else { ports.to_string() }, status: status.to_string(), }); } } Ok(result) } /// Find running serve container names for the current user. pub fn find_running_serve_containers(engine: ContainerEngine) -> Vec { let exe = engine_executable(engine); let filter = format!("name={}", serve_container_prefix()); let output = Command::new(exe) .args(["ps", "--filter", &filter, "--format", "{{.Names}}"]) .current_dir("/tmp") .output(); match output { Ok(o) if o.status.success() => { String::from_utf8_lossy(&o.stdout) .lines() .filter(|l| !l.is_empty()) .map(|l| l.to_string()) .collect() } _ => Vec::new(), } } // ====================================================================== // Program validation // ====================================================================== /// Run `--help` for each installed program inside a container image to /// verify that pool processes start correctly (e.g. all imports resolve). /// /// `bind_mounts` should be non-empty for pre-freeze validation (where the /// data dir is on the host) and empty for post-unfreeze validation (where /// everything is baked into the image). 
pub fn validate_programs( engine: ContainerEngine, image: &str, programs: &[ProgramEntry], bind_mounts: Vec<(String, String)>, verbose: bool, ) -> Result<()> { if programs.is_empty() { return Ok(()); } eprintln!("Validating installed programs..."); let mut any_failed = false; for prog in programs { let exe_path = format!("{}/bin/{}", CONTAINER_MORLOC_HOME, prog.name); if verbose { let exe = engine_executable(engine); eprintln!("[morloc-manager] {exe} run --rm --entrypoint '' {image} {exe_path} --help"); } let cfg = RunConfig { bind_mounts: bind_mounts.clone(), command: Some(vec![exe_path, "--help".to_string()]), env: vec![ ("MORLOC_HOME".to_string(), CONTAINER_MORLOC_HOME.to_string()), ], // Override the image ENTRYPOINT so the command runs directly // instead of being appended to the router entrypoint. extra_flags: vec!["--entrypoint".to_string(), "".to_string()], ..RunConfig::new(image) }; let (status, _stdout, stderr) = container_run_quiet(engine, &cfg); if status.success() { let n = prog.commands.len(); eprintln!(" [ok] {} ({} commands)", prog.name, n); } else { let snippet: String = stderr.lines().take(5).collect::>().join("\n "); eprintln!(" [FAIL] {}: {}", prog.name, snippet); any_failed = true; } } if any_failed { return Err(ManagerError::FreezeError( "Some programs failed validation (see errors above)".to_string(), )); } Ok(()) } // ====================================================================== // Manifest path rewriting for frozen images // ====================================================================== const CONTAINER_MORLOC_HOME: &str = "/opt/morloc"; const MANIFEST_MARKER: &str = "### MANIFEST ###"; /// Rewrite `build.path` in every `.manifest` file under `fdb/` so the /// nexus inside the container chdirs to the correct location instead of /// the original host path. 
fn rewrite_manifest_paths(context_dir: &Path) -> Result<()> { let fdb_dir = context_dir.join("fdb"); if !fdb_dir.is_dir() { return Ok(()); } let entries = fs::read_dir(&fdb_dir) .map_err(|e| ManagerError::UnfreezeError(format!("read fdb/: {e}")))?; for entry in entries { let entry = entry .map_err(|e| ManagerError::UnfreezeError(format!("read fdb/ entry: {e}")))?; let path = entry.path(); let name = entry.file_name(); let name_str = name.to_string_lossy(); if !name_str.ends_with(".manifest") { continue; } let prog_name = &name_str[..name_str.len() - ".manifest".len()]; let container_build_path = format!("{}/exe/{}", CONTAINER_MORLOC_HOME, prog_name); rewrite_one_manifest(&path, &container_build_path)?; } Ok(()) } /// Rewrite the `build.path` field in a single manifest wrapper script. fn rewrite_one_manifest(path: &Path, new_build_path: &str) -> Result<()> { let content = fs::read_to_string(path) .map_err(|e| ManagerError::UnfreezeError(format!("read {}: {e}", path.display())))?; let (prefix, json_str) = if content.starts_with("#!") { if let Some(marker_pos) = content.find(MANIFEST_MARKER) { let after_marker = &content[marker_pos..]; let json_start = after_marker .find('\n') .map(|i| marker_pos + i + 1) .unwrap_or(content.len()); (&content[..json_start], &content[json_start..]) } else { return Ok(()); // no marker, skip } } else { ("", content.as_str()) }; let mut manifest: serde_json::Value = serde_json::from_str(json_str) .map_err(|e| ManagerError::UnfreezeError(format!("parse {}: {e}", path.display())))?; if let Some(build) = manifest.get_mut("build") { if let Some(p) = build.get_mut("path") { *p = serde_json::Value::String(new_build_path.to_string()); } } let new_json = serde_json::to_string(&manifest) .map_err(|e| ManagerError::UnfreezeError(format!("serialize {}: {e}", path.display())))?; let new_content = format!("{}{}\n", prefix, new_json); fs::write(path, new_content) .map_err(|e| ManagerError::UnfreezeError(format!("write {}: {e}", path.display())))?; 
Ok(()) } // ====================================================================== // Manifest and image resolution // ====================================================================== fn resolve_base_from_manifest( engine: ContainerEngine, m_manifest: Option<&FreezeManifest>, ver: Version, ) -> String { let ghcr_fallback = format!( "ghcr.io/morloc-project/morloc/morloc-full:{}", ver.show() ); let Some(fm) = m_manifest else { return ghcr_fallback; }; // Resolve the effective base image: use manifest's base_image if it exists // locally, otherwise fall back to the GHCR image. The manifest may record a // locally-retagged image (e.g. localhost/morloc:0.69.0) that won't exist on // other machines. let effective_base = if image_exists_locally(engine, &fm.base_image) { fm.base_image.clone() } else { eprintln!( "Base image '{}' not found locally, trying GHCR fallback...", fm.base_image ); ghcr_fallback }; match &fm.env_layer { None => effective_base, Some(fel) => { // Fast path: env image tag exists locally if let Some(ref tag) = fel.image_tag { let exe = engine_executable(engine); let check = Command::new(exe) .args(["image", "inspect", tag]) .stdout(Stdio::null()) .stderr(Stdio::null()) .status(); if check.map(|s| s.success()).unwrap_or(false) { return tag.clone(); } } // Rebuild env layer from stored Dockerfile using effective base rebuild_env_image(engine, &effective_base, fm, fel) } } } fn rebuild_env_image( engine: ContainerEngine, effective_base: &str, fm: &FreezeManifest, fel: &FrozenEnvLayer, ) -> String { let env_tag = format!( "localhost/morloc-env:{}-{}", fm.morloc_version.show(), fel.name ); let exe = engine_executable(engine); // Check if tagged image exists locally let check = Command::new(exe) .args(["image", "inspect", &env_tag]) .stdout(Stdio::null()) .stderr(Stdio::null()) .status(); if check.map(|s| s.success()).unwrap_or(false) { return env_tag; } eprintln!("Building deployment image (environment layer: {})", fel.name); let build_dir = 
"/tmp/morloc-env-rebuild"; let _ = fs::create_dir_all(build_dir); let df_path = format!("{build_dir}/Dockerfile"); let _ = fs::write(&df_path, &fel.dockerfile); let build_cfg = BuildConfig { dockerfile: df_path, context: build_dir.to_string(), tag: env_tag.clone(), build_args: vec![("CONTAINER_BASE".to_string(), effective_base.to_string())], }; let (status, _, build_err) = container_build(engine, &build_cfg); if status.success() { env_tag } else { eprintln!( "Warning: env rebuild failed, falling back to base image: {build_err}" ); effective_base.to_string() } } ================================================ FILE: data/rust/morloc-manager/src/types.rs ================================================ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::cmp::Ordering; use std::str::FromStr; // ====================================================================== // Core enumerations // ====================================================================== #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum Scope { Local, System, } impl Serialize for Scope { fn serialize(&self, serializer: S) -> Result { match self { Scope::Local => serializer.serialize_str("local"), Scope::System => serializer.serialize_str("system"), } } } impl<'de> Deserialize<'de> for Scope { fn deserialize>(deserializer: D) -> Result { let s = String::deserialize(deserializer)?; match s.as_str() { "local" => Ok(Scope::Local), "system" => Ok(Scope::System), _ => Err(serde::de::Error::custom(format!("Unknown scope: {s}"))), } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ContainerEngine { Docker, Podman, } impl Serialize for ContainerEngine { fn serialize(&self, serializer: S) -> Result { match self { ContainerEngine::Docker => serializer.serialize_str("docker"), ContainerEngine::Podman => serializer.serialize_str("podman"), } } } impl<'de> Deserialize<'de> for ContainerEngine { fn deserialize>(deserializer: D) -> Result { let s = 
String::deserialize(deserializer)?;
        match s.as_str() {
            "docker" => Ok(ContainerEngine::Docker),
            "podman" => Ok(ContainerEngine::Podman),
            _ => Err(serde::de::Error::custom(format!(
                "Unknown container engine: {s}"
            ))),
        }
    }
}

// ======================================================================
// Version
// ======================================================================

/// Semantic version with an optional pre-release suffix.
/// (`Option<String>` restored -- the extraction stripped the parameter.)
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Version {
    pub major: u32,
    pub minor: u32,
    pub patch: u32,
    pub prerelease: Option<String>,
}

impl Version {
    #[cfg(test)]
    pub fn new(major: u32, minor: u32, patch: u32) -> Self {
        Self {
            major,
            minor,
            patch,
            prerelease: None,
        }
    }

    /// Render as "MAJOR.MINOR.PATCH" or "MAJOR.MINOR.PATCH-PRE".
    pub fn show(&self) -> String {
        match &self.prerelease {
            Some(pre) => format!("{}.{}.{}-{}", self.major, self.minor, self.patch, pre),
            None => format!("{}.{}.{}", self.major, self.minor, self.patch),
        }
    }
}

impl Ord for Version {
    fn cmp(&self, other: &Self) -> Ordering {
        self.major
            .cmp(&other.major)
            .then(self.minor.cmp(&other.minor))
            .then(self.patch.cmp(&other.patch))
            .then(match (&self.prerelease, &other.prerelease) {
                (None, None) => Ordering::Equal,
                (Some(_), None) => Ordering::Less, // pre-release < release
                (None, Some(_)) => Ordering::Greater, // release > pre-release
                (Some(a), Some(b)) => a.cmp(b),
            })
    }
}

impl PartialOrd for Version {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl FromStr for Version {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Split off pre-release suffix on first '-': "0.77.0-rc.1" -> ("0.77.0", Some("rc.1"))
        let (version_part, prerelease) = match s.find('-') {
            Some(idx) => (&s[..idx], Some(s[idx + 1..].to_string())),
            None => (s, None),
        };
        let parts: Vec<&str> = version_part.split('.').collect();
        if parts.len() != 3 {
            return Err(format!("Invalid version: {s}. Expected format: MAJOR.MINOR.PATCH[-PRERELEASE]"));
        }
        let major = parts[0]
            .parse()
            .map_err(|_| format!("Invalid major version: {}", parts[0]))?;
        let minor = parts[1]
            .parse()
            .map_err(|_| format!("Invalid minor version: {}", parts[1]))?;
        let patch = parts[2]
            .parse()
            .map_err(|_| format!("Invalid patch version: {}", parts[2]))?;
        Ok(Version {
            major,
            minor,
            patch,
            prerelease,
        })
    }
}

// Serialized form is the `show()` string; parsing round-trips via FromStr.
impl Serialize for Version {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        serializer.serialize_str(&self.show())
    }
}

impl<'de> Deserialize<'de> for Version {
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let s = String::deserialize(deserializer)?;
        s.parse().map_err(serde::de::Error::custom)
    }
}

// ======================================================================
// Configuration
// ======================================================================

/// Top-level manager configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Name of the active environment.
    pub active_env: Option<String>,
    /// Default container engine.
    #[serde(default = "default_engine")]
    pub engine: ContainerEngine,
}

fn default_engine() -> ContainerEngine {
    ContainerEngine::Podman
}

impl Default for Config {
    fn default() -> Self {
        Self {
            active_env: None,
            engine: ContainerEngine::Podman,
        }
    }
}

// ======================================================================
// Environment configuration
// ======================================================================

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentConfig {
    /// Human-readable name (also the directory name).
    pub name: String,
    /// Base container image reference.
    pub base_image: String,
    /// Original pullable image reference (e.g., :edge tag) before local re-tagging.
    #[serde(default)]
    pub original_image: Option<String>,
    /// Filename of the custom Dockerfile layer (within the env config dir).
    #[serde(default)]
    pub dockerfile: Option<String>,
    /// SHA256 hash of the Dockerfile content (for rebuild detection).
#[serde(default)] pub content_hash: Option, /// Built image tag after applying the Dockerfile layer. /// None when only the base image is used. #[serde(default)] pub built_image: Option, /// Container engine for this environment. pub engine: ContainerEngine, /// Shared memory size for container runs. #[serde(default = "default_shm_size")] pub shm_size: String, /// Morloc version this environment was created from. #[serde(default)] pub morloc_version: Option, } fn default_shm_size() -> String { "512m".to_string() } impl EnvironmentConfig { /// Returns the image to use for running containers. /// Prefers the built Dockerfile layer image, falls back to base_image. pub fn active_image(&self) -> &str { self.built_image.as_deref().unwrap_or(&self.base_image) } } // ====================================================================== // Freeze manifest // ====================================================================== #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FreezeManifest { pub morloc_version: Version, pub frozen_at: chrono::DateTime, pub modules: Vec, pub programs: Vec, pub base_image: String, pub env_layer: Option, /// Deprecated: previously held expected env var names. Retained for backward /// compatibility when reading older freeze manifests. #[serde(default, skip_serializing)] #[allow(dead_code)] pub env_vars: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FrozenEnvLayer { pub name: String, pub dockerfile: String, pub content_hash: String, /// Container image tag (e.g. localhost/morloc-env:0.79.2-dnd). /// Named image_tag because it stores a mutable tag, not a content-addressed digest. 
#[serde(alias = "image_digest")] pub image_tag: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ModuleEntry { pub name: String, pub version: Option, pub sha256: String, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProgramEntry { pub name: String, pub commands: Vec, } ================================================ FILE: data/rust/morloc-manifest/Cargo.toml ================================================ [package] name = "morloc-manifest" version = "0.81.0" edition = "2021" description = "Morloc manifest schema (v2): shared Rust types for the .manifest JSON blob produced by the morloc compiler and consumed by the nexus, runtime, and any future tooling." # Version is intentionally synchronized with the morloc compiler version # (see ../../../package.yaml). Manifests record the morloc_version that # built them; consumers reject manifests whose version differs from # CARGO_PKG_VERSION at parse time. Bumping the morloc compiler version # requires bumping this version in lockstep. [dependencies] serde = { workspace = true } serde_json = { workspace = true } ================================================ FILE: data/rust/morloc-manifest/src/lib.rs ================================================ //! Morloc manifest schema (v2) -- canonical Rust types. //! //! The morloc compiler emits a `.manifest` JSON blob describing every //! exported command's interface. This crate is the **single source of //! truth** for that schema's Rust representation. Both the CLI nexus //! (`morloc-nexus`) and the C-FFI runtime (`morloc-runtime`) depend on //! these types so neither has to maintain its own deserialization //! logic. //! //! ## Versioning //! //! The manifest does not carry a dedicated schema version. Manifests //! are transient build artifacts (always regenerated on `morloc make`, //! never stored in version control), so the morloc compiler version //! recorded in the [`Build`] sub-object serves as the staleness //! indicator. 
The check happens in [`parse_manifest`]. //! //! Version coupling: this crate's `CARGO_PKG_VERSION` is intentionally //! kept in lockstep with the morloc Haskell compiler version (see //! `package.yaml`). The same is true of `morloc-nexus` and //! `morloc-runtime` -- bumping the morloc compiler requires bumping //! all three Rust crates in the same commit. //! //! ## Extension slots //! //! Every entity object (manifest, pool, command, arg, return, group, //! service) carries: //! //! - `constraints: Vec` -- enforceable rules. Currently //! the compiler emits only the `kind` constraint on named-type args //! (record/object/table). Future constraints (`min`, `max`, `regex`, //! `length`, `non_empty`, `row_count`, ...) will append to this //! list without any schema change. //! //! - `metadata: BTreeMap` -- free-form //! informational key-value pairs. Always emitted as `{}` today; //! reserved so consumers never have to check whether the field //! exists. Future doc hints, studio annotations, telemetry tags, //! etc. live here until they stabilize into first-class fields. //! //! Many of these slots are `#[allow(dead_code)]` because no current //! consumer reads them. They are deliberate forward-compatible //! placeholders, not vestigial fields. //! //! ## Unknown-field tolerance //! //! All structs use `#[serde(default)]` on optional fields and silently //! ignore unknown JSON keys. A manifest written by a newer morloc //! compiler will still parse with an older nexus (modulo the version //! mismatch error in [`parse_manifest`]). use serde::Deserialize; use std::collections::BTreeMap; /// Convenient alias for the `metadata` extension slot. Using /// `BTreeMap` (rather than `HashMap` or raw `serde_json::Value`) gives /// us (a) compile-time enforcement that metadata is always a JSON /// object, and (b) deterministic iteration order for stable diffs. 
pub type Metadata = BTreeMap; // -- Top-level manifest ------------------------------------------------------- /// The top-level manifest object. Embedded in every built nexus binary /// as a JSON blob after the `### MANIFEST ###` marker. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct Manifest { /// Program identifier -- comes from the morloc `module` declaration. pub name: String, /// Compiler-sourced build metadata (path, timestamp, version). /// Distinct from the user-sourced top-level `metadata` slot. pub build: Build, /// Language pool daemons that this program dispatches to. #[serde(default)] pub pools: Vec, /// Exported commands the user can invoke. #[serde(default)] pub commands: Vec, /// Command groups for organizing CLI subcommands in help output. #[serde(default)] pub groups: Vec, /// Daemon-mode service configuration. None for normal CLI mode. #[serde(default)] pub service: Option, /// Module-level description lines (from docstrings before `module`). /// Shown after "Usage:" and before "Nexus options" in top-level help. #[serde(default)] pub desc: Vec, /// Epilogue blocks shown at the end of top-level help output. #[serde(default)] pub epilogues: Vec>, /// **Reserved.** User-sourced free-form annotations on the module. /// Always emitted as `{}` today. Distinct from `build` (which is /// compiler-sourced). #[serde(default)] pub metadata: Metadata, } /// Compiler-sourced metadata about how this manifest was produced. /// /// Future build fields (`hash`, `source_hash`, `host`, `user`, `system`, /// `dependencies`, `cflags`, `reproducible`, ...) will be added directly /// to this struct as additive non-breaking changes -- no sub-metadata /// nesting required. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct Build { /// Absolute path to the build directory containing this program's /// pool executables and generated source files. The nexus chdirs /// here at startup so relative pool exec paths resolve. 
pub path: String, /// Unix timestamp at which the manifest was generated. pub time: i64, /// Version of the morloc compiler that produced this manifest. The /// nexus compares this against its own compile-time /// `CARGO_PKG_VERSION` (which is intentionally synchronized with /// the morloc compiler version) in [`parse_manifest`]; a mismatch /// produces an actionable "rebuild with the current compiler" /// error. pub morloc_version: String, } /// A single language pool daemon. Each pool is one OS process that /// hosts the language-specific implementations of source functions. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct Pool { /// Language tag (e.g. `"py"`, `"cpp"`, `"r"`, `"julia"`). pub lang: String, /// argv used to spawn the pool process (e.g. `["python3", "pool.py"]`). pub exec: Vec, /// Unix domain socket basename (under tmpdir) for IPC. pub socket: String, /// **Reserved.** Per-pool metadata. Future slots: `resource` /// (cpu/memory limits), `env` (environment variables), /// `startup_timeout`, `health_check`. #[serde(default)] pub metadata: Metadata, } // -- Commands ----------------------------------------------------------------- /// Discriminator for the command kind. Closed enum so adding a new /// variant requires explicit code changes everywhere it's matched. #[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum CmdType { /// Dispatched to a language pool process via IPC. Remote, /// Evaluated inline by the nexus from an embedded expression tree. Pure, } /// One exported morloc function the user can invoke as a CLI subcommand. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct Command { /// CLI subcommand name (defaults to the morloc function name; can /// be overridden via a `--' name:` docstring directive). pub name: String, /// Discriminator: [`CmdType::Remote`] (dispatch to a pool) or /// [`CmdType::Pure`] (evaluate inline via the manifest's `expr` /// tree). 
#[serde(rename = "type")] pub cmd_type: CmdType, // -- Remote-only dispatch info ---------------------------------------- /// Manifold ID -- the integer key under which the pool's dispatch /// table contains this function's entry. Remote commands only. #[serde(default)] pub mid: u32, /// Index into [`Manifest::pools`] for the primary pool that hosts /// this command's top-level function. Remote commands only. #[serde(default, rename = "pool")] pub pool_index: usize, /// Indices of every pool transitively required to execute this /// command (the primary pool plus any pools called as foreign /// functions from inside it). Remote commands only. #[serde(default)] pub needed_pools: Vec, // -- Common fields ---------------------------------------------------- /// Description lines shown in CLI help. The first line is the /// summary used in subcommand listings. #[serde(default)] pub desc: Vec, /// Argument list, in declaration order. Each entry is a /// discriminated union -- see [`Arg`]. #[serde(default)] pub args: Vec, /// Return-value descriptor. Always present, even for nullary /// returns (use a Nil schema in that case). #[serde(default, rename = "return")] pub ret: Return, /// **Reserved.** Command-level constraints -- invariants that span /// multiple arguments (e.g. `equal_length` of two list args). /// Empty in v2; populated when the constraint system rolls out. #[serde(default)] pub constraints: Vec, /// **Reserved.** Per-command metadata. Future slots: `effects` /// (declared I/O / network / filesystem effect set), `resource` /// (CPU/memory/time limits), `auth` (required capabilities), /// `version` (semantic version of the command's API), /// `deprecated` (migration notice). #[serde(default)] pub metadata: Metadata, // -- Pure-only evaluation info ---------------------------------------- /// Embedded expression tree (NexusExpr JSON) for pure commands. /// Evaluated inline by the nexus instead of being dispatched to a /// pool. Absent on remote commands. 
#[serde(default)] pub expr: Option, // -- Command group ---------------------------------------------------- /// Optional name of the command group this command belongs to. Used /// to organize subcommands in help output. The Haskell emitter now /// writes a real JSON null for absent groups (see /// `Morloc.CodeGenerator.Nexus.cmdGroupField`), so no custom /// deserializer is needed. #[serde(default)] pub group: Option, } impl Command { pub fn is_pure(&self) -> bool { self.cmd_type == CmdType::Pure } } /// Return-value descriptor. Structurally similar to a typed [`Arg`] /// minus the CLI-specific fields (kind, metavar, quoted, short/long, /// default). Always present on every command. #[derive(Debug, Deserialize, Default)] #[allow(dead_code)] pub struct Return { /// Morloc serialization schema string for the return type. The /// nexus uses this to deserialize the bytes coming back from the /// pool process. #[serde(default)] pub schema: String, /// User-facing type name as written in the morloc source (e.g. /// `"Int"`, `"Config"`, `"[Int]"`). Used in help output and error /// messages. JSON key is `type`; the Rust field is `type_desc` /// because `type` is a reserved keyword. #[serde(default, rename = "type")] pub type_desc: String, /// Description lines for the return value, parsed from `--' return:` /// docstrings. #[serde(default)] pub desc: Vec, /// **Reserved.** Constraints on the return value. Currently used /// only for `kind: record|object|table` on named return types; /// future constraints (min/max/regex/...) will live here. #[serde(default)] pub constraints: Vec, /// **Reserved.** Per-return metadata; same forward-compatibility /// rationale as the per-arg slot. #[serde(default)] pub metadata: Metadata, } // -- Constraints -------------------------------------------------------------- /// A single constraint entry attached to an arg, return value, or /// command. Discriminated by `type`. 
/// /// **Currently emitted constraint types**: /// /// - `kind`: marks a named type as `record` / `object` / `table`. The /// `value` payload is the lowercased name. The CLI help renderer /// uses this to partition into Record Schemas / Table Schemas /// sections. /// /// **Reserved constraint types** (defined shapes, not yet emitted by /// any compiler pass -- names and payloads chosen so the schema /// doesn't need to bump when the constraint feature lands): /// /// - `min`, `max`: numeric bounds with `value: `. /// - `length`: `value: ` for a fixed length, or `{min, max}` for /// a bounded range. /// - `non_empty`: list/string must be non-empty (no payload). /// - `regex`: string must match `value: ""`. /// - `enum`: value must be one of `value: [...]`. /// - `unique`: list elements must be pairwise distinct (no payload). /// - `row_count`: fixed/bounded row count for `table`-tagged args. /// - `sorted`: `value: "asc"|"desc"`. /// /// **Extensibility rules**: /// /// 1. Unknown `type` values MUST be silently ignored by readers. /// 2. Adding a new constraint type does not bump the manifest version. /// 3. Changing the payload shape of an existing type DOES bump. /// 4. Constraints are unordered. /// 5. Multiple constraints of the same type on the same entity are /// allowed. #[derive(Debug, Deserialize, Clone)] #[allow(dead_code)] pub struct Constraint { /// Constraint discriminator. JSON key is `type`; the Rust field is /// `ctype` because `type` is a reserved keyword. #[serde(rename = "type")] pub ctype: String, /// Constraint-specific payload. Shape depends on `ctype`. Some /// constraint types (`non_empty`, `unique`) carry no payload. #[serde(default)] pub value: Option, } // -- Arguments ---------------------------------------------------------------- /// CLI argument variants. Each command's argument list is a sequence /// of these, in declaration order. 
/// /// Three of the four variants (`Positional`, `Optional`, `Group`) /// carry type/schema/constraints information because they represent /// typed values that flow through to a pool. `Flag` is a pure boolean /// toggle with no associated type -- and therefore no `schema`, /// `type_desc`, or `constraints` slot. /// /// `#[allow(dead_code)]` covers the `metadata` slots that are reserved /// for future use and not read by any current consumer. #[derive(Debug, Deserialize)] #[serde(tag = "kind")] #[allow(dead_code)] pub enum Arg { /// A positional CLI argument. #[serde(rename = "pos")] Positional { /// Morloc serialization schema string. Used at dispatch time /// to parse the user's CLI input into a binary data packet. #[serde(default)] schema: Option, /// User-facing type name (e.g. `"Int"`, `"Config"`). The Rust /// field is `type_desc` because `type` is a reserved keyword. #[serde(default, rename = "type")] type_desc: Option, /// Display placeholder shown in help (e.g. `"FILE"`). None /// falls back to a generic `ARG` placeholder. #[serde(default)] metavar: Option, /// If true, the user's CLI value is JSON-wrapped before being /// passed to the pool. Used for `Str`-typed positionals /// declared with `--' literal: true`. #[serde(default)] quoted: bool, /// Description lines from `--' desc:` docstrings. #[serde(default)] desc: Vec, /// Per-argument enforceable invariants. Currently only the /// `kind` constraint is emitted (for named-type args). Future /// constraints (min/max/regex/length/...) will populate this. #[serde(default)] constraints: Vec, /// **Reserved.** Per-argument informational metadata. Always /// emitted as `{}` in v2; not yet read by any consumer. #[serde(default)] metadata: Metadata, }, /// An optional CLI argument with a long/short option name. #[serde(rename = "opt")] Optional { /// Morloc serialization schema for the option's value type. #[serde(default)] schema: Option, /// User-facing type name. JSON key is `type`. 
#[serde(default, rename = "type")] type_desc: Option, /// Required display placeholder (e.g. `"FILE"`). #[serde(default)] metavar: Option, /// JSON-wrap flag for `Str`-typed literal options. #[serde(default)] quoted: bool, /// Single-character short option (e.g. `"f"` for `-f`). #[serde(default, rename = "short")] short_opt: Option, /// Long option name (e.g. `"file"` for `--file`). #[serde(default, rename = "long")] long_opt: Option, /// Default value used when the user does not pass the option. /// Always present (declared via `--' default:` docstring). #[serde(default, rename = "default")] default_val: Option, /// Description lines. #[serde(default)] desc: Vec, /// Per-argument constraints -- see [`Arg::Positional`]'s /// `constraints` field. #[serde(default)] constraints: Vec, /// **Reserved.** Per-argument metadata. Not read in v2. #[serde(default)] metadata: Metadata, }, /// A pure boolean flag toggle. Carries no type, schema, or /// constraints because it has no payload -- flipping the flag /// produces the value `true` or `false`. #[serde(rename = "flag")] Flag { /// Single-character short option (e.g. `"v"` for `-v`). #[serde(default, rename = "short")] short_opt: Option, /// Long option name (e.g. `"verbose"` for `--verbose`). #[serde(default, rename = "long")] long_opt: Option, /// Long option name that flips the flag in the opposite /// direction (e.g. `"no-verbose"` for `--no-verbose`). #[serde(default)] long_rev: Option, /// Default value when the flag is not present on the CLI. /// String form: `"true"` or `"false"`. #[serde(default, rename = "default")] default_val: Option, /// Description lines. #[serde(default)] desc: Vec, /// **Reserved.** Per-flag metadata. Not read in v2. #[serde(default)] metadata: Metadata, }, /// A record-typed argument that has been "unrolled" into a flat /// collection of CLI flags/options, one per record field. 
The /// group's top-level `schema` is the schema of the whole record; /// dispatch sends the assembled record to the pool, so individual /// entries never need their own schemas. #[serde(rename = "grp")] Group { /// Morloc schema for the whole record (a `Map` schema). #[serde(default)] schema: Option, /// User-facing record type name (e.g. `"SysConfig"`). #[serde(default, rename = "type")] type_desc: Option, /// Display placeholder for the group as a whole. #[serde(default)] metavar: Option, /// Description lines for the group. #[serde(default)] desc: Vec, /// Optional CLI option that accepts the entire record as a /// single JSON value (e.g. `--sys-config '{...}'`). #[serde(default)] group_opt: Option, /// Flattened per-field options/flags. Each entry's `arg` is a /// nested [`Arg`] (typically `Optional` or `Flag`) that has /// no schema of its own -- only the group's top-level schema /// matters at dispatch time. #[serde(default)] entries: Vec, /// Per-group constraints. Currently the `kind` constraint /// (almost always `record`) is emitted. #[serde(default)] constraints: Vec, /// **Reserved.** Per-group metadata. Not read in v2. #[serde(default)] metadata: Metadata, }, } /// Nested CLI option that accepts the entire record (associated with /// an [`Arg::Group`]) as a single JSON value. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct GroupOpt { /// Short option char that accepts the whole record as one JSON value. #[serde(default, rename = "short")] pub short_opt: Option, /// Long option name that accepts the whole record as one JSON value. #[serde(default, rename = "long")] pub long_opt: Option, } /// One entry inside an [`Arg::Group`] -- pairs a record field name /// with the CLI flag/option that backs it. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct GroupEntry { /// Record field name. pub key: String, /// CLI binding for this field. 
Always a [`Arg::Optional`] or /// [`Arg::Flag`] in practice; never carries its own schema (the /// containing group's schema covers all fields). pub arg: Arg, } /// CLI command group -- purely organizational metadata used to bucket /// related subcommands together in the help output. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct CmdGroup { /// Group name; matches `Command::group` on member commands. pub name: String, /// Group description lines for the help output. #[serde(default)] pub desc: Vec, /// **Reserved.** Per-group metadata. Not read in v2. #[serde(default)] pub metadata: Metadata, } /// Daemon-mode service configuration. Present only when the program /// is configured to run as a long-lived service rather than a one-shot /// CLI invocation. #[derive(Debug, Deserialize)] #[allow(dead_code)] pub struct Service { /// Transport type: typically `"http"`, `"tcp"`, or `"unix"`. #[serde(rename = "type")] pub service_type: Option, /// Listening host address (TCP/HTTP). pub host: Option, /// Listening port (TCP/HTTP). pub port: Option, /// Unix socket path (when `service_type` is `"unix"`). pub socket: Option, /// **Reserved.** Per-service metadata. Not read in v2. #[serde(default)] pub metadata: Metadata, } // -- I/O ---------------------------------------------------------------------- /// Read the manifest payload from a built-nexus wrapper script. The /// nexus binary is wrapped in a shell script that contains a /// `### MANIFEST ###` marker followed by the JSON blob. Plain JSON /// files (no shebang) are returned as-is. 
pub fn read_manifest_payload(path: &str) -> Result { let content = std::fs::read_to_string(path) .map_err(|e| format!("Cannot open manifest file '{}': {}", path, e))?; if content.starts_with("#!") { if let Some(pos) = content.find("### MANIFEST ###") { let after_marker = &content[pos..]; let payload_start = after_marker .find('\n') .map(|i| pos + i + 1) .unwrap_or(content.len()); Ok(content[payload_start..].to_string()) } else { Err("No ### MANIFEST ### marker found in wrapper script".into()) } } else { Ok(content) } } /// Parse a manifest JSON payload into a [`Manifest`]. Performs a /// staleness check on `build.morloc_version` against this crate's own /// `CARGO_PKG_VERSION` (which is intentionally pinned to match the /// morloc compiler version). Mismatched versions return a clean /// "rebuild with the current compiler" error rather than silently /// misinterpreting the manifest. pub fn parse_manifest(payload: &str) -> Result { let m: Manifest = serde_json::from_str(payload) .map_err(|e| format!("Failed to parse manifest JSON: {}", e))?; let crate_version = env!("CARGO_PKG_VERSION"); if m.build.morloc_version != crate_version { return Err(format!( "manifest built with morloc {}, runtime is {}; rebuild with the current compiler", m.build.morloc_version, crate_version )); } Ok(m) } // -- Arg accessors ------------------------------------------------------------ // // Variant-agnostic helpers for the fields that exist on multiple Arg // variants. Callers in the nexus and runtime use these instead of // pattern-matching at every site. impl Arg { /// Single-character short option (e.g. `'f'` for `-f`). Returns /// None for positional and group args. pub fn short_opt_char(&self) -> Option { let s = match self { Arg::Optional { short_opt, .. } => short_opt.as_deref(), Arg::Flag { short_opt, .. } => short_opt.as_deref(), _ => None, }; s.and_then(|s| s.chars().next()) } /// Long option name (e.g. `"verbose"` for `--verbose`). 
Returns /// None for positional and group args. pub fn long_opt_str(&self) -> Option<&str> { match self { Arg::Optional { long_opt, .. } => long_opt.as_deref(), Arg::Flag { long_opt, .. } => long_opt.as_deref(), _ => None, } } /// True if this arg is a boolean flag toggle. pub fn is_flag(&self) -> bool { matches!(self, Arg::Flag { .. }) } /// True if the user's CLI value should be JSON-wrapped before /// being passed to the pool. Used for `Str`-typed arguments /// declared with `--' literal: true`. Always false for flags and /// groups. pub fn is_quoted(&self) -> bool { match self { Arg::Positional { quoted, .. } | Arg::Optional { quoted, .. } => *quoted, _ => false, } } /// Default CLI value when the user does not pass the argument. /// Returns None for positional args (which are always required) /// and groups. pub fn default_val(&self) -> Option<&str> { match self { Arg::Optional { default_val, .. } => default_val.as_deref(), Arg::Flag { default_val, .. } => default_val.as_deref(), _ => None, } } /// CLI display placeholder (e.g. `"FILE"`, `"INT"`). None for /// flags and for positional args without an explicit metavar. pub fn metavar_str(&self) -> Option<&str> { match self { Arg::Positional { metavar, .. } => metavar.as_deref(), Arg::Optional { metavar, .. } => metavar.as_deref(), Arg::Group { metavar, .. } => metavar.as_deref(), _ => None, } } /// Description lines from the source-level docstring. Always /// available regardless of variant. pub fn desc_lines(&self) -> &[String] { match self { Arg::Positional { desc, .. } | Arg::Optional { desc, .. } | Arg::Flag { desc, .. } | Arg::Group { desc, .. } => desc, } } /// User-facing type name for typed args (e.g. `"Int"`, /// `"Config"`). Returns None for flags, which carry no type. pub fn type_desc_str(&self) -> Option<&str> { match self { Arg::Positional { type_desc, .. } | Arg::Optional { type_desc, .. } | Arg::Group { type_desc, .. } => type_desc.as_deref(), Arg::Flag { .. 
} => None, } } /// Morloc serialization schema string for typed args. Returns /// None for flags. The schema drives both dispatch (how to encode /// the value into a packet) and help rendering (how to extract /// record field layouts for the Record/Table Schemas sections). pub fn schema_str(&self) -> Option<&str> { match self { Arg::Positional { schema, .. } | Arg::Optional { schema, .. } | Arg::Group { schema, .. } => schema.as_deref(), Arg::Flag { .. } => None, } } /// All constraints attached to this arg. Empty for flags. The /// caller is responsible for filtering by constraint type and /// silently ignoring unknown types (per the extensibility rules /// on [`Constraint`]). pub fn constraints(&self) -> &[Constraint] { match self { Arg::Positional { constraints, .. } | Arg::Optional { constraints, .. } | Arg::Group { constraints, .. } => constraints, Arg::Flag { .. } => &[], } } /// Convenience accessor: extract the `value` of the `kind` /// constraint as a string slice. Returns the lowercased /// `"record"`, `"object"`, or `"table"` for named-type args. /// None for everything else (including untagged primitive types). pub fn kind_constraint(&self) -> Option<&str> { self.constraints() .iter() .find(|c| c.ctype == "kind") .and_then(|c| c.value.as_ref().and_then(|v| v.as_str())) } } // -- Tests -------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; /// Wrap a v2 command body with the required top-level fields so /// each test fixture stays compact. 
fn wrap(commands_json: &str) -> String { let v = env!("CARGO_PKG_VERSION"); format!( r#"{{ "name": "main", "build": {{ "path": "/tmp/test", "time": 0, "morloc_version": "{}" }}, "pools": [ {{"lang": "py", "exec": ["python3", "pool.py"], "socket": "pipe-py", "metadata": {{}}}} ], "commands": {}, "groups": [], "metadata": {{}} }}"#, v, commands_json ) } #[test] fn test_parse_simple_manifest() { let json = wrap( r#"[ { "name": "f", "type": "remote", "mid": 1, "pool": 0, "needed_pools": [0], "desc": [], "args": [ { "kind": "pos", "schema": "s", "type": "Str", "metavar": null, "quoted": false, "desc": [], "constraints": [], "metadata": {} } ], "return": { "schema": "s", "type": "Str", "desc": [], "constraints": [], "metadata": {} }, "constraints": [], "metadata": {}, "group": null } ]"#, ); let m = parse_manifest(&json).unwrap(); assert_eq!(m.pools.len(), 1); assert_eq!(m.pools[0].lang, "py"); assert_eq!(m.commands.len(), 1); assert_eq!(m.commands[0].name, "f"); assert!(!m.commands[0].is_pure()); assert_eq!(m.commands[0].mid, 1); assert!(m.commands[0].group.is_none()); assert_eq!(m.commands[0].args.len(), 1); assert_eq!(m.commands[0].args[0].schema_str(), Some("s")); assert_eq!(m.commands[0].args[0].type_desc_str(), Some("Str")); assert_eq!(m.commands[0].ret.schema, "s"); assert_eq!(m.commands[0].ret.type_desc, "Str"); } #[test] fn test_parse_pure_command() { let json = wrap( r#"[ { "name": "greet", "type": "pure", "desc": ["Say hello"], "args": [ { "kind": "pos", "schema": "s", "type": "Str", "metavar": "NAME", "quoted": true, "desc": ["name"], "constraints": [], "metadata": {} } ], "return": { "schema": "s", "type": "Str", "desc": [], "constraints": [], "metadata": {} }, "expr": {"tag": "lit", "schema": "s", "lit_type": "str", "value": "hello"}, "constraints": [], "metadata": {}, "group": null } ]"#, ); let m = parse_manifest(&json).unwrap(); assert!(m.commands[0].is_pure()); assert!(m.commands[0].expr.is_some()); } #[test] fn test_parse_kind_constraint() { let json = 
wrap( r#"[ { "name": "process", "type": "remote", "mid": 1, "pool": 0, "needed_pools": [0], "desc": [], "args": [ { "kind": "pos", "schema": "m24nameas3ageai4", "type": "People", "metavar": null, "quoted": false, "desc": [], "constraints": [ {"type": "kind", "value": "table"} ], "metadata": {} } ], "return": { "schema": "i4", "type": "Int", "desc": [], "constraints": [], "metadata": {} }, "constraints": [], "metadata": {}, "group": null } ]"#, ); let m = parse_manifest(&json).unwrap(); assert_eq!(m.commands[0].args[0].kind_constraint(), Some("table")); } #[test] fn test_version_mismatch_rejected() { let json = r#"{ "name": "main", "build": {"path": "/tmp/x", "time": 0, "morloc_version": "0.0.1-stale"}, "pools": [], "commands": [], "groups": [], "metadata": {} }"#; let err = parse_manifest(json).unwrap_err(); assert!( err.contains("rebuild with the current compiler"), "got: {}", err ); } } ================================================ FILE: data/rust/morloc-nexus/Cargo.toml ================================================ [package] name = "morloc-nexus" version = "0.81.0" edition = "2021" description = "Morloc nexus: CLI dispatcher for multi-language pool orchestration" # Version is intentionally synchronized with the morloc compiler version # (see ../../../package.yaml). Manifests record the morloc_version that # built them; the nexus rejects manifests whose version differs from its # own CARGO_PKG_VERSION at parse time. Bumping the morloc compiler # version requires bumping this version in lockstep. 
[[bin]] name = "morloc-nexus" path = "src/main.rs" [dependencies] morloc-manifest = { path = "../morloc-manifest" } morloc-runtime = { path = "../morloc-runtime" } libc = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } nix = { workspace = true } clap = { workspace = true } thiserror = { workspace = true } ================================================ FILE: data/rust/morloc-nexus/build.rs ================================================ fn main() { // Use MORLOC_HOME at build time if set, else fall back to $HOME default. // This is only for the compile-time link search path. let morloc_lib = std::env::var("MORLOC_HOME") .map(|h| format!("{}/lib", h)) .unwrap_or_else(|_| { format!( "{}/.local/share/morloc/lib", std::env::var("HOME").unwrap_or_else(|_| "/root".into()) ) }); println!("cargo:rustc-link-search=native={}", morloc_lib); println!("cargo:rustc-link-lib=dylib=morloc"); // Embed $ORIGIN-relative rpaths so the nexus finds libmorloc.so // regardless of install location: // $ORIGIN/../lib covers /opt/morloc/bin -> /opt/morloc/lib // $ORIGIN/../share/morloc/lib covers ~/.local/bin -> ~/.local/share/morloc/lib println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/../lib"); println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/../share/morloc/lib"); // The morloc compiler version is sourced from CARGO_PKG_VERSION // (this crate's Cargo.toml), which is intentionally kept in // lockstep with the morloc Haskell package.yaml. No build-time // extraction needed -- Cargo guarantees CARGO_PKG_VERSION is set // and rebuilds when Cargo.toml changes. } ================================================ FILE: data/rust/morloc-nexus/src/dispatch.rs ================================================ //! Command dispatch: CLI argument parsing and routing to pools. //! //! Replaces the dispatch_command, dispatch, run_command, and run_pure_command //! functions from nexus.c. Uses the C libmorloc for packet construction and //! 
serialization until Phase 2/3 replaces those. //! //! For Phase 1, the nexus links against the C libmorloc.so for: //! - make_call_packet_from_cli, parse_cli_data_argument //! - send_and_receive_over_socket //! - pack_with_schema, print_voidstar, etc. //! - morloc_eval for pure commands use crate::help; use crate::manifest::{Arg, Command, Manifest}; use crate::process::{self, PoolSocket}; /// Output format enum. #[derive(Debug, Clone, Copy, PartialEq)] pub enum OutputFormat { Json, MessagePack, VoidStar, Packet, } /// Nexus configuration parsed from CLI options. #[derive(Debug, Clone)] pub struct NexusConfig { pub help_flag: bool, pub print_flag: bool, pub packet_path: Option, pub socket_base: Option, pub output_path: Option, pub output_format: OutputFormat, pub daemon_flag: bool, pub router_flag: bool, pub unix_socket_path: Option, pub tcp_port: Option, pub http_port: Option, pub fdb_path: Option, pub eval_timeout: i32, } impl Default for NexusConfig { fn default() -> Self { NexusConfig { help_flag: false, print_flag: false, packet_path: None, socket_base: None, output_path: None, output_format: OutputFormat::Json, daemon_flag: false, router_flag: false, unix_socket_path: None, tcp_port: None, http_port: None, fdb_path: None, eval_timeout: 30, } } } /// Emit a uniform error when pool communication fails, then exit. /// /// The pool's stderr was inherited by the nexus, so any traceback the pool /// printed before dying is already on the user's terminal. This helper /// reports the communication error plus the pool's exit status (if it has /// been reaped) so the user can correlate the two. /// /// Race condition: the pool process may still be writing its error output /// (traceback, panic message, etc.) to stderr when the nexus detects the /// broken connection. If we call clean_exit immediately, it sends SIGTERM /// to the pool process group, which can kill the pool before its stderr /// buffer is flushed. 
We insert a brief drain window to let any in-flight /// stderr from the dying pool reach the terminal before tearing everything /// down. This is best-effort: a pool killed by SIGKILL (OOM killer, etc.) /// won't have pending output, and a pool stuck in a blocking syscall won't /// flush within the window. But for the common case of a Python exception /// traceback, this is enough. fn die_with_pool_error( socket: &PoolSocket, pool_index: usize, context: &str, comm_err: &dyn std::fmt::Display, ) -> ! { // Give the dying pool process time to flush its stderr/stdout before // we tear down the process group. Without this, a Python traceback or // error message that is still in a pipe buffer gets lost when // clean_exit sends SIGTERM/SIGKILL to the pool's process group. std::thread::sleep(std::time::Duration::from_millis(100)); eprintln!("Error: {}: {}", context, comm_err); if let Some(info) = process::pool_death_info(pool_index) { eprintln!("Pool '{}' {}", socket.lang, info); } process::clean_exit(1); } /// Parse nexus-level options from argv. Returns the index of the first /// non-option argument (the manifest path or subcommand). 
pub fn parse_nexus_options(args: &[String], config: &mut NexusConfig) -> usize {
    let mut i = 1; // skip argv[0]
    while i < args.len() {
        let arg = &args[i];
        match arg.as_str() {
            "-h" | "--help" => {
                config.help_flag = true;
                i += 1;
            }
            "-p" | "--print" => {
                config.print_flag = true;
                i += 1;
            }
            "-c" | "--call-packet" => {
                i += 1;
                if i < args.len() {
                    config.packet_path = Some(args[i].clone());
                    i += 1;
                }
            }
            "-s" | "--socket-base" => {
                i += 1;
                if i < args.len() {
                    config.socket_base = Some(args[i].clone());
                    i += 1;
                }
            }
            "-o" | "--output-file" => {
                i += 1;
                if i < args.len() {
                    config.output_path = Some(args[i].clone());
                    i += 1;
                }
            }
            "-f" | "--output-form" => {
                i += 1;
                if i < args.len() {
                    config.output_format = parse_output_format(&args[i]);
                    i += 1;
                }
            }
            "--daemon" => {
                config.daemon_flag = true;
                i += 1;
            }
            "--router" => {
                config.router_flag = true;
                i += 1;
            }
            "--socket" => {
                i += 1;
                if i < args.len() {
                    config.unix_socket_path = Some(args[i].clone());
                    i += 1;
                }
            }
            "--port" => {
                i += 1;
                if i < args.len() {
                    config.tcp_port = args[i].parse().ok();
                    i += 1;
                }
            }
            "--http-port" => {
                i += 1;
                if i < args.len() {
                    config.http_port = args[i].parse().ok();
                    i += 1;
                }
            }
            "--fdb" => {
                i += 1;
                if i < args.len() {
                    config.fdb_path = Some(args[i].clone());
                    i += 1;
                }
            }
            "--eval-timeout" => {
                i += 1;
                if i < args.len() {
                    config.eval_timeout = args[i].parse().unwrap_or(30);
                    i += 1;
                }
            }
            _ => {
                // Handle --key=value forms
                if let Some(val) = arg.strip_prefix("--socket=") {
                    config.unix_socket_path = Some(val.to_string());
                    i += 1;
                } else if let Some(val) = arg.strip_prefix("--port=") {
                    config.tcp_port = val.parse().ok();
                    i += 1;
                } else if let Some(val) = arg.strip_prefix("--http-port=") {
                    config.http_port = val.parse().ok();
                    i += 1;
                } else if let Some(val) = arg.strip_prefix("--fdb=") {
                    config.fdb_path = Some(val.to_string());
                    i += 1;
                } else if let Some(val) = arg.strip_prefix("--eval-timeout=") {
                    config.eval_timeout = val.parse().unwrap_or(30);
                    i += 1;
                } else {
                    // Not a nexus option - stop parsing
                    break;
                }
            }
        }
    }
    i
}

/// Extract daemon/server long options from argv in single-command mode.
/// Removes matched options from the args vector.
pub fn extract_global_options(args: &mut Vec<String>, config: &mut NexusConfig) {
    let mut i = 1;
    while i < args.len() {
        if args[i] == "--" {
            break;
        }
        let mut matched = false;
        let mut consumed = 1;
        match args[i].as_str() {
            "--daemon" => {
                config.daemon_flag = true;
                matched = true;
            }
            "--socket" if i + 1 < args.len() => {
                config.unix_socket_path = Some(args[i + 1].clone());
                consumed = 2;
                matched = true;
            }
            "--port" if i + 1 < args.len() => {
                config.tcp_port = args[i + 1].parse().ok();
                consumed = 2;
                matched = true;
            }
            "--http-port" if i + 1 < args.len() => {
                config.http_port = args[i + 1].parse().ok();
                consumed = 2;
                matched = true;
            }
            "--fdb" if i + 1 < args.len() => {
                config.fdb_path = Some(args[i + 1].clone());
                consumed = 2;
                matched = true;
            }
            "--eval-timeout" if i + 1 < args.len() => {
                config.eval_timeout = args[i + 1].parse().unwrap_or(30);
                consumed = 2;
                matched = true;
            }
            _ => {
                // Check --key=value forms
                if let Some(val) = args[i].strip_prefix("--socket=") {
                    config.unix_socket_path = Some(val.to_string());
                    matched = true;
                } else if let Some(val) = args[i].strip_prefix("--port=") {
                    config.tcp_port = val.parse().ok();
                    matched = true;
                } else if let Some(val) = args[i].strip_prefix("--http-port=") {
                    config.http_port = val.parse().ok();
                    matched = true;
                } else if let Some(val) = args[i].strip_prefix("--fdb=") {
                    config.fdb_path = Some(val.to_string());
                    matched = true;
                } else if let Some(val) = args[i].strip_prefix("--eval-timeout=") {
                    config.eval_timeout = val.parse().unwrap_or(30);
                    matched = true;
                }
            }
        }
        if matched {
            for _ in 0..consumed {
                args.remove(i);
            }
        } else {
            i += 1;
        }
    }
}

/// Parse the `-f`/`--output-form` value; exits on an unknown format.
fn parse_output_format(s: &str) -> OutputFormat {
    match s {
        "json" => OutputFormat::Json,
        "mpk" => OutputFormat::MessagePack,
        "voidstar" => OutputFormat::VoidStar,
        "packet" => OutputFormat::Packet,
        _ => {
            eprintln!("Invalid output format: {}", s);
            std::process::exit(1);
        }
    }
}

/// Wrap a string in JSON quotes (for literal string arguments).
a string in JSON quotes (for literal string arguments). pub fn quoted(s: &str) -> String { // JSON-escape the string let escaped = serde_json::to_string(s).unwrap_or_else(|_| format!("\"{}\"", s)); escaped } /// Main dispatch entry point. Routes to the correct command based on argv. pub fn dispatch( args: &[String], arg_start: usize, _shm_basename: &str, config: &NexusConfig, manifest: &Manifest, sockets: &mut [PoolSocket], prog_name: &str, ) { if arg_start >= args.len() { help::print_usage(prog_name, manifest); } let cmd_name = &args[arg_start]; let next = arg_start + 1; // Check if it matches a group name for grp in &manifest.groups { if grp.name == *cmd_name { if next >= args.len() { help::print_group_usage(prog_name, manifest, cmd_name); } let subcmd = &args[next]; if subcmd == "-h" || subcmd == "--help" { help::print_group_usage(prog_name, manifest, cmd_name); } // Find command within this group for cmd in &manifest.commands { if cmd.group.as_deref() == Some(cmd_name.as_str()) && cmd.name == *subcmd { dispatch_command(args, next + 1, config, manifest, cmd, sockets, prog_name); return; } } eprintln!("Unrecognized command '{}' in group '{}'", subcmd, cmd_name); process::clean_exit(1); } } // Try ungrouped commands for cmd in &manifest.commands { if cmd.name == *cmd_name && cmd.group.is_none() { dispatch_command(args, next, config, manifest, cmd, sockets, prog_name); return; } } eprintln!("Unrecognized command '{}'", cmd_name); process::clean_exit(1); } /// Dispatch a single command: parse its args, start needed daemons, execute. 
pub fn dispatch_command( args: &[String], arg_start: usize, config: &NexusConfig, manifest: &Manifest, cmd: &Command, sockets: &mut [PoolSocket], prog_name: &str, ) { let single_cmd = manifest.commands.len() == 1 && manifest.groups.is_empty(); // Parse command-specific arguments let (parsed_args, _remaining_start) = parse_command_args(args, arg_start, cmd, config, single_cmd, prog_name); // Start daemons for remote commands if !cmd.is_pure() { if let Err(e) = process::start_daemons(sockets, &cmd.needed_pools) { eprintln!("Error: {}", e); process::clean_exit(1); } } // Execute the command if cmd.is_pure() { run_pure_command(cmd, &parsed_args, config); } else { run_remote_command(cmd, &parsed_args, sockets, config); } } /// Parsed CLI argument value for a manifest arg slot. #[derive(Debug)] pub enum ArgValue { /// A value string (already quoted if needed). Value(String), /// Null/absent value. Null, /// Group argument with per-entry values. Group { grp_val: Option, fields: Vec>, defaults: Vec>, }, } /// Parse command-specific arguments from argv. 
fn parse_command_args( args: &[String], pos: usize, cmd: &Command, _config: &NexusConfig, single_cmd: bool, prog_name: &str, ) -> (Vec, usize) { let mut parsed = Vec::with_capacity(cmd.args.len()); // Simple option tracking: collect all --opt=val and -o val let mut opt_values: std::collections::HashMap = std::collections::HashMap::new(); let mut flag_values: std::collections::HashMap = std::collections::HashMap::new(); let mut positional_idx = 0; let mut positionals: Vec = Vec::new(); // First pass: separate options from positionals let mut i = pos; while i < args.len() { let arg = &args[i]; if arg == "--" { i += 1; // Everything after -- is positional while i < args.len() { positionals.push(args[i].clone()); i += 1; } break; } if arg == "-h" || arg == "--help" { if single_cmd { help::print_command_help_single(prog_name, cmd); } else { help::print_command_help(prog_name, cmd); } } if arg.starts_with("--") && arg.len() > 2 { // Long option if let Some(eq_pos) = arg.find('=') { let key = &arg[2..eq_pos]; let val = &arg[eq_pos + 1..]; opt_values.insert(key.to_string(), val.to_string()); i += 1; } else { let key = &arg[2..]; // Check if it's a flag if is_flag_opt(cmd, key) { flag_values.insert(key.to_string(), flag_forward_value(cmd, key)); i += 1; } else if is_rev_flag(cmd, key) { if let Some(orig) = find_flag_by_rev(cmd, key) { flag_values.insert(orig, flag_reverse_value_by_rev(cmd, key)); } i += 1; } else if i + 1 < args.len() { opt_values.insert(key.to_string(), args[i + 1].clone()); i += 2; } else { eprintln!("Error: option --{} requires a value", key); process::clean_exit(1); } } } else if arg.starts_with('-') && arg.len() == 2 && arg.as_bytes()[1].is_ascii_alphabetic() { let ch = arg.chars().nth(1).unwrap(); if is_short_flag(cmd, ch) { flag_values.insert( short_to_long(cmd, ch).unwrap_or_else(|| ch.to_string()), flag_forward_value_by_short(cmd, ch), ); i += 1; } else if i + 1 < args.len() { opt_values.insert( short_to_long(cmd, ch).unwrap_or_else(|| 
ch.to_string()), args[i + 1].clone(), ); i += 2; } else { eprintln!("Error: option -{} requires a value", ch); process::clean_exit(1); } } else { positionals.push(arg.clone()); i += 1; } } // Second pass: build ArgValue for each manifest arg for arg_def in &cmd.args { match arg_def { Arg::Positional { quoted, .. } => { if positional_idx < positionals.len() { let val = if *quoted { self::quoted(&positionals[positional_idx]) } else { positionals[positional_idx].clone() }; parsed.push(ArgValue::Value(val)); positional_idx += 1; } else { eprintln!("Error: too few positional arguments"); process::clean_exit(1); } } Arg::Optional { long_opt, short_opt, default_val, quoted, .. } => { let key = long_opt .as_deref() .or_else(|| short_opt.as_deref()) .unwrap_or(""); let user_val = opt_values.get(key); if let Some(val) = user_val { let v = if *quoted { self::quoted(val) } else { val.clone() }; parsed.push(ArgValue::Value(v)); } else if let Some(def) = default_val { parsed.push(ArgValue::Value(def.clone())); } else { parsed.push(ArgValue::Null); } } Arg::Flag { long_opt, default_val, .. } => { let key = long_opt.as_deref().unwrap_or(""); if let Some(val) = flag_values.get(key) { parsed.push(ArgValue::Value(val.clone())); } else { parsed.push(ArgValue::Value( default_val.as_deref().unwrap_or("false").to_string(), )); } } Arg::Group { entries, group_opt, .. 
} => { let grp_val = group_opt.as_ref().and_then(|go| { go.long_opt .as_deref() .and_then(|k| opt_values.get(k)) .cloned() }); let mut fields = Vec::new(); let mut defaults = Vec::new(); for entry in entries { // Look up by long option name or short option character let long_key = entry.arg.long_opt_str().unwrap_or(""); let short_key = entry.arg.short_opt_char() .map(|c| c.to_string()) .unwrap_or_default(); let user = opt_values .get(long_key) .or_else(|| opt_values.get(&short_key)) .or_else(|| flag_values.get(long_key)) .or_else(|| flag_values.get(&short_key)) .map(|v| { if entry.arg.is_quoted() { self::quoted(v) } else { v.clone() } }); fields.push(user); defaults.push(entry.arg.default_val().map(|s| s.to_string())); } parsed.push(ArgValue::Group { grp_val, fields, defaults, }); } } } if positional_idx < positionals.len() { eprintln!("Error: too many positional arguments given"); process::clean_exit(1); } (parsed, i) } // -- Command execution ------------------------------------------------------ /// Execute a remote command by sending a call packet to the pool. 
fn run_remote_command( cmd: &Command, args: &[ArgValue], sockets: &[PoolSocket], config: &NexusConfig, ) { use morloc_runtime::packet; use morloc_runtime::schema::{parse_schema, SerialType}; use std::io::{Read, Write}; use std::os::unix::net::UnixStream; // C library functions from libmorloc.so extern "C" { fn parse_cli_data_argument( dest: *mut u8, arg: *const std::ffi::c_void, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> *mut u8; fn initialize_positional(value: *mut std::ffi::c_char) -> *mut std::ffi::c_void; fn free_argument_t(arg: *mut std::ffi::c_void); fn morloc_packet_size(packet: *const u8, errmsg: *mut *mut std::ffi::c_char) -> usize; fn make_morloc_local_call_packet( midx: u32, arg_packets: *const *const u8, nargs: usize, errmsg: *mut *mut std::ffi::c_char, ) -> *mut u8; fn get_morloc_data_packet_value( data: *const u8, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> *mut u8; } let socket = &sockets[cmd.pool_index]; // Parse return schema let return_schema = match parse_schema(&cmd.ret.schema) { Ok(s) => s, Err(e) => { eprintln!("Error: failed to parse return schema '{}': {}", cmd.ret.schema, e); process::clean_exit(1); } }; // The parsed `args` list and `cmd.args` are index-aligned 1:1 in // declaration order: parse_command_args pushes one ArgValue for // EVERY arg (including flags). The Haskell compiler emits one // schema per arg position too. Walk both lists in lockstep; for // flags, schema_str() returns None and the flag's ArgValue is // already a ready-to-send "true"/"false" string that doesn't need // packet conversion -- but the original v1 dispatch path still // ran flags through parse_cli_data_argument with the flag's bool // schema, so we mirror that to keep the wire format consistent. 
let mut arg_packets: Vec> = Vec::new(); for (i, (arg_val, arg_def)) in args.iter().zip(cmd.args.iter()).enumerate() { let schema_str = arg_def.schema_str().unwrap_or("b"); let schema = match parse_schema(schema_str) { Ok(s) => s, Err(e) => { eprintln!("Error: failed to parse arg schema #{}: {}", i, e); process::clean_exit(1); } }; let c_schema = morloc_runtime::cschema::CSchema::from_rust(&schema); let mut errmsg: *mut std::ffi::c_char = std::ptr::null_mut(); let c_arg; match arg_val { ArgValue::Group { grp_val, fields, defaults } => { // Group arg: use initialize_unrolled (matches C nexus behavior) extern "C" { fn initialize_unrolled( size: usize, default_value: *mut std::ffi::c_char, fields: *mut *mut std::ffi::c_char, default_fields: *mut *mut std::ffi::c_char, ) -> *mut std::ffi::c_void; } let n = fields.len(); let grp_val_c = grp_val.as_ref() .map(|s| std::ffi::CString::new(s.as_str()).unwrap().into_raw()) .unwrap_or(std::ptr::null_mut()); let mut c_fields: Vec<*mut std::ffi::c_char> = fields.iter() .map(|f| f.as_ref() .map(|s| std::ffi::CString::new(s.as_str()).unwrap().into_raw()) .unwrap_or(std::ptr::null_mut())) .collect(); let mut c_defaults: Vec<*mut std::ffi::c_char> = defaults.iter() .map(|d| d.as_ref() .map(|s| std::ffi::CString::new(s.as_str()).unwrap().into_raw()) .unwrap_or(std::ptr::null_mut())) .collect(); c_arg = unsafe { initialize_unrolled(n, grp_val_c, c_fields.as_mut_ptr(), c_defaults.as_mut_ptr()) }; } _ => { let json_str = match arg_val { ArgValue::Value(s) => s.clone(), ArgValue::Null => "null".to_string(), _ => unreachable!(), }; let json_c = std::ffi::CString::new(json_str.as_str()).unwrap(); c_arg = unsafe { initialize_positional(json_c.into_raw()) }; } } let c_pkt = unsafe { parse_cli_data_argument(std::ptr::null_mut(), c_arg, c_schema, &mut errmsg) }; unsafe { free_argument_t(c_arg) }; unsafe { morloc_runtime::cschema::CSchema::free(c_schema) }; if c_pkt.is_null() { let msg = if !errmsg.is_null() { let s = unsafe { 
std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned(); unsafe { libc::free(errmsg as *mut std::ffi::c_void) }; s } else { "unknown error".into() }; eprintln!("Error: failed to parse argument #{}: {}", i, msg); process::clean_exit(1); } // Get packet size and copy to Vec let pkt_size = unsafe { morloc_packet_size(c_pkt, &mut errmsg) }; let data_pkt = unsafe { std::slice::from_raw_parts(c_pkt, pkt_size).to_vec() }; unsafe { libc::free(c_pkt as *mut std::ffi::c_void) }; arg_packets.push(data_pkt); } // Build call packet via C library let arg_ptrs: Vec<*const u8> = arg_packets.iter().map(|p| p.as_ptr()).collect(); let mut errmsg_call: *mut std::ffi::c_char = std::ptr::null_mut(); let c_call = unsafe { make_morloc_local_call_packet(cmd.mid, arg_ptrs.as_ptr(), arg_packets.len(), &mut errmsg_call) }; if c_call.is_null() { eprintln!("Error: failed to create call packet"); process::clean_exit(1); } // Get call packet size let call_size = unsafe { let mut e: *mut std::ffi::c_char = std::ptr::null_mut(); morloc_packet_size(c_call, &mut e) }; let call_packet = unsafe { std::slice::from_raw_parts(c_call, call_size).to_vec() }; unsafe { libc::free(c_call as *mut std::ffi::c_void) }; // Send to pool and receive response let mut stream = match UnixStream::connect(&socket.socket_path) { Ok(s) => s, Err(e) => { die_with_pool_error( socket, cmd.pool_index, &format!("failed to connect to pool '{}'", socket.lang), &e, ); } }; if let Err(e) = stream.write_all(&call_packet) { die_with_pool_error( socket, cmd.pool_index, &format!("failed to send call packet to pool '{}'", socket.lang), &e, ); } // Read response header let mut resp_header_bytes = [0u8; 32]; if let Err(e) = stream.read_exact(&mut resp_header_bytes) { die_with_pool_error( socket, cmd.pool_index, &format!("failed to read response header from pool '{}'", socket.lang), &e, ); } let resp_header = match packet::PacketHeader::from_bytes(&resp_header_bytes) { Ok(h) => h, Err(e) => { eprintln!("Error: invalid response 
packet: {}", e); process::clean_exit(1); } }; // Read full response (metadata + payload) let offset = { resp_header.offset } as usize; let length = { resp_header.length } as usize; let remaining = offset + length; let mut resp_body = vec![0u8; remaining]; if remaining > 0 { if let Err(e) = stream.read_exact(&mut resp_body) { die_with_pool_error( socket, cmd.pool_index, &format!("failed to read response body from pool '{}'", socket.lang), &e, ); } } // Reconstruct full packet (header + body) let mut full_packet = Vec::with_capacity(32 + remaining); full_packet.extend_from_slice(&resp_header_bytes); full_packet.extend_from_slice(&resp_body); // Check for error match packet::get_error_message(&full_packet) { Ok(Some(err_msg)) => { eprintln!("Error: run failed\n{}", err_msg); process::clean_exit(1); } Ok(None) => {} Err(e) => { eprintln!("Error: failed to parse response: {}", e); process::clean_exit(1); } } // Extract and print via C library for correct voidstar handling let c_schema = morloc_runtime::cschema::CSchema::from_rust(&return_schema); let mut errmsg: *mut std::ffi::c_char = std::ptr::null_mut(); let result_ptr = unsafe { get_morloc_data_packet_value(full_packet.as_ptr(), c_schema, &mut errmsg) }; if result_ptr.is_null() { let msg = if !errmsg.is_null() { let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned(); unsafe { libc::free(errmsg as *mut std::ffi::c_void) }; s } else { "unknown error".into() }; eprintln!("Error: failed to extract result: {}", msg); unsafe { morloc_runtime::cschema::CSchema::free(c_schema) }; process::clean_exit(1); } // Check if response is Arrow format let is_arrow = resp_header.is_data() && unsafe { resp_header.command.data.format } == packet::PACKET_FORMAT_ARROW; // Print using the C library for correct output. // Suppress "null" for Unit-returning commands (CLI convention). 
if return_schema.serial_type != SerialType::Nil { print_result_c(result_ptr, c_schema, &full_packet, is_arrow, config); } unsafe { morloc_runtime::cschema::CSchema::free(c_schema) }; } /// Print using the C library functions for correct voidstar handling. fn print_result_c( ptr: *mut u8, schema: *const morloc_runtime::cschema::CSchema, full_packet: &[u8], is_arrow: bool, config: &NexusConfig, ) { extern "C" { fn print_voidstar( voidstar: *const std::ffi::c_void, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> bool; fn pretty_print_voidstar( voidstar: *const std::ffi::c_void, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> bool; fn print_arrow_as_json( data: *const std::ffi::c_void, errmsg: *mut *mut std::ffi::c_char, ) -> bool; fn print_arrow_as_table( data: *const std::ffi::c_void, errmsg: *mut *mut std::ffi::c_char, ) -> bool; fn pack_with_schema( mlc: *const std::ffi::c_void, schema: *const morloc_runtime::cschema::CSchema, mpkptr: *mut *mut std::ffi::c_char, mpk_size: *mut usize, errmsg: *mut *mut std::ffi::c_char, ) -> i32; } let mut errmsg: *mut std::ffi::c_char = std::ptr::null_mut(); match config.output_format { OutputFormat::Json => { let ok = unsafe { if is_arrow && config.print_flag { print_arrow_as_table(ptr as *const std::ffi::c_void, &mut errmsg) } else if is_arrow { print_arrow_as_json(ptr as *const std::ffi::c_void, &mut errmsg) } else if config.print_flag { pretty_print_voidstar(ptr as *const std::ffi::c_void, schema, &mut errmsg) } else { print_voidstar(ptr as *const std::ffi::c_void, schema, &mut errmsg) } }; if !ok { let msg = if !errmsg.is_null() { let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned(); unsafe { libc::free(errmsg as *mut std::ffi::c_void) }; s } else { "unknown error".into() }; eprintln!("Error: {}", msg); process::clean_exit(1); } } OutputFormat::MessagePack => { let mut mpk_ptr: *mut std::ffi::c_char = 
std::ptr::null_mut(); let mut mpk_size: usize = 0; let rc = unsafe { pack_with_schema( ptr as *const std::ffi::c_void, schema, &mut mpk_ptr, &mut mpk_size, &mut errmsg, ) }; if rc != 0 { eprintln!("Error: msgpack serialization failed"); process::clean_exit(1); } if config.print_flag { let bytes = unsafe { std::slice::from_raw_parts(mpk_ptr as *const u8, mpk_size) }; for (i, b) in bytes.iter().enumerate() { if i > 0 && i % 16 == 0 { println!(); } print!("{:02x} ", b); } println!(); } else { use std::io::Write; let bytes = unsafe { std::slice::from_raw_parts(mpk_ptr as *const u8, mpk_size) }; let _ = std::io::stdout().lock().write_all(bytes); } if !mpk_ptr.is_null() { unsafe { libc::free(mpk_ptr as *mut std::ffi::c_void) }; } } OutputFormat::VoidStar => { extern "C" { fn print_morloc_data_packet( packet: *const u8, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> i32; } if config.print_flag { // Hex dump for (i, b) in full_packet.iter().enumerate() { if i > 0 && i % 4 == 0 { if i % 24 == 0 { println!(); } else { print!(" "); } } print!("{:02X}", b); } if !full_packet.is_empty() { println!(); } } else { let mut errmsg2: *mut std::ffi::c_char = std::ptr::null_mut(); unsafe { print_morloc_data_packet(full_packet.as_ptr(), schema, &mut errmsg2) }; } } OutputFormat::Packet => { // Packet format: write raw binary packet to stdout (used by SLURM) use std::io::Write; let _ = std::io::stdout().lock().write_all(&full_packet); } } process::clean_exit(0); } /// Print using Rust-native functions (kept for reference, currently unused). 
#[allow(dead_code)] fn print_result( ptr: morloc_runtime::shm::AbsPtr, schema: &morloc_runtime::Schema, config: &NexusConfig, ) { use morloc_runtime::{json, mpack}; match config.output_format { OutputFormat::Json => { if config.print_flag { if let Err(e) = json::pretty_print_voidstar(ptr, schema) { eprintln!("Error: {}", e); process::clean_exit(1); } } else { if let Err(e) = json::print_voidstar(ptr, schema) { eprintln!("Error: {}", e); process::clean_exit(1); } } } OutputFormat::MessagePack => { let mpk = match mpack::pack_with_schema(ptr, schema) { Ok(m) => m, Err(e) => { eprintln!("Error: {}", e); process::clean_exit(1); } }; if config.print_flag { // Hex dump for human-readable msgpack for (i, byte) in mpk.iter().enumerate() { if i > 0 && i % 16 == 0 { println!(); } print!("{:02x} ", byte); } println!(); } else { use std::io::Write; let stdout = std::io::stdout(); let mut handle = stdout.lock(); let _ = handle.write_all(&mpk); } } OutputFormat::VoidStar | OutputFormat::Packet => { eprintln!("Error: voidstar/packet output not supported in Rust-native print path"); process::clean_exit(1); } } process::clean_exit(0); } /// Execute a pure command by evaluating the expression via C library. 
fn run_pure_command(cmd: &Command, args: &[ArgValue], config: &NexusConfig) { use morloc_runtime::schema::{parse_schema, SerialType}; extern "C" { fn build_manifest_expr( json_str: *const std::ffi::c_char, errmsg: *mut *mut std::ffi::c_char, ) -> *mut std::ffi::c_void; // morloc_expression_t* fn morloc_eval( expr: *mut std::ffi::c_void, return_schema: *const morloc_runtime::cschema::CSchema, arg_voidstar: *const *mut u8, arg_schemas: *const *const morloc_runtime::cschema::CSchema, nargs: usize, errmsg: *mut *mut std::ffi::c_char, ) -> *mut std::ffi::c_void; // absptr_t fn parse_cli_data_argument( dest: *mut u8, arg: *const std::ffi::c_void, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> *mut u8; fn initialize_positional(value: *mut std::ffi::c_char) -> *mut std::ffi::c_void; fn free_argument_t(arg: *mut std::ffi::c_void); fn get_morloc_data_packet_value( data: *const u8, schema: *const morloc_runtime::cschema::CSchema, errmsg: *mut *mut std::ffi::c_char, ) -> *mut u8; fn make_standard_data_packet( relptr: isize, schema: *const morloc_runtime::cschema::CSchema, ) -> *mut u8; fn abs2rel(ptr: *mut std::ffi::c_void, errmsg: *mut *mut std::ffi::c_char) -> isize; } // Build expression tree from manifest JSON let expr_json = match &cmd.expr { Some(v) => serde_json::to_string(v).unwrap_or_default(), None => { eprintln!("Error: pure command '{}' has no expression", cmd.name); process::clean_exit(1); } }; let expr_c = std::ffi::CString::new(expr_json.as_str()).unwrap(); let mut errmsg: *mut std::ffi::c_char = std::ptr::null_mut(); let expr = unsafe { build_manifest_expr(expr_c.as_ptr(), &mut errmsg) }; if expr.is_null() { let msg = unsafe_errmsg_to_string(errmsg); eprintln!("Error: failed to build expression: {}", msg); process::clean_exit(1); } // Parse return schema let return_schema = match parse_schema(&cmd.ret.schema) { Ok(s) => s, Err(e) => { eprintln!("Error: failed to parse return schema '{}': {}", cmd.ret.schema, e); 
process::clean_exit(1); } }; let c_return_schema = morloc_runtime::cschema::CSchema::from_rust(&return_schema); // The parsed `args` list and `cmd.args` are index-aligned 1:1 in // declaration order: parse_command_args pushes one ArgValue for // EVERY arg (including flags). The Haskell compiler emits one // schema per arg position too. Walk both lists in lockstep; for // flags, the schema_str() accessor returns None and we fall back // to the bool schema "b" so the wire format stays consistent. let mut c_arg_schemas: Vec<*const morloc_runtime::cschema::CSchema> = Vec::new(); let mut c_arg_voidstars: Vec<*mut u8> = Vec::new(); for (i, (arg_val, arg_def)) in args.iter().zip(cmd.args.iter()).enumerate() { let schema_str = arg_def.schema_str().unwrap_or("b"); let schema = match parse_schema(schema_str) { Ok(s) => s, Err(e) => { eprintln!("Error: failed to parse arg schema #{}: {}", i, e); process::clean_exit(1); } }; let c_schema = morloc_runtime::cschema::CSchema::from_rust(&schema); let json_str = match arg_val { ArgValue::Value(s) => s.clone(), ArgValue::Null => "null".to_string(), ArgValue::Group { .. 
} => "null".to_string(), }; // Parse CLI arg to data packet, then extract voidstar let json_c = std::ffi::CString::new(json_str.as_str()).unwrap(); let c_arg = unsafe { initialize_positional(json_c.into_raw()) }; let c_pkt = unsafe { parse_cli_data_argument(std::ptr::null_mut(), c_arg, c_schema, &mut errmsg) }; unsafe { free_argument_t(c_arg) }; if c_pkt.is_null() { let msg = unsafe_errmsg_to_string(errmsg); eprintln!("Error: failed to parse argument #{}: {}", i, msg); process::clean_exit(1); } let voidstar = unsafe { get_morloc_data_packet_value(c_pkt, c_schema, &mut errmsg) }; unsafe { libc::free(c_pkt as *mut std::ffi::c_void) }; if voidstar.is_null() { let msg = unsafe_errmsg_to_string(errmsg); eprintln!("Error: failed to extract argument #{}: {}", i, msg); process::clean_exit(1); } c_arg_schemas.push(c_schema); c_arg_voidstars.push(voidstar); } // Call morloc_eval let result = unsafe { morloc_eval( expr, c_return_schema, c_arg_voidstars.as_ptr(), c_arg_schemas.as_ptr(), c_arg_voidstars.len(), &mut errmsg, ) }; if result.is_null() { let msg = unsafe_errmsg_to_string(errmsg); eprintln!("Error: evaluation failed: {}", msg); process::clean_exit(1); } // Convert result to relptr and make a data packet for printing let result_relptr = unsafe { abs2rel(result, &mut errmsg) }; let result_packet = unsafe { make_standard_data_packet(result_relptr, c_return_schema) }; if result_packet.is_null() { eprintln!("Error: failed to create result packet"); process::clean_exit(1); } // Get packet as bytes for print_result_c extern "C" { fn morloc_packet_size(packet: *const u8, errmsg: *mut *mut std::ffi::c_char) -> usize; } let pkt_size = unsafe { morloc_packet_size(result_packet, &mut errmsg) }; let pkt_bytes = unsafe { std::slice::from_raw_parts(result_packet, pkt_size).to_vec() }; // Extract voidstar value from the result packet let result_ptr = unsafe { get_morloc_data_packet_value(pkt_bytes.as_ptr(), c_return_schema, &mut errmsg) }; if return_schema.serial_type != 
SerialType::Nil { print_result_c(result_ptr, c_return_schema, &pkt_bytes, false, config); } // Cleanup for cs in &c_arg_schemas { unsafe { morloc_runtime::cschema::CSchema::free(*cs as *mut morloc_runtime::cschema::CSchema) }; } unsafe { morloc_runtime::cschema::CSchema::free(c_return_schema); libc::free(result_packet as *mut std::ffi::c_void); } } fn unsafe_errmsg_to_string(errmsg: *mut std::ffi::c_char) -> String { if errmsg.is_null() { "unknown error".into() } else { let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned(); unsafe { libc::free(errmsg as *mut std::ffi::c_void) }; s } } // -- Helpers for command argument parsing ------------------------------------ fn is_flag_opt(cmd: &Command, long_name: &str) -> bool { cmd.args.iter().any(|a| match a { Arg::Flag { long_opt, .. } => long_opt.as_deref() == Some(long_name), Arg::Group { entries, .. } => entries.iter().any(|e| match &e.arg { Arg::Flag { long_opt, .. } => long_opt.as_deref() == Some(long_name), _ => false, }), _ => false, }) } fn is_rev_flag(cmd: &Command, name: &str) -> bool { cmd.args.iter().any(|a| match a { Arg::Flag { long_rev, .. } => long_rev.as_deref() == Some(name), Arg::Group { entries, .. } => entries.iter().any(|e| match &e.arg { Arg::Flag { long_rev, .. } => long_rev.as_deref() == Some(name), _ => false, }), _ => false, }) } fn find_flag_by_rev(cmd: &Command, rev_name: &str) -> Option { for a in &cmd.args { match a { Arg::Flag { long_opt, long_rev, .. } => { if long_rev.as_deref() == Some(rev_name) { return long_opt.clone(); } } Arg::Group { entries, .. } => { for e in entries { if let Arg::Flag { long_opt, long_rev, .. } = &e.arg { if long_rev.as_deref() == Some(rev_name) { return long_opt.clone(); } } } } _ => {} } } None } fn flag_forward_value(cmd: &Command, long_name: &str) -> String { for a in &cmd.args { if let Arg::Flag { long_opt, default_val, .. 
} = a {
            if long_opt.as_deref() == Some(long_name) {
                let def = default_val.as_deref().unwrap_or("false");
                // Giving the flag toggles its default value.
                return if def == "true" { "false".into() } else { "true".into() };
            }
        }
    }
    "true".into()
}

/// Same toggle-the-default logic as `flag_forward_value`, keyed by the
/// flag's short option character.
fn flag_forward_value_by_short(cmd: &Command, ch: char) -> String {
    for a in &cmd.args {
        if let Arg::Flag { short_opt, default_val, .. } = a {
            if short_opt.as_deref().and_then(|s| s.chars().next()) == Some(ch) {
                let def = default_val.as_deref().unwrap_or("false");
                return if def == "true" { "false".into() } else { "true".into() };
            }
        }
    }
    "true".into()
}

/// Value stored when a reverse (negating) flag is given: the flag's default.
fn flag_reverse_value_by_rev(cmd: &Command, rev_name: &str) -> String {
    // Search top-level and group entries.
    // NOTE(review): the closure's `Option<String>` type parameters were
    // stripped in the extracted source; restored from the surrounding usage.
    let check = |long_rev: &Option<String>, default_val: &Option<String>| -> Option<String> {
        if long_rev.as_deref() == Some(rev_name) {
            let def = default_val.as_deref().unwrap_or("false");
            Some(if def == "true" { "true".into() } else { "false".into() })
        } else {
            None
        }
    };
    for a in &cmd.args {
        match a {
            Arg::Flag { long_rev, default_val, .. } => {
                if let Some(v) = check(long_rev, default_val) {
                    return v;
                }
            }
            Arg::Group { entries, .. } => {
                for e in entries {
                    if let Arg::Flag { long_rev, default_val, .. } = &e.arg {
                        if let Some(v) = check(long_rev, default_val) {
                            return v;
                        }
                    }
                }
            }
            _ => {}
        }
    }
    "false".into()
}

/// True when `ch` is the short option of a top-level flag.
/// NOTE(review): unlike `is_flag_opt`, this does NOT search group entries —
/// confirm whether that asymmetry is intentional.
fn is_short_flag(cmd: &Command, ch: char) -> bool {
    cmd.args.iter().any(|a| match a {
        Arg::Flag { short_opt, .. } => {
            short_opt.as_deref().and_then(|s| s.chars().next()) == Some(ch)
        }
        _ => false,
    })
}

/// Map a short option character to its long option name; `None` when no
/// arg uses `ch`, or when the matching arg has no long name.
/// NOTE(review): the `Option<String>` return type was stripped in the
/// extracted source; restored from the `long_opt.clone()` values returned.
fn short_to_long(cmd: &Command, ch: char) -> Option<String> {
    for a in &cmd.args {
        let (s, l) = match a {
            Arg::Optional { short_opt, long_opt, .. } => (short_opt.as_deref(), long_opt.clone()),
            Arg::Flag { short_opt, long_opt, .. } => (short_opt.as_deref(), long_opt.clone()),
            Arg::Group { entries, .. } => {
                // Search inside group entries; fall back to the short char
                // itself when the entry has no long name.
                for entry in entries {
                    let (es, el) = match &entry.arg {
                        Arg::Optional { short_opt, long_opt, .. } => {
                            (short_opt.as_deref(), long_opt.clone())
                        }
                        Arg::Flag { short_opt, long_opt, ..
 } => (short_opt.as_deref(), long_opt.clone()),
                        _ => (None, None),
                    };
                    if es.and_then(|s| s.chars().next()) == Some(ch) {
                        return el.or_else(|| Some(ch.to_string()));
                    }
                }
                (None, None)
            }
            _ => (None, None),
        };
        if s.and_then(|s| s.chars().next()) == Some(ch) {
            return l;
        }
    }
    None
}


================================================ FILE: data/rust/morloc-nexus/src/help.rs ================================================
//! Help text generation matching the C nexus output format.

use crate::manifest::{Arg, Command, GroupEntry, Manifest};

/// Print nexus-level usage (no manifest loaded). Never returns.
pub fn print_nexus_usage(prog_name: &str) -> ! {
    eprintln!("Usage: {} [OPTION...] COMMAND [ARG...]", prog_name);
    eprintln!();
    eprintln!("morloc-nexus is the morloc program dispatcher.");
    eprintln!();
    eprintln!("Arguments:");
    // NOTE(review): a metavar (e.g. a bracketed placeholder) appears to have
    // been stripped from the extracted string below — confirm upstream.
    eprintln!(" Path to a .manifest file or wrapper script");
    eprintln!();
    eprintln!("Nexus options:");
    eprintln!(" -h, --help Print this help message");
    eprintln!(" -p, --print Pretty-print output for human consumption");
    eprintln!(" -o, --output-file Print to this file instead of STDOUT");
    eprintln!(" -f, --output-format Output format [json|mpk|voidstar]");
    eprintln!();
    eprintln!("Daemon mode:");
    eprintln!(" --daemon Run as a long-lived daemon");
    eprintln!(" --http-port PORT Listen on HTTP port");
    eprintln!(" --port PORT Listen on TCP port");
    eprintln!(" --socket PATH Listen on Unix socket");
    eprintln!(" --eval-timeout SECS Timeout for /eval requests (default: 30)");
    eprintln!();
    eprintln!("Router mode:");
    eprintln!(" --router Run as a multi-program router");
    eprintln!(" --fdb Path to fdb manifest directory");
    std::process::exit(0);
}

/// Print usage for a multi-command program.
pub fn print_usage(prog_name: &str, manifest: &Manifest) -> ! {
    eprintln!("Usage: {} [OPTION...] 
COMMAND [ARG...]", prog_name);
    // Module-level description
    if !manifest.desc.is_empty() {
        eprintln!();
        for line in &manifest.desc {
            eprintln!("{}", line);
        }
    }
    eprintln!();
    eprintln!("Nexus options:");
    eprintln!(" -h, --help Print this help message");
    eprintln!(" -p, --print Pretty-print output for human consumption");
    eprintln!(" -o, --output-file Print to this file instead of STDOUT");
    eprintln!(" -f, --output-format Output format [json|mpk|voidstar]");
    eprintln!();
    eprintln!("Daemon mode:");
    eprintln!(" --daemon Run as a long-lived daemon");
    eprintln!(" --http-port PORT Listen on HTTP port");
    eprintln!(" --port PORT Listen on TCP port");
    eprintln!(" --socket PATH Listen on Unix socket");
    eprintln!();
    // Ungrouped commands, aligned on the longest command name.
    let ungrouped: Vec<&Command> = manifest
        .commands
        .iter()
        .filter(|c| c.group.is_none())
        .collect();
    if !ungrouped.is_empty() {
        eprintln!("Commands (call with -h/--help for more info):");
        let longest = ungrouped.iter().map(|c| c.name.len()).max().unwrap_or(0);
        for cmd in &ungrouped {
            eprint!(" {}", cmd.name);
            if let Some(first) = cmd.desc.first() {
                // Pad so the first description line lines up across commands.
                let pad = longest - cmd.name.len() + 2;
                eprint!("{:pad$}{}", "", first, pad = pad);
            }
            eprintln!();
        }
    }
    if !manifest.groups.is_empty() {
        if !ungrouped.is_empty() {
            eprintln!();
        }
        eprintln!("Command groups (call with -h/--help for more info):");
        let longest = manifest.groups.iter().map(|g| g.name.len()).max().unwrap_or(0);
        for grp in &manifest.groups {
            eprint!(" {}", grp.name);
            if let Some(first) = grp.desc.first() {
                let pad = longest - grp.name.len() + 2;
                eprint!("{:pad$}{}", "", first, pad = pad);
            }
            eprintln!();
        }
    }
    // Epilogues
    for epilogue in &manifest.epilogues {
        eprintln!();
        for line in epilogue {
            eprintln!("{}", line);
        }
    }
    std::process::exit(0);
}

/// Print usage for a command group.
pub fn print_group_usage(prog_name: &str, manifest: &Manifest, group_name: &str) -> ! {
    // The group's description, when present, follows the usage line.
    let grp = manifest.groups.iter().find(|g| g.name == group_name);
    eprintln!("Usage: {} {} COMMAND [ARG...]", prog_name, group_name);
    if let Some(g) = grp {
        if !g.desc.is_empty() {
            eprintln!();
            for line in &g.desc {
                eprintln!("{}", line);
            }
        }
    }
    eprintln!("\nCommands:");
    // Only the commands belonging to this group are listed.
    let cmds: Vec<&Command> = manifest
        .commands
        .iter()
        .filter(|c| c.group.as_deref() == Some(group_name))
        .collect();
    let longest = cmds.iter().map(|c| c.name.len()).max().unwrap_or(0);
    for cmd in &cmds {
        eprint!(" {}", cmd.name);
        if let Some(first) = cmd.desc.first() {
            let pad = longest - cmd.name.len() + 2;
            eprint!("{:pad$}{}", "", first, pad = pad);
        }
        eprintln!();
    }
    std::process::exit(0);
}

/// Print help for a specific subcommand.
pub fn print_command_help(prog_name: &str, cmd: &Command) -> ! {
    // Usage line
    if let Some(ref group) = cmd.group {
        eprint!("Usage: {} {} {}", prog_name, group, cmd.name);
    } else {
        eprint!("Usage: {} {}", prog_name, cmd.name);
    }
    print_usage_suffix(cmd);
    eprintln!();
    if !cmd.desc.is_empty() {
        eprintln!();
    }
    print_command_body(cmd);
    std::process::exit(0);
}

/// Print help for a single-command program.
pub fn print_command_help_single(prog_name: &str, cmd: &Command) -> !
{
    eprint!("Usage: {}", prog_name);
    print_usage_suffix(cmd);
    eprintln!();
    // Description (skip a leading empty line in the description block)
    if !cmd.desc.is_empty() {
        eprintln!();
        for (i, line) in cmd.desc.iter().enumerate() {
            if i == 0 && line.is_empty() {
                continue;
            }
            eprintln!("{}", line);
        }
    }
    // Nexus options
    eprintln!("\nNexus options:");
    eprintln!(" --print Pretty-print output for human consumption");
    eprintln!(" --output-file Print to this file instead of STDOUT");
    // FIX(review): was "--output-form" — a typo; every other help printer
    // (e.g. print_usage) spells this option "--output-format".
    eprintln!(" --output-format Output format [json|mpk|voidstar]");
    eprintln!("\nDaemon mode:");
    eprintln!(" --daemon Run as a long-lived daemon");
    eprintln!(" --http-port PORT Listen on HTTP port");
    eprintln!(" --port PORT Listen on TCP port");
    eprintln!(" --socket PATH Listen on UNIX socket");
    print_args_body(cmd);
    print_type_definitions(cmd);
    print_return_info(cmd);
    std::process::exit(0);
}

// -- Helpers ----------------------------------------------------------------

/// Append the "[OPTION...]" marker and positional metavars to a usage line.
fn print_usage_suffix(cmd: &Command) {
    let has_opts = cmd.args.iter().any(|a| !matches!(a, Arg::Positional { .. }));
    if has_opts {
        eprint!(" [OPTION...]");
    }
    for arg in &cmd.args {
        if let Arg::Positional { metavar, .. } = arg {
            eprint!(" {}", metavar.as_deref().unwrap_or("ARG"));
        }
    }
}

/// Shared body for the grouped/ungrouped command help printers.
fn print_command_body(cmd: &Command) {
    // Description (skip a leading empty line in the description block)
    if !cmd.desc.is_empty() {
        for (i, line) in cmd.desc.iter().enumerate() {
            if i == 0 && line.is_empty() {
                continue;
            }
            eprintln!("{}", line);
        }
    }
    print_args_body(cmd);
    print_type_definitions(cmd);
    print_return_info(cmd);
}

/// Render the positional / optional / group argument sections.
fn print_args_body(cmd: &Command) {
    // Positional arguments
    let has_pos = cmd.args.iter().any(|a| matches!(a, Arg::Positional { .. }));
    if has_pos {
        eprintln!("\nPositional arguments:");
        for arg in &cmd.args {
            if let Arg::Positional { metavar, type_desc, desc, ..
} = arg {
                eprint!(" {}", metavar.as_deref().unwrap_or("ARG"));
                if let Some(first) = desc.first() {
                    eprint!(" {}", first);
                }
                eprintln!();
                if let Some(td) = type_desc {
                    eprintln!(" type: {}", td);
                }
            }
        }
    }
    // Optional arguments (opts and flags)
    let has_opt = cmd
        .args
        .iter()
        .any(|a| matches!(a, Arg::Optional { .. } | Arg::Flag { .. }));
    if has_opt {
        eprintln!("\nOptional arguments:");
        for arg in &cmd.args {
            print_opt_or_flag(arg);
        }
    }
    // Group arguments
    for arg in &cmd.args {
        if let Arg::Group {
            metavar,
            desc,
            group_opt,
            entries,
            ..
        } = arg
        {
            eprintln!("\nGroup arguments:");
            eprint!(" {}", metavar.as_deref().unwrap_or(""));
            if let Some(first) = desc.first() {
                eprint!(": {}", first);
            }
            eprintln!();
            // The whole-group option (record given as a file or JSON string).
            if let Some(go) = group_opt {
                eprint!(" ");
                if let Some(ref s) = go.short_opt {
                    eprint!("-{}, ", s);
                }
                if let Some(ref l) = go.long_opt {
                    eprint!("--{} {}", l, metavar.as_deref().unwrap_or(""));
                }
                eprintln!();
                eprintln!(" provide record as file or JSON string");
            }
            for entry in entries {
                print_group_entry(entry);
            }
        }
    }
}

// Render one optional argument or flag line (with default/desc/type notes).
fn print_opt_or_flag(arg: &Arg) {
    match arg {
        Arg::Optional {
            short_opt,
            long_opt,
            metavar,
            default_val,
            desc,
            type_desc,
            ..
        } => {
            eprint!(" ");
            // Emit whichever of -s/--long forms are declared.
            match (short_opt.as_deref(), long_opt.as_deref()) {
                (Some(s), Some(l)) => eprint!(
                    "-{}, --{} {}",
                    s,
                    l,
                    metavar.as_deref().unwrap_or("")
                ),
                (Some(s), None) => {
                    eprint!("-{} {}", s, metavar.as_deref().unwrap_or(""))
                }
                (None, Some(l)) => eprint!(
                    "--{} {}",
                    l,
                    metavar.as_deref().unwrap_or("")
                ),
                _ => {}
            }
            eprintln!();
            if let Some(d) = default_val {
                eprintln!(" default: {}", d);
            }
            for d in desc {
                eprintln!(" {}", d);
            }
            if let Some(td) = type_desc {
                eprintln!(" type: {}", td);
            }
        }
        Arg::Flag {
            short_opt,
            long_opt,
            long_rev,
            default_val,
            desc,
            ..
        } => {
            eprint!(" ");
            match (short_opt.as_deref(), long_opt.as_deref()) {
                (Some(s), Some(l)) => eprint!("-{}, --{}", s, l),
                (Some(s), None) => eprint!("-{}", s),
                (None, Some(l)) => eprint!("--{}", l),
                _ => {}
            }
            eprintln!();
            // Reverse (negating) form, when declared.
            if let Some(rev) = long_rev {
                eprintln!(" --{}", rev);
            }
            if let Some(d) = default_val {
                eprintln!(" default: {}", d);
            }
            for d in desc {
                eprintln!(" {}", d);
            }
        }
        _ => {}
    }
}

// Render one entry of a group argument (an option or flag nested in a group).
fn print_group_entry(entry: &GroupEntry) {
    let ea = &entry.arg;
    eprint!(" ");
    match ea {
        Arg::Optional {
            short_opt,
            long_opt,
            metavar,
            default_val,
            desc,
            ..
        } => {
            match (short_opt.as_deref(), long_opt.as_deref()) {
                (Some(s), Some(l)) => {
                    eprint!("-{}, --{}", s, l);
                    if let Some(m) = metavar {
                        eprint!(" {}", m);
                    }
                }
                (Some(s), None) => {
                    eprint!("-{}", s);
                    if let Some(m) = metavar {
                        eprint!(" {}", m);
                    }
                }
                (None, Some(l)) => {
                    eprint!("--{}", l);
                    if let Some(m) = metavar {
                        eprint!(" {}", m);
                    }
                }
                _ => {}
            }
            eprintln!();
            if let Some(d) = default_val {
                eprintln!(" default: {}", d);
            }
            for d in desc {
                eprintln!(" {}", d);
            }
        }
        Arg::Flag {
            short_opt,
            long_opt,
            default_val,
            desc,
            ..
        } => {
            match (short_opt.as_deref(), long_opt.as_deref()) {
                (Some(s), Some(l)) => eprint!("-{}, --{}", s, l),
                (Some(s), None) => eprint!("-{}", s),
                (None, Some(l)) => eprint!("--{}", l),
                _ => {}
            }
            eprintln!();
            if let Some(d) = default_val {
                eprintln!(" default: {}", d);
            }
            for d in desc {
                eprintln!(" {}", d);
            }
        }
        _ => {}
    }
}

// Render the "Return:" section from the command's return metadata.
fn print_return_info(cmd: &Command) {
    eprintln!("\nReturn: {}", cmd.ret.type_desc);
    for line in &cmd.ret.desc {
        eprintln!(" {}", line);
    }
}

// -- Schema-walking renderer for the Record / Table Schemas sections ---------
//
// In v2 the manifest no longer carries a parallel `type_definitions` list.
// The same information is reconstructed at help-render time by walking // each command's args + return value: for every typed entry whose schema // parses to a Map at the top level, we treat the entry's `type` name as // the named-type label and its schema's keys + parameter schemas as the // field list. The record-vs-table distinction comes from the entry's // `kind` constraint. /// A rendered named-type layout for the help output, sourced from a /// parsed schema. struct TypeLayout<'a> { name: &'a str, /// "record" | "object" | "table" -- comes from the `kind` constraint. kind: &'a str, /// (field_name, rendered_type) fields: Vec<(String, String)>, } /// Pretty-render a parsed `Schema` as a morloc-flavored type string, /// suitable for the field-type column in the schemas block. fn render_schema_type(s: &morloc_runtime::schema::Schema) -> String { use morloc_runtime::schema::SerialType::*; match s.serial_type { Nil => "()".into(), Bool => "Bool".into(), Sint8 => "Int8".into(), Sint16 => "Int16".into(), Sint32 => "Int".into(), Sint64 => "Int64".into(), Uint8 => "UInt8".into(), Uint16 => "UInt16".into(), Uint32 => "UInt32".into(), Uint64 => "UInt64".into(), Float32 => "Float32".into(), Float64 => "Real".into(), String => "Str".into(), Array => format!( "[{}]", s.parameters .first() .map(render_schema_type) .unwrap_or_else(|| "?".into()) ), Tuple => { // `String` here is fully qualified because the surrounding // match brings `SerialType::String` into scope as a variant, // shadowing the std `String` type. let inner: Vec = s.parameters.iter().map(render_schema_type).collect(); format!("({})", inner.join(", ")) } Map => { // A nested record-ish thing. Use the hint string when present // (which carries the language-specific concrete type name); // otherwise show an inline placeholder. Either way, the // nested record will be listed separately in the same // schema block if its name appears as another arg's type. 
s.hint.clone().unwrap_or_else(|| "{..}".into()) } Optional => format!( "?{}", s.parameters .first() .map(render_schema_type) .unwrap_or_else(|| "?".into()) ), Tensor => format!( "Tensor<{}>", s.parameters .first() .map(render_schema_type) .unwrap_or_else(|| "?".into()) ), } } /// Try to extract a `TypeLayout` from a (name, schema_string, kind) /// triple. Returns None if any input is missing or the schema does not /// parse to a top-level Map. Tables (whose fields are arrays in the wire /// schema) render their fields by the array's element type, mirroring /// how the user wrote them in the source. fn extract_named_layout<'a>( type_name: Option<&'a str>, schema_str: Option<&str>, kind: Option<&'a str>, ) -> Option> { use morloc_runtime::schema::SerialType; let name = type_name?; let schema = schema_str?; let kind = kind?; let parsed = morloc_runtime::schema::parse_schema(schema).ok()?; if parsed.serial_type != SerialType::Map { return None; } // For a table, every field's wire schema is an Array -- peel one layer // off so the user sees `name :: Str` instead of `name :: [Str]`. let strip_array = kind == "table"; let fields = parsed .keys .iter() .zip(parsed.parameters.iter()) .map(|(k, p)| { let inner = if strip_array && p.serial_type == SerialType::Array { p.parameters.first().unwrap_or(p) } else { p }; (k.clone(), render_schema_type(inner)) }) .collect(); Some(TypeLayout { name, kind, fields }) } /// Walk every arg + the return of a command. For each typed entry, try /// to build a layout. Deduplicate by type name, preserving discovery /// order so the rendering matches the order types appear in the /// signature. fn collect_command_layouts<'a>(cmd: &'a Command) -> Vec> { use std::collections::HashSet; let mut seen: HashSet<&str> = HashSet::new(); let mut out: Vec> = Vec::new(); for arg in &cmd.args { // Skip unrolled groups without a group_opt: each field already // appears as its own flag in the usage, so the schema is redundant. 
// Keep the schema when group_opt is present (the user can pass the // entire record as JSON and needs the full field spec). if let Arg::Group { group_opt: None, .. } = arg { continue; } if let Some(layout) = extract_named_layout(arg.type_desc_str(), arg.schema_str(), arg.kind_constraint()) { if seen.insert(layout.name) { out.push(layout); } } } let ret_kind = cmd .ret .constraints .iter() .find(|c| c.ctype == "kind") .and_then(|c| c.value.as_ref().and_then(|v| v.as_str())); if let Some(layout) = extract_named_layout(Some(&cmd.ret.type_desc), Some(&cmd.ret.schema), ret_kind) { if seen.insert(layout.name) { out.push(layout); } } out } /// Print the Record Schemas / Table Schemas sections for any named /// types referenced in this command's signature. The whole block is /// skipped when there are none. fn print_type_definitions(cmd: &Command) { let layouts = collect_command_layouts(cmd); if layouts.is_empty() { return; } let records: Vec<&TypeLayout> = layouts.iter().filter(|l| l.kind != "table").collect(); let tables: Vec<&TypeLayout> = layouts.iter().filter(|l| l.kind == "table").collect(); if !records.is_empty() { eprintln!("\nRecord Schemas:"); print_layouts(&records); } if !tables.is_empty() { eprintln!("\nTable Schemas:"); print_layouts(&tables); } } /// Render a list of layouts. Each layout shows its type name on its own /// line followed by the field list with `::`-aligned column widths. /// Definitions are separated by blank lines. fn print_layouts(defs: &[&TypeLayout]) { for (i, def) in defs.iter().enumerate() { if i > 0 { eprintln!(); } eprintln!(" {}", def.name); let name_width = def .fields .iter() .map(|(k, _)| k.len()) .max() .unwrap_or(0); for (k, v) in &def.fields { eprintln!(" {:width$} :: {}", k, v, width = name_width); } } } ================================================ FILE: data/rust/morloc-nexus/src/main.rs ================================================ //! Morloc Nexus: CLI dispatcher for multi-language pool orchestration. //! //! 
Replaces data/nexus.c. Entry point for all morloc programs. //! Reads a .manifest JSON, spawns language pool daemons, and routes //! function calls to them over Unix sockets. mod dispatch; mod help; mod manifest; mod process; use dispatch::NexusConfig; /// Resolve the morloc data directory: MORLOC_HOME if set, else ~/.local/share/morloc. fn morloc_home() -> String { std::env::var("MORLOC_HOME").unwrap_or_else(|_| { format!( "{}/.local/share/morloc", std::env::var("HOME").unwrap_or_else(|_| "/root".into()) ) }) } fn main() { let args: Vec = std::env::args().collect(); let mut config = NexusConfig::default(); // First pass: parse nexus-level options let opt_end = dispatch::parse_nexus_options(&args, &mut config); // Handle --router mode (no manifest needed) if config.router_flag { run_router(&config); std::process::exit(0); } // If -h with no manifest argument, show nexus help let prog_name = args.first().map(|s| s.as_str()).unwrap_or("morloc-nexus"); if config.help_flag && opt_end >= args.len() { help::print_nexus_usage(prog_name); } // Manifest path: either an explicit argument or derived from argv[0]. // In daemon mode (`./test --daemon`), the manifest is at `.manifest`. // In normal mode (`./test add 1 2`), argv[0] is also the manifest source. // An explicit path argument is only needed for multi-command mode. 
let manifest_path = if opt_end < args.len() { args[opt_end].clone() } else if config.daemon_flag || config.router_flag { // Daemon/router: derive from argv[0] args[0].clone() } else { help::print_nexus_usage(prog_name) }; let prog_name = std::path::Path::new(&manifest_path) .file_name() .and_then(|n| n.to_str()) .unwrap_or(&manifest_path) .to_string(); let mut arg_cursor = if opt_end < args.len() { opt_end + 1 } else { args.len() }; // Read and parse manifest let payload = match manifest::read_manifest_payload(&manifest_path) { Ok(p) => p, Err(e) => { eprintln!("Failed to load manifest '{}': {}", manifest_path, e); std::process::exit(1); } }; let manifest = match manifest::parse_manifest(&payload) { Ok(m) => m, Err(e) => { eprintln!("Failed to parse manifest '{}': {}", manifest_path, e); std::process::exit(1); } }; let single_command = manifest.commands.len() == 1 && manifest.groups.is_empty(); // Second pass: parse options after manifest path (skip in single-command mode) let mut remaining_args = args.clone(); if !single_command { arg_cursor = dispatch::parse_nexus_options(&args[opt_end..], &mut config) + opt_end; } else { // In single-command mode, extract daemon/server long options manually dispatch::extract_global_options(&mut remaining_args, &mut config); } // Pool paths in the manifest are absolute, so no chdir is needed. // This lets user programs resolve file paths relative to the caller's CWD. // Source imports in pools resolve via __file__-relative paths (Python sys.path) // or script-relative paths (R .morloc.source) rather than depending on CWD. 
// Validate pool executables exist if let Err(e) = process::validate_pools(&manifest.pools) { eprintln!("Error: {}", e); std::process::exit(1); } // Handle help flag with manifest loaded if config.help_flag { if single_command { help::print_command_help_single(&prog_name, &manifest.commands[0]); } else { help::print_usage(&prog_name, &manifest); } } // Setup tmpdir and SHM let tmpdir = match process::make_tmpdir() { Ok(t) => t, Err(e) => { eprintln!("Error: {}", e); std::process::exit(1); } }; process::set_tmpdir(tmpdir.clone()); let job_hash = process::make_job_hash(42); let shm_basename = format!("morloc-{}", job_hash); // Initialize shared memory via libmorloc.so using dlsym. // CRITICAL: We must use dlsym to call the CDYLIB's shinit, not the rlib's. // The rlib and cdylib have separate static globals (VOLUMES, ALLOC_MUTEX, etc.). // All SHM operations in pool-facing C code go through the cdylib's globals. // If we call the rlib's shinit, the cdylib's globals stay uninitialized. { let _lib = unsafe { libc::dlopen(std::ptr::null(), libc::RTLD_NOW) }; // RTLD_DEFAULT (NULL handle) searches in order: executable, then loaded libs // But the rlib symbols come first. Use RTLD_NEXT-style lookup via the .so path. 
let lib_path = std::ffi::CString::new( format!("{}/lib/libmorloc.so", morloc_home()) ).unwrap(); let lib = unsafe { libc::dlopen(lib_path.as_ptr(), libc::RTLD_NOW | libc::RTLD_GLOBAL) }; if lib.is_null() { let err = unsafe { libc::dlerror() }; let err_msg = if err.is_null() { "unknown error".to_string() } else { unsafe { std::ffi::CStr::from_ptr(err) }.to_string_lossy().into_owned() }; eprintln!("Error: failed to load libmorloc.so: {}", err_msg); process::clean_exit(1); } type ShmSetFallbackFn = unsafe extern "C" fn(*const std::ffi::c_char); type ShinitFn = unsafe extern "C" fn(*const std::ffi::c_char, usize, usize, *mut *mut std::ffi::c_char) -> *mut std::ffi::c_void; let set_fb_sym = std::ffi::CString::new("shm_set_fallback_dir").unwrap(); let shinit_sym = std::ffi::CString::new("shinit").unwrap(); let set_fb: ShmSetFallbackFn = unsafe { std::mem::transmute(libc::dlsym(lib, set_fb_sym.as_ptr())) }; let do_shinit: ShinitFn = unsafe { std::mem::transmute(libc::dlsym(lib, shinit_sym.as_ptr())) }; let tmpdir_c = std::ffi::CString::new(tmpdir.as_str()).unwrap(); let basename_c = std::ffi::CString::new(shm_basename.as_str()).unwrap(); let mut errmsg: *mut std::ffi::c_char = std::ptr::null_mut(); unsafe { set_fb(tmpdir_c.as_ptr()); let shm = do_shinit(basename_c.as_ptr(), 0, 0xffff, &mut errmsg); if shm.is_null() { let msg = if !errmsg.is_null() { let s = std::ffi::CStr::from_ptr(errmsg).to_string_lossy().into_owned(); libc::free(errmsg as *mut std::ffi::c_void); s } else { "unknown error".into() }; eprintln!("Error: failed to initialize shared memory: {}", msg); process::clean_exit(1); } } unsafe { libc::dlclose(lib) }; } // Become subreaper for orphaned grandchildren process::set_child_subreaper(); // Install signal handlers process::install_signal_handlers(); // Setup sockets let mut sockets = process::setup_sockets(&manifest.pools, &tmpdir, &shm_basename); // Daemon mode if config.daemon_flag { let all_indices: Vec = (0..manifest.pools.len()).collect(); if let 
Err(e) = process::start_daemons(&mut sockets, &all_indices) { eprintln!("Error: {}", e); process::clean_exit(1); } // Build DaemonConfig and call daemon_run in libmorloc.so run_daemon(&config, &mut sockets, &shm_basename, &payload); process::clean_exit(0); } // Normal CLI mode if config.packet_path.is_none() { if single_command { // Single-command: dispatch directly to the command, no subcommand lookup // Allow optional command name prefix for backward compatibility let mut cmd_arg_start = arg_cursor; if cmd_arg_start < remaining_args.len() && remaining_args[cmd_arg_start] == manifest.commands[0].name { cmd_arg_start += 1; } dispatch::dispatch_command( &remaining_args, cmd_arg_start, &config, &manifest, &manifest.commands[0], &mut sockets, &prog_name, ); } else { if arg_cursor >= remaining_args.len() { help::print_usage(&prog_name, &manifest); } dispatch::dispatch( &remaining_args, arg_cursor, &shm_basename, &config, &manifest, &mut sockets, &prog_name, ); } } else { // Call-packet mode: read a pre-built call packet from file, // send to the appropriate pool, write result as MessagePack. // Used by SLURM workers on remote compute nodes. run_call_packet(&config, &tmpdir); } process::clean_exit(0); } /// Run the daemon event loop by calling daemon_run in libmorloc.so. 
fn run_daemon( config: &dispatch::NexusConfig, sockets: &mut [process::PoolSocket], shm_basename: &str, manifest_payload: &str, ) { use std::ffi::{c_char, c_void, CString}; use std::ptr; // daemon_run and parse_manifest signatures from libmorloc.so extern "C" { fn daemon_run( config: *mut c_void, // *mut DaemonConfig manifest: *mut c_void, // *mut Manifest (opaque) sockets: *mut c_void, // *mut MorlocSocket n_pools: usize, shm_basename: *const c_char, ); fn parse_manifest(text: *const c_char, errmsg: *mut *mut c_char) -> *mut c_void; } // Build C MorlocSocket array (matches daemon_ffi::MorlocSocket layout) #[repr(C)] struct CMorlocSocket { lang: *mut c_char, syscmd: *mut *mut c_char, socket_filename: *mut c_char, pid: i32, } let n_pools = sockets.len(); let mut c_sockets: Vec = Vec::with_capacity(n_pools); // Keep CStrings alive for the duration let mut _keepalive: Vec> = Vec::new(); for sock in sockets.iter() { let lang_c = CString::new(sock.lang.as_str()).unwrap(); let socket_c = CString::new(sock.socket_path.as_str()).unwrap(); // Build NULL-terminated syscmd array let mut cmd_ptrs: Vec<*mut c_char> = Vec::new(); let mut cmd_strs: Vec = Vec::new(); for arg in &sock.syscmd { let c = CString::new(arg.to_bytes()).unwrap(); cmd_ptrs.push(c.as_ptr() as *mut c_char); cmd_strs.push(c); } cmd_ptrs.push(ptr::null_mut()); c_sockets.push(CMorlocSocket { lang: lang_c.as_ptr() as *mut c_char, syscmd: cmd_ptrs.as_ptr() as *mut *mut c_char, socket_filename: socket_c.as_ptr() as *mut c_char, pid: sock.pid, }); // Keep everything alive _keepalive.push(cmd_strs); _keepalive.push(vec![lang_c, socket_c]); } // Build C DaemonConfig (matches daemon_ffi::DaemonConfig layout) #[repr(C)] struct CDaemonConfig { unix_socket_path: *const c_char, tcp_port: i32, http_port: i32, pool_check_fn: *const c_void, // Option as null pool_alive_fn: *const c_void, // Option as null n_pools: usize, eval_timeout: i32, } let unix_socket_cstr = config.unix_socket_path.as_ref() .map(|p| 
CString::new(p.as_str()).unwrap()); let mut daemon_config = CDaemonConfig { unix_socket_path: unix_socket_cstr.as_ref() .map_or(ptr::null(), |c| c.as_ptr()), tcp_port: config.tcp_port.unwrap_or(0), http_port: config.http_port.unwrap_or(0), pool_check_fn: ptr::null(), pool_alive_fn: process::pool_is_alive_ptr(), n_pools, eval_timeout: config.eval_timeout, }; // Parse manifest via the C FFI (so daemon_run gets the C-layout manifest). // The payload was already extracted from the wrapper script by the main flow. let manifest_c_str = CString::new(manifest_payload).unwrap(); let mut errmsg: *mut c_char = ptr::null_mut(); let c_manifest = unsafe { parse_manifest(manifest_c_str.as_ptr(), &mut errmsg) }; if c_manifest.is_null() { let msg = if !errmsg.is_null() { let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned(); unsafe { libc::free(errmsg as *mut c_void) }; s } else { "unknown error".into() }; eprintln!("Error: failed to parse manifest for daemon: {}", msg); process::clean_exit(1); } let shm_c = CString::new(shm_basename).unwrap(); unsafe { daemon_run( &mut daemon_config as *mut CDaemonConfig as *mut c_void, c_manifest, c_sockets.as_mut_ptr() as *mut c_void, n_pools, shm_c.as_ptr(), ); } } /// Run the multi-program router daemon. /// Scans the fdb directory for .manifest files and serves them all via HTTP/TCP/Unix. 
fn run_router(config: &dispatch::NexusConfig) { use std::ffi::{c_char, c_void, CString}; use std::ptr; extern "C" { fn router_init(fdb_path: *const c_char, errmsg: *mut *mut c_char) -> *mut c_void; fn router_run(config: *mut c_void, router: *mut c_void); fn router_free(router: *mut c_void); } let fdb_path = config.fdb_path.clone().unwrap_or_else(|| { format!("{}/fdb", morloc_home()) }); let fdb_c = CString::new(fdb_path.as_str()).unwrap(); let mut errmsg: *mut c_char = ptr::null_mut(); let router = unsafe { router_init(fdb_c.as_ptr(), &mut errmsg) }; if router.is_null() { let msg = if !errmsg.is_null() { let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned(); unsafe { libc::free(errmsg as *mut c_void) }; s } else { "unknown error".into() }; eprintln!("Error: failed to initialize router: {}", msg); std::process::exit(1); } // Build DaemonConfig for the router #[repr(C)] struct CDaemonConfig { unix_socket_path: *const c_char, tcp_port: i32, http_port: i32, pool_check_fn: *const c_void, pool_alive_fn: *const c_void, n_pools: usize, eval_timeout: i32, } let unix_cstr = config.unix_socket_path.as_ref() .map(|p| CString::new(p.as_str()).unwrap()); let mut dc = CDaemonConfig { unix_socket_path: unix_cstr.as_ref().map_or(ptr::null(), |c| c.as_ptr()), tcp_port: config.tcp_port.unwrap_or(0), http_port: config.http_port.unwrap_or(0), pool_check_fn: ptr::null(), pool_alive_fn: ptr::null(), n_pools: 0, eval_timeout: if config.eval_timeout > 0 { config.eval_timeout } else { 30 }, }; unsafe { router_run(&mut dc as *mut CDaemonConfig as *mut c_void, router); router_free(router); } } /// Run a pre-built call packet on a remote worker node (SLURM mode). /// Reads a call packet from file, sends it to the pool, writes result as MessagePack. 
fn run_call_packet(config: &dispatch::NexusConfig, tmpdir: &str) {
    use std::ffi::{c_char, c_void, CString};
    use std::ptr;

    // All packet/schema plumbing comes from the C ABI of libmorloc.so.
    extern "C" {
        fn read_binary_file(
            filename: *const c_char,
            file_size: *mut usize,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn send_and_receive_over_socket(
            socket_path: *const c_char,
            packet: *const u8,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn get_morloc_data_packet_error_message(
            data: *const u8,
            errmsg: *mut *mut c_char,
        ) -> *mut c_char;
        fn read_schema_from_packet_meta(
            packet: *const u8,
            errmsg: *mut *mut c_char,
        ) -> *mut c_char;
        fn parse_schema(
            schema_str: *const c_char,
            errmsg: *mut *mut c_char,
        ) -> *mut morloc_runtime::cschema::CSchema;
        fn get_morloc_data_packet_value(
            data: *const u8,
            schema: *const morloc_runtime::cschema::CSchema,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn pack_with_schema(
            mlc: *const c_void,
            schema: *const morloc_runtime::cschema::CSchema,
            mpkptr: *mut *mut c_char,
            mpk_size: *mut usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
        fn write_atomic(
            filename: *const c_char,
            data: *const u8,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
        fn print_morloc_data_packet(
            packet: *const u8,
            schema: *const morloc_runtime::cschema::CSchema,
            errmsg: *mut *mut c_char,
        ) -> i32;
    }

    // Caller guarantees packet_path is Some in this mode (main checked it).
    let packet_path = config.packet_path.as_ref().unwrap();
    let socket_base = match &config.socket_base {
        Some(s) => s.clone(),
        None => {
            eprintln!("Error: --socket-base required for call-packet mode");
            process::clean_exit(1);
        }
    };
    let socket_path = format!("{}/{}", tmpdir, socket_base);

    let packet_c = CString::new(packet_path.as_str()).unwrap();
    let socket_c = CString::new(socket_path.as_str()).unwrap();
    let mut errmsg: *mut c_char = ptr::null_mut();

    // Read call packet from file
    let mut packet_size: usize = 0;
    let call_packet = unsafe { read_binary_file(packet_c.as_ptr(), &mut packet_size, &mut errmsg) };
    if call_packet.is_null() || !errmsg.is_null() {
        let msg = if !errmsg.is_null() {
            let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned();
            unsafe { libc::free(errmsg as *mut c_void) };
            s
        } else {
            "unknown error".into()
        };
        eprintln!("Error: failed to read call packet '{}': {}", packet_path, msg);
        process::clean_exit(1);
    }

    // Send to pool and receive response; the call packet is ours to free.
    let result_packet = unsafe {
        send_and_receive_over_socket(socket_c.as_ptr(), call_packet, &mut errmsg)
    };
    unsafe { libc::free(call_packet as *mut c_void) };
    if result_packet.is_null() || !errmsg.is_null() {
        let msg = if !errmsg.is_null() {
            let s = unsafe { std::ffi::CStr::from_ptr(errmsg) }.to_string_lossy().into_owned();
            unsafe { libc::free(errmsg as *mut c_void) };
            s
        } else {
            "unknown error".into()
        };
        eprintln!("Error: run failed: {}", msg);
        process::clean_exit(1);
    }

    // Check for error in response
    let run_err = unsafe { get_morloc_data_packet_error_message(result_packet, &mut errmsg) };
    if !run_err.is_null() {
        let s = unsafe { std::ffi::CStr::from_ptr(run_err) }.to_string_lossy().into_owned();
        unsafe { libc::free(run_err as *mut c_void) };
        eprintln!("Error: run failed: {}", s);
        process::clean_exit(1);
    }

    // If output-form is "packet", write raw packet to output file
    // NOTE(review): `errmsg` is reused across the calls below without being
    // reset to null between them; the `errmsg.is_null()` guards therefore
    // assume each C call either leaves it null or sets it -- confirm against
    // the libmorloc FFI contract.
    if config.output_format == dispatch::OutputFormat::Packet {
        if let Some(ref output_path) = config.output_path {
            let schema_str = unsafe { read_schema_from_packet_meta(result_packet, &mut errmsg) };
            let schema = if !schema_str.is_null() {
                unsafe { parse_schema(schema_str, &mut errmsg) }
            } else {
                ptr::null_mut()
            };
            unsafe {
                print_morloc_data_packet(result_packet, schema, &mut errmsg);
            };
            // Also write as msgpack file
            if !schema.is_null() {
                let mlc = unsafe { get_morloc_data_packet_value(result_packet, schema, &mut errmsg) };
                if !mlc.is_null() && errmsg.is_null() {
                    let mut mpk_data: *mut c_char = ptr::null_mut();
                    let mut mpk_size: usize = 0;
                    unsafe {
                        pack_with_schema(mlc as *const c_void, schema, &mut mpk_data, &mut mpk_size, &mut errmsg)
                    };
                    if !mpk_data.is_null() && errmsg.is_null() {
                        let mpk_filename = format!("{}.mpk", output_path);
                        let mpk_c = CString::new(mpk_filename.as_str()).unwrap();
                        unsafe {
                            write_atomic(mpk_c.as_ptr(), mpk_data as *const u8, mpk_size, &mut errmsg)
                        };
                        unsafe { libc::free(mpk_data as *mut c_void) };
                    }
                }
            }
        }
    }
    unsafe { libc::free(result_packet as *mut c_void) };
}



================================================
FILE: data/rust/morloc-nexus/src/manifest.rs
================================================
//! Manifest types -- thin re-export shim over the canonical schema
//! definitions in the `morloc-manifest` crate.
//!
//! This module exists only so that existing code in this crate (and
//! its consumers) can keep saying `crate::manifest::Manifest` without
//! caring whether the types live here or in a sibling crate. The
//! actual schema, with full doc comments and parsing logic, lives in
//! `data/rust/morloc-manifest/src/lib.rs`.

pub use morloc_manifest::{
    parse_manifest, read_manifest_payload, Arg, Command, GroupEntry, Manifest, Pool,
};



================================================
FILE: data/rust/morloc-nexus/src/process.rs
================================================
//! Pool daemon process management, signal handling, and lifecycle.
//!
//! Replaces the fork/exec, SIGCHLD, SIGTERM, clean_exit logic from nexus.c.

use std::ffi::CString;
use std::path::Path;
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
use std::time::Duration;

use crate::manifest::Pool;

pub const MAX_DAEMONS: usize = 32;
// Ping/backoff tuning used by wait_for_daemon below.
const INITIAL_PING_TIMEOUT: Duration = Duration::from_millis(10);
const INITIAL_RETRY_DELAY: Duration = Duration::from_millis(1);
const RETRY_MULTIPLIER: f64 = 1.25;
const MAX_RETRIES: usize = 16;

// ── Global state for signal handlers ───────────────────────────────────────

/// PIDs of spawned pool daemons. 0 = unused, -1 = reaped.
static PIDS: [AtomicI32; MAX_DAEMONS] = {
    const INIT: AtomicI32 = AtomicI32::new(0);
    [INIT; MAX_DAEMONS]
};

/// Process group IDs for cleanup.
static PGIDS: [AtomicI32; MAX_DAEMONS] = { const INIT: AtomicI32 = AtomicI32::new(0); [INIT; MAX_DAEMONS] }; /// Exit statuses saved by SIGCHLD handler. static EXIT_STATUSES: [AtomicI32; MAX_DAEMONS] = { const INIT: AtomicI32 = AtomicI32::new(0); [INIT; MAX_DAEMONS] }; /// Re-entrancy guard for clean_exit. static CLEANING_UP: AtomicBool = AtomicBool::new(false); /// Global tmpdir path (set once in main, read during cleanup). static TMPDIR: std::sync::OnceLock = std::sync::OnceLock::new(); /// Socket info for each pool. /// /// Pool stderr and stdout are intentionally NOT captured or intercepted by /// the nexus: a core morloc guarantee is that anything a sourced function /// prints to stderr/stdout is passed through unchanged. Raised exceptions /// are caught inside each pool's dispatch wrapper (see pool.py/pool.cpp/ /// pool.R/pool.jl) and returned as morloc error packets, which the nexus /// then annotates with call-site context when bubbling them up. pub struct PoolSocket { pub lang: String, pub socket_path: String, pub syscmd: Vec, pub pid: i32, } // ── Signal handlers (async-signal-safe) ──────────────────────────────────── /// SIGCHLD handler: reap terminated children. extern "C" fn sigchld_handler(_sig: libc::c_int) { #[cfg(target_os = "linux")] let saved_errno = unsafe { *libc::__errno_location() }; #[cfg(target_os = "macos")] let saved_errno = unsafe { *libc::__error() }; loop { let mut status: libc::c_int = 0; let pid = unsafe { libc::waitpid(-1, &mut status, libc::WNOHANG) }; if pid <= 0 { break; } for i in 0..MAX_DAEMONS { if PIDS[i].load(Ordering::Relaxed) == pid { EXIT_STATUSES[i].store(status, Ordering::Relaxed); PIDS[i].store(-1, Ordering::Relaxed); break; } } } #[cfg(target_os = "linux")] unsafe { *libc::__errno_location() = saved_errno }; #[cfg(target_os = "macos")] unsafe { *libc::__error() = saved_errno }; } /// SIGTERM/SIGINT handler: clean shutdown. 
extern "C" fn signal_exit_handler(sig: libc::c_int) { if CLEANING_UP.load(Ordering::Relaxed) { unsafe { libc::_exit(128 + sig) }; } clean_exit(128 + sig); } /// Install signal handlers. pub fn install_signal_handlers() { unsafe { // SIGCHLD let mut sa: libc::sigaction = std::mem::zeroed(); sa.sa_sigaction = sigchld_handler as *const () as usize; libc::sigemptyset(&mut sa.sa_mask); sa.sa_flags = libc::SA_RESTART | libc::SA_NOCLDSTOP; libc::sigaction(libc::SIGCHLD, &sa, std::ptr::null_mut()); // SIGTERM and SIGINT let mut sa_exit: libc::sigaction = std::mem::zeroed(); sa_exit.sa_sigaction = signal_exit_handler as *const () as usize; libc::sigemptyset(&mut sa_exit.sa_mask); sa_exit.sa_flags = 0; libc::sigaction(libc::SIGTERM, &sa_exit, std::ptr::null_mut()); libc::sigaction(libc::SIGINT, &sa_exit, std::ptr::null_mut()); } } /// Set the global tmpdir for cleanup. pub fn set_tmpdir(path: String) { let _ = TMPDIR.set(path); } /// Get the tmpdir path. pub fn get_tmpdir() -> Option<&'static str> { TMPDIR.get().map(|s| s.as_str()) } // ── Clean exit ───────────────────────────────────────────────────────────── /// Terminate all pool daemons and clean up resources. /// /// Race condition with stderr output: when a pool process is dying (e.g., /// Python printing a traceback), its stderr writes may still be in a pipe /// buffer or mid-syscall when we send SIGTERM. The pool's signal handler /// (or SIG_DFL) may kill the process before its output reaches the /// terminal. We mitigate this by: /// 1. Flushing the nexus's own stderr first (so our error message is out) /// 2. Giving pools 200ms after SIGTERM before escalating to SIGKILL /// (up from the previous 50ms, which was too short for Python's /// atexit handlers and multiprocessing cleanup to flush buffers) pub fn clean_exit(exit_code: i32) -> ! { CLEANING_UP.store(true, Ordering::SeqCst); // Flush nexus stderr so our error messages are visible even if // the process is killed by a parent (e.g., shell pipeline). 
unsafe { libc::fsync(2) }; // Block SIGCHLD during cleanup unsafe { let mut block_chld: libc::sigset_t = std::mem::zeroed(); libc::sigemptyset(&mut block_chld); libc::sigaddset(&mut block_chld, libc::SIGCHLD); libc::sigprocmask(libc::SIG_BLOCK, &block_chld, std::ptr::null_mut()); } // Send SIGTERM to all pool process groups for i in 0..MAX_DAEMONS { let pgid = PGIDS[i].load(Ordering::Relaxed); if pgid > 0 { unsafe { libc::kill(-pgid, libc::SIGTERM) }; } } // Wait for groups to exit (up to 200ms per group, then SIGKILL). // The 200ms window serves two purposes: // - Lets pool signal handlers run (Python's signal_handler in pool.py // calls close_daemon and cleans up shared memory) // - Lets any pending stderr writes (tracebacks, error messages) drain // to the terminal before the process is force-killed for i in 0..MAX_DAEMONS { let pgid = PGIDS[i].load(Ordering::Relaxed); if pgid <= 0 { continue; } // Reap any available children while unsafe { libc::waitpid(-1, std::ptr::null_mut(), libc::WNOHANG) } > 0 {} if unsafe { libc::kill(-pgid, 0) } == -1 { continue; } let mut group_dead = false; for _ in 0..100 { while unsafe { libc::waitpid(-1, std::ptr::null_mut(), libc::WNOHANG) } > 0 {} if unsafe { libc::kill(-pgid, 0) } == -1 { group_dead = true; break; } std::thread::sleep(Duration::from_millis(2)); } if !group_dead { unsafe { libc::kill(-pgid, libc::SIGKILL) }; std::thread::sleep(Duration::from_millis(50)); } } // Final reap while unsafe { libc::waitpid(-1, std::ptr::null_mut(), libc::WNOHANG) } > 0 {} // Clean up shared memory segments extern "C" { fn shclose(errmsg: *mut *mut std::ffi::c_char) -> bool; } unsafe { let mut err: *mut std::ffi::c_char = std::ptr::null_mut(); shclose(&mut err); if !err.is_null() { libc::free(err as *mut libc::c_void); } } // Clean up tmpdir if let Some(dir) = get_tmpdir() { let _ = std::fs::remove_dir_all(dir); } std::process::exit(exit_code); } // ── Pool daemon spawning ─────────────────────────────────────────────────── /// Setup 
socket descriptors for all pools from the manifest. pub fn setup_sockets(pools: &[Pool], tmpdir: &str, shm_basename: &str) -> Vec { pools .iter() .map(|pool| { let socket_path = format!("{}/{}", tmpdir, pool.socket); // Build syscmd: exec_args... socket_path tmpdir shm_basename let mut syscmd: Vec = pool .exec .iter() .map(|s| CString::new(s.as_str()).unwrap()) .collect(); syscmd.push(CString::new(socket_path.as_str()).unwrap()); syscmd.push(CString::new(tmpdir).unwrap()); syscmd.push(CString::new(shm_basename).unwrap()); PoolSocket { lang: pool.lang.clone(), socket_path, syscmd, pid: 0, } }) .collect() } /// Fork and exec a language pool daemon. Returns child PID. /// /// The child inherits the nexus's stdin/stdout/stderr unchanged: anything a /// sourced function prints must reach the terminal byte-for-byte without /// morloc interposing. Runtime errors raised inside the pool are caught by /// the pool's own dispatch wrapper and returned as morloc error packets. fn start_language_server(socket: &PoolSocket) -> Result { let pid = unsafe { libc::fork() }; if pid == 0 { // Child process unsafe { libc::setpgid(0, 0) }; let argv: Vec<*const libc::c_char> = socket .syscmd .iter() .map(|s| s.as_ptr()) .chain(std::iter::once(std::ptr::null())) .collect(); unsafe { libc::execvp(argv[0], argv.as_ptr()); } // Only reached if exec fails. eprintln!( "execvp failed for {}: {}", socket.lang, std::io::Error::last_os_error() ); unsafe { libc::_exit(127) }; } else if pid > 0 { // Parent: ensure child is in its own process group unsafe { libc::setpgid(pid, pid) }; Ok(pid) } else { Err(format!("fork failed: {}", std::io::Error::last_os_error())) } } /// Start pool daemons for the given socket indices and wait for them to respond to pings. 
pub fn start_daemons(sockets: &mut [PoolSocket], indices: &[usize]) -> Result<(), String> { for &idx in indices { let pid = start_language_server(&sockets[idx])?; sockets[idx].pid = pid; PIDS[idx].store(pid, Ordering::Relaxed); PGIDS[idx].store(pid, Ordering::Relaxed); } // Wait for each daemon to respond to pings for &idx in indices { wait_for_daemon(&sockets[idx], idx)?; } Ok(()) } /// Ping a daemon with exponential backoff until it responds. /// Matches the C nexus behavior: initial delay 1ms, multiplier 1.25, /// plus socket timeout that doubles from 10ms to ~10s. fn wait_for_daemon(socket: &PoolSocket, pool_index: usize) -> Result<(), String> { use morloc_runtime::packet::PacketHeader; use std::os::unix::net::UnixStream; use std::io::{Read, Write}; let ping = PacketHeader::ping(); let ping_bytes = ping.to_bytes(); let mut retry_delay = INITIAL_RETRY_DELAY.as_secs_f64(); let mut ping_timeout = INITIAL_PING_TIMEOUT; for attempt in 0..=MAX_RETRIES { // Check if child already died. The pool's stderr was inherited // directly, so any traceback it printed is already on the user's // terminal; the nexus just reports the exit status here. 
if PIDS[pool_index].load(Ordering::Relaxed) == -1 { let status = EXIT_STATUSES[pool_index].load(Ordering::Relaxed); return Err(format!( "Pool process for '{}' died unexpectedly (status: {})", socket.lang, status )); } // Try to connect and ping match UnixStream::connect(&socket.socket_path) { Ok(mut stream) => { let _ = stream.set_read_timeout(Some(ping_timeout)); let _ = stream.set_write_timeout(Some(ping_timeout)); if stream.write_all(&ping_bytes).is_ok() { let mut resp = [0u8; 32]; if stream.read_exact(&mut resp).is_ok() { if let Ok(hdr) = PacketHeader::from_bytes(&resp) { if hdr.is_ping() { return Ok(()); } } } } } Err(_) => {} } if attempt == MAX_RETRIES { return Err(format!( "Failed to ping pool '{}' at {} after {} retries", socket.lang, socket.socket_path, MAX_RETRIES )); } // Sleep with exponential backoff // Use the larger of retry_delay or ping_timeout to ensure we wait // long enough for slow-starting pools (R, Python) let wait = retry_delay.max(ping_timeout.as_secs_f64()); let secs = wait as u64; let nanos = ((wait - secs as f64) * 1e9) as u32; std::thread::sleep(Duration::new(secs, nanos)); retry_delay *= RETRY_MULTIPLIER; ping_timeout = ping_timeout * 2; } unreachable!() } /// Return a C-compatible function pointer for pool_is_alive. pub fn pool_is_alive_ptr() -> *const std::ffi::c_void { extern "C" fn pool_alive_c(pool_index: usize) -> bool { pool_is_alive(pool_index) } pool_alive_c as *const std::ffi::c_void } /// Check if a pool at given index is alive. pub fn pool_is_alive(pool_index: usize) -> bool { if pool_index >= MAX_DAEMONS { return false; } let pid = PIDS[pool_index].load(Ordering::Relaxed); if pid <= 0 { return false; } unsafe { libc::kill(pid, 0) == 0 } } /// Get the exit status of a reaped pool, returning signal/exit info. 
pub fn pool_death_info(pool_index: usize) -> Option { if PIDS[pool_index].load(Ordering::Relaxed) != -1 { return None; } let st = EXIT_STATUSES[pool_index].load(Ordering::Relaxed); if libc::WIFSIGNALED(st) { let sig = libc::WTERMSIG(st); Some(format!("Pool process crashed with signal {sig}")) } else if libc::WIFEXITED(st) { let code = libc::WEXITSTATUS(st); Some(format!("Pool process exited with status {code}")) } else { Some("Pool process died unexpectedly".into()) } } /// Validate that all pool executables exist. pub fn validate_pools(pools: &[Pool]) -> Result<(), String> { for pool in pools { if let Some(exec) = pool.exec.last() { if !Path::new(exec).exists() { return Err(format!( "Build artifacts missing or stale. Pool file '{}' not found. Re-run `morloc make`.", exec )); } } } Ok(()) } /// Create a temporary directory for this nexus session. pub fn make_tmpdir() -> Result { let template = CString::new("/tmp/morloc.XXXXXX").unwrap(); let mut buf = template.into_bytes_with_nul(); let ptr = buf.as_mut_ptr() as *mut libc::c_char; let result = unsafe { libc::mkdtemp(ptr) }; if result.is_null() { return Err(format!( "Failed to create temporary directory: {}", std::io::Error::last_os_error() )); } let cstr = unsafe { std::ffi::CStr::from_ptr(result) }; Ok(cstr.to_string_lossy().into_owned()) } /// Generate a job hash from seed, pid, and timestamps. pub fn make_job_hash(seed: u64) -> u64 { use morloc_runtime::hash::xxh64; let pid = std::process::id() as u64; let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default(); let epoch_ns = now.as_nanos() as u64; let data = format!("{}:{}:{}", pid, epoch_ns, seed); xxh64(data.as_bytes()) } /// Become a subreaper so orphaned grandchildren get reparented to us. /// Only available on Linux; no-op on other platforms. 
pub fn set_child_subreaper() { #[cfg(target_os = "linux")] unsafe { libc::prctl(libc::PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); } } ================================================ FILE: data/rust/morloc-runtime/Cargo.toml ================================================ [package] name = "morloc-runtime" version = "0.81.0" edition = "2021" description = "Morloc runtime library: shared memory, serialization, IPC, packet protocol" # Version is intentionally synchronized with the morloc compiler version # (see ../../../package.yaml). The C-FFI manifest reader rejects # manifests whose recorded morloc_version differs from this crate's # CARGO_PKG_VERSION at parse time. Bumping the morloc compiler version # requires bumping this version in lockstep. [lib] crate-type = ["cdylib", "staticlib", "rlib"] [dependencies] morloc-manifest = { path = "../morloc-manifest" } libc = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } rmp = "0.8" rmp-serde = { workspace = true } twox-hash = { workspace = true } nix = { workspace = true } thiserror = { workspace = true } [build-dependencies] cbindgen = "0.28" # cc crate no longer needed -- all C files ported to Rust ================================================ FILE: data/rust/morloc-runtime/build.rs ================================================ fn main() { let target = std::env::var("TARGET").unwrap_or_default(); println!("cargo:rustc-link-lib=pthread"); if target.contains("linux") { println!("cargo:rustc-link-lib=rt"); } // The morloc compiler version is sourced from CARGO_PKG_VERSION // (this crate's Cargo.toml), which is intentionally kept in // lockstep with the morloc Haskell package.yaml. 
} ================================================ FILE: data/rust/morloc-runtime/cbindgen.toml ================================================ language = "C" header = "/* Generated by cbindgen - do not edit manually */" autogen_warning = "" include_version = false documentation_style = "c99" no_includes = true [export] prefix = "" [fn] prefix = "" [parse] parse_deps = false ================================================ FILE: data/rust/morloc-runtime/src/arrow_ffi.rs ================================================ //! Arrow C Data Interface implementation. //! Replaces arrow.c. Provides SHM ↔ Arrow conversion. use std::ffi::{c_char, c_void}; use std::ptr; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::shm::{self, RelPtr}; const ARROW_SHM_MAGIC: u32 = 0xA770DA7A; const ARROW_BUFFER_ALIGN: usize = 64; fn arrow_align_up(x: usize) -> usize { (x + ARROW_BUFFER_ALIGN - 1) & !(ARROW_BUFFER_ALIGN - 1) } // ── Arrow C Data Interface structs (matching Apache spec) ──────────────────── #[repr(C)] pub struct ArrowSchema { pub format: *const c_char, pub name: *const c_char, pub metadata: *const c_char, pub flags: i64, pub n_children: i64, pub children: *mut *mut ArrowSchema, pub dictionary: *mut ArrowSchema, pub release: Option, pub private_data: *mut c_void, } #[repr(C)] pub struct ArrowArray { pub length: i64, pub null_count: i64, pub offset: i64, pub n_buffers: i64, pub n_children: i64, pub buffers: *mut *const c_void, pub children: *mut *mut ArrowArray, pub dictionary: *mut ArrowArray, pub release: Option, pub private_data: *mut c_void, } // ── SHM header types ───────────────────────────────────────────────────────── #[repr(C)] pub struct ArrowColumnDesc { pub col_type: u32, // morloc_serial_type pub length: u64, pub null_count: u64, pub name_offset: u32, pub name_length: u16, pub data_offset: u64, pub data_size: u64, } #[repr(C)] pub struct ArrowShmHeader { pub magic: u32, pub n_columns: u32, pub n_rows: u64, pub 
total_size: u64,
}

// ── Type mapping ─────────────────────────────────────────────────────────────

// Serial type constants matching C enum
const MORLOC_NIL: u32 = 0;
const MORLOC_BOOL: u32 = 1;
const MORLOC_SINT8: u32 = 2;
const MORLOC_SINT16: u32 = 3;
const MORLOC_SINT32: u32 = 4;
const MORLOC_SINT64: u32 = 5;
const MORLOC_UINT8: u32 = 6;
const MORLOC_UINT16: u32 = 7;
const MORLOC_UINT32: u32 = 8;
const MORLOC_UINT64: u32 = 9;
const MORLOC_FLOAT32: u32 = 10;
const MORLOC_FLOAT64: u32 = 11;
const MORLOC_STRING: u32 = 12;

/// Byte width of one element of a fixed-width serial type; 0 for
/// unsupported or variable-width types.
#[no_mangle]
pub extern "C" fn arrow_element_size(serial_type: u32) -> usize {
    match serial_type {
        MORLOC_BOOL | MORLOC_SINT8 | MORLOC_UINT8 => 1,
        MORLOC_SINT16 | MORLOC_UINT16 => 2,
        MORLOC_SINT32 | MORLOC_UINT32 | MORLOC_FLOAT32 => 4,
        MORLOC_SINT64 | MORLOC_UINT64 | MORLOC_FLOAT64 => 8,
        _ => 0,
    }
}

/// Arrow format string for a serial type; NULL for unsupported types.
/// Returned pointers are 'static NUL-terminated literals.
#[no_mangle]
pub extern "C" fn arrow_format_string(serial_type: u32) -> *const c_char {
    match serial_type {
        MORLOC_BOOL => b"b\0".as_ptr() as *const c_char,
        MORLOC_SINT8 => b"c\0".as_ptr() as *const c_char,
        MORLOC_UINT8 => b"C\0".as_ptr() as *const c_char,
        MORLOC_SINT16 => b"s\0".as_ptr() as *const c_char,
        MORLOC_UINT16 => b"S\0".as_ptr() as *const c_char,
        MORLOC_SINT32 => b"i\0".as_ptr() as *const c_char,
        MORLOC_UINT32 => b"I\0".as_ptr() as *const c_char,
        MORLOC_SINT64 => b"l\0".as_ptr() as *const c_char,
        MORLOC_UINT64 => b"L\0".as_ptr() as *const c_char,
        MORLOC_FLOAT32 => b"f\0".as_ptr() as *const c_char,
        MORLOC_FLOAT64 => b"g\0".as_ptr() as *const c_char,
        MORLOC_STRING => b"u\0".as_ptr() as *const c_char,
        _ => ptr::null(),
    }
}

/// Inverse of arrow_format_string: map a one-character Arrow format
/// string back to a morloc serial type (MORLOC_NIL on failure).
#[no_mangle]
pub unsafe extern "C" fn arrow_format_to_type(format: *const c_char) -> u32 {
    // Only single-character format strings are supported here.
    if format.is_null() || *format == 0 || *format.add(1) != 0 {
        return MORLOC_NIL;
    }
    match *format as u8 {
        b'b' => MORLOC_BOOL,
        b'c' => MORLOC_SINT8,
        b'C' => MORLOC_UINT8,
        b's' => MORLOC_SINT16,
        b'S' => MORLOC_UINT16,
        b'i' => MORLOC_SINT32,
        b'I' => MORLOC_UINT32,
        b'l' => MORLOC_SINT64,
        b'L' => MORLOC_UINT64,
        b'f' => MORLOC_FLOAT32,
        b'g' => MORLOC_FLOAT64,
        b'u' => MORLOC_STRING,
        _ => MORLOC_NIL,
    }
}

// ── Column accessors (used by arrow_json.c) ──────────────────────────────────

/// Pointer to the col_index-th column descriptor, or NULL if out of range.
/// Descriptors are laid out immediately after the ArrowShmHeader.
#[no_mangle]
pub unsafe extern "C" fn arrow_column_desc(
    header: *const ArrowShmHeader,
    col_index: u32,
) -> *const ArrowColumnDesc {
    if header.is_null() || col_index >= (*header).n_columns {
        return ptr::null();
    }
    let descs = (header as *const u8).add(std::mem::size_of::<ArrowShmHeader>())
        as *const ArrowColumnDesc;
    descs.add(col_index as usize)
}

/// Pointer to the column's data buffer within the SHM block.
#[no_mangle]
pub unsafe extern "C" fn arrow_column_data(
    header: *const ArrowShmHeader,
    col_index: u32,
) -> *const c_void {
    let desc = arrow_column_desc(header, col_index);
    if desc.is_null() {
        return ptr::null();
    }
    (header as *const u8).add((*desc).data_offset as usize) as *const c_void
}

/// Pointer to the column's name bytes (NOT NUL-terminated; length is in
/// the descriptor's name_length field).
#[no_mangle]
pub unsafe extern "C" fn arrow_column_name(
    header: *const ArrowShmHeader,
    col_index: u32,
) -> *const c_char {
    let desc = arrow_column_desc(header, col_index);
    if desc.is_null() {
        return ptr::null();
    }
    (header as *const u8).add((*desc).name_offset as usize) as *const c_char
}

// ── arrow_to_shm ─────────────────────────────────────────────────────────────

/// Copy an Arrow struct array into a single morloc SHM allocation and
/// return its relative pointer (RELNULL on failure, with errmsg set).
#[no_mangle]
pub unsafe extern "C" fn arrow_to_shm(
    array: *const ArrowArray,
    schema: *const ArrowSchema,
    errmsg: *mut *mut c_char,
) -> RelPtr {
    clear_errmsg(errmsg);
    if array.is_null() || schema.is_null() {
        set_errmsg(errmsg, &MorlocError::Other("NULL array or schema".into()));
        return shm::RELNULL;
    }
    // Verify struct type
    if (*schema).format.is_null()
        || libc::strcmp((*schema).format, b"+s\0".as_ptr() as *const c_char) != 0
    {
        set_errmsg(errmsg, &MorlocError::Other("Expected struct schema (format '+s')".into()));
        return shm::RELNULL;
    }
    let n_cols = (*schema).n_children as usize;
    let n_rows = (*array).length as usize;
    if n_cols == 0 {
        set_errmsg(errmsg, &MorlocError::Other("Arrow struct must have at least one column".into()));
        return shm::RELNULL;
    }
    // First pass: size the allocation (header + descriptors + names + data).
    let header_size = std::mem::size_of::<ArrowShmHeader>();
    let descs_size = n_cols * std::mem::size_of::<ArrowColumnDesc>();
    let mut names_size: usize = 0;
    for i in 0..n_cols {
        let child_schema = *(*schema).children.add(i);
        let name = (*child_schema).name;
        if !name.is_null() {
            names_size += libc::strlen(name);
        }
    }
    let data_start = arrow_align_up(header_size + descs_size + names_size);
    let mut total_size = data_start;
    for i in 0..n_cols {
        let child_schema = *(*schema).children.add(i);
        let col_type = arrow_format_to_type((*child_schema).format);
        let elem_size = arrow_element_size(col_type);
        if col_type == MORLOC_STRING {
            // Strings need an (n_rows + 1) i32 offsets buffer plus the bytes.
            let child = *(*array).children.add(i);
            let offsets = if (*child).n_buffers >= 2 {
                *(*child).buffers.add(1) as *const i32
            } else {
                ptr::null()
            };
            let str_data_size = if !offsets.is_null() {
                let off = (*child).offset as usize;
                (*offsets.add(off + n_rows) - *offsets.add(off)) as usize
            } else {
                0
            };
            total_size = arrow_align_up(total_size)
                + (n_rows + 1) * std::mem::size_of::<i32>()
                + str_data_size;
        } else {
            if elem_size == 0 {
                set_errmsg(errmsg, &MorlocError::Other(format!("Unsupported Arrow column type for column {}", i)));
                return shm::RELNULL;
            }
            total_size = arrow_align_up(total_size) + elem_size * n_rows;
        }
    }
    let shm_ptr = match shm::shmalloc(total_size) {
        Ok(p) => p,
        Err(e) => {
            set_errmsg(errmsg, &e);
            return shm::RELNULL;
        }
    };
    ptr::write_bytes(shm_ptr, 0, total_size);
    // Write header
    let header = &mut *(shm_ptr as *mut ArrowShmHeader);
    header.magic = ARROW_SHM_MAGIC;
    header.n_columns = n_cols as u32;
    header.n_rows = n_rows as u64;
    header.total_size = total_size as u64;
    // Second pass: fill descriptors, names, and column data.
    let descs = shm_ptr.add(header_size) as *mut ArrowColumnDesc;
    let mut name_cursor = header_size + descs_size;
    let mut data_cursor = data_start;
    for i in 0..n_cols {
        let child_schema = *(*schema).children.add(i);
        let child_array = *(*array).children.add(i);
        let col_type = arrow_format_to_type((*child_schema).format);
        data_cursor = arrow_align_up(data_cursor);
        let name = if (*child_schema).name.is_null() {
            b"\0".as_ptr() as *const c_char
        } else {
            (*child_schema).name
        };
        let name_len = libc::strlen(name);
        let desc = &mut *descs.add(i);
        desc.col_type = col_type;
        desc.length = n_rows as u64;
        desc.null_count = (*child_array).null_count as u64;
        desc.name_offset = name_cursor as u32;
        desc.name_length = name_len as u16;
        desc.data_offset = data_cursor as u64;
        if name_len > 0 {
            ptr::copy_nonoverlapping(name as *const u8, shm_ptr.add(name_cursor), name_len);
        }
        name_cursor += name_len;
        if col_type == MORLOC_STRING {
            let src_offsets = if (*child_array).n_buffers >= 2 {
                *(*child_array).buffers.add(1) as *const i32
            } else {
                ptr::null()
            };
            let src_data = if (*child_array).n_buffers >= 3 {
                *(*child_array).buffers.add(2) as *const u8
            } else {
                ptr::null()
            };
            let arr_offset = (*child_array).offset as usize;
            let dst_offsets = shm_ptr.add(data_cursor) as *mut i32;
            // Rebase offsets so the copied column starts at 0.
            let base = if !src_offsets.is_null() {
                *src_offsets.add(arr_offset)
            } else {
                0
            };
            for r in 0..=n_rows {
                *dst_offsets.add(r) = if !src_offsets.is_null() {
                    *src_offsets.add(arr_offset + r) - base
                } else {
                    0
                };
            }
            let offsets_size = (n_rows + 1) * std::mem::size_of::<i32>();
            let str_data_size = if !src_offsets.is_null() {
                (*src_offsets.add(arr_offset + n_rows) - base) as usize
            } else {
                0
            };
            if str_data_size > 0 && !src_data.is_null() {
                ptr::copy_nonoverlapping(
                    src_data.add(base as usize),
                    shm_ptr.add(data_cursor + offsets_size),
                    str_data_size,
                );
            }
            let buf_size = offsets_size + str_data_size;
            desc.data_size = buf_size as u64;
            data_cursor += buf_size;
        } else {
            let elem_size = arrow_element_size(col_type);
            let buf_size = elem_size * n_rows;
            desc.data_size = buf_size as u64;
            if (*child_array).n_buffers >= 2 && !(*(*child_array).buffers.add(1)).is_null() {
                // buffers[1] is the data buffer; honor the array's offset.
                let src = (*(*child_array).buffers.add(1) as *const u8)
                    .add((*child_array).offset as usize * elem_size);
                if buf_size > 0 {
                    ptr::copy_nonoverlapping(src, shm_ptr.add(data_cursor), buf_size);
                }
            }
            data_cursor += buf_size;
        }
    }
    match shm::abs2rel(shm_ptr) {
        Ok(r) => r,
        Err(e) => {
            set_errmsg(errmsg, &e);
            shm::RELNULL
        }
    }
}

// ── arrow_validate ───────────────────────────────────────────────────────────
#[no_mangle] pub unsafe extern "C" fn arrow_validate( header: *const ArrowShmHeader, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); if header.is_null() { set_errmsg(errmsg, &MorlocError::Other("NULL arrow header".into())); return 1; } if (*header).magic != ARROW_SHM_MAGIC { set_errmsg(errmsg, &MorlocError::Other("Invalid arrow SHM magic".into())); return 1; } if schema.is_null() { set_errmsg(errmsg, &MorlocError::Other("NULL schema for arrow validation".into())); return 1; } // MORLOC_MAP = 16 if (*schema).serial_type != crate::schema::SerialType::Map as u32 { set_errmsg(errmsg, &MorlocError::Other("Expected MORLOC_MAP schema for arrow table".into())); return 1; } let n_cols = (*header).n_columns as usize; if n_cols != (*schema).size { set_errmsg(errmsg, &MorlocError::Other(format!("Column count mismatch: arrow has {}, schema has {}", n_cols, (*schema).size))); return 1; } for i in 0..n_cols { let desc = arrow_column_desc(header, i as u32); if desc.is_null() { set_errmsg(errmsg, &MorlocError::Other(format!("NULL column descriptor at index {}", i))); return 1; } let param_schema = *(*schema).parameters.add(i); if (*desc).col_type != (*param_schema).serial_type as u32 { set_errmsg(errmsg, &MorlocError::Other(format!("Column {} type mismatch", i))); return 1; } } 0 } // ── Release callbacks for arrow_from_shm ───────────────────────────────────── unsafe extern "C" fn arrow_shm_child_schema_release(schema: *mut ArrowSchema) { if schema.is_null() { return; } if !(*schema).name.is_null() { libc::free((*schema).name as *mut c_void); } (*schema).name = ptr::null(); (*schema).release = None; } unsafe extern "C" fn arrow_shm_child_array_release(array: *mut ArrowArray) { if array.is_null() { return; } if !(*array).buffers.is_null() { libc::free((*array).buffers as *mut c_void); } (*array).buffers = ptr::null_mut(); (*array).release = None; } unsafe extern "C" fn arrow_shm_schema_release(schema: *mut ArrowSchema) { if schema.is_null() { 
return; } for i in 0..(*schema).n_children as usize { let child = *(*schema).children.add(i); if !child.is_null() { if let Some(release) = (*child).release { release(child); } libc::free(child as *mut c_void); } } libc::free((*schema).children as *mut c_void); (*schema).children = ptr::null_mut(); (*schema).release = None; } unsafe extern "C" fn arrow_shm_array_release(array: *mut ArrowArray) { if array.is_null() { return; } for i in 0..(*array).n_children as usize { let child = *(*array).children.add(i); if !child.is_null() { if let Some(release) = (*child).release { release(child); } libc::free(child as *mut c_void); } } libc::free((*array).children as *mut c_void); (*array).children = ptr::null_mut(); if !(*array).buffers.is_null() { libc::free((*array).buffers as *mut c_void); } (*array).buffers = ptr::null_mut(); (*array).release = None; } // ── arrow_from_shm ─────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn arrow_from_shm( header: *const ArrowShmHeader, out_schema: *mut ArrowSchema, out_array: *mut ArrowArray, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); if header.is_null() { set_errmsg(errmsg, &MorlocError::Other("NULL arrow header".into())); return 1; } if (*header).magic != ARROW_SHM_MAGIC { set_errmsg(errmsg, &MorlocError::Other("Invalid arrow SHM magic".into())); return 1; } let n_cols = (*header).n_columns as usize; let n_rows = (*header).n_rows as i64; // Parent schema (struct type) ptr::write_bytes(out_schema, 0, 1); (*out_schema).format = b"+s\0".as_ptr() as *const c_char; (*out_schema).n_children = n_cols as i64; (*out_schema).children = libc::calloc(n_cols, std::mem::size_of::<*mut ArrowSchema>()) as *mut *mut ArrowSchema; (*out_schema).release = Some(arrow_shm_schema_release); // Parent array ptr::write_bytes(out_array, 0, 1); (*out_array).length = n_rows; (*out_array).n_buffers = 1; (*out_array).buffers = libc::calloc(1, std::mem::size_of::<*const c_void>()) as *mut *const c_void; 
(*out_array).n_children = n_cols as i64; (*out_array).children = libc::calloc(n_cols, std::mem::size_of::<*mut ArrowArray>()) as *mut *mut ArrowArray; (*out_array).release = Some(arrow_shm_array_release); for i in 0..n_cols { let desc = arrow_column_desc(header, i as u32); // Child schema let child_s = libc::calloc(1, std::mem::size_of::()) as *mut ArrowSchema; (*child_s).release = Some(arrow_shm_child_schema_release); *(*out_schema).children.add(i) = child_s; (*child_s).format = arrow_format_string((*desc).col_type); let raw_name = arrow_column_name(header, i as u32); let name_len = (*desc).name_length as usize; let name_copy = libc::calloc(name_len + 1, 1) as *mut c_char; if !raw_name.is_null() && name_len > 0 { ptr::copy_nonoverlapping(raw_name as *const u8, name_copy as *mut u8, name_len); } (*child_s).name = name_copy; // Child array let child_a = libc::calloc(1, std::mem::size_of::()) as *mut ArrowArray; (*child_a).release = Some(arrow_shm_child_array_release); *(*out_array).children.add(i) = child_a; (*child_a).length = n_rows; (*child_a).null_count = (*desc).null_count as i64; let col_buf = arrow_column_data(header, i as u32); if (*desc).col_type == MORLOC_STRING { (*child_a).n_buffers = 3; (*child_a).buffers = libc::calloc(3, std::mem::size_of::<*const c_void>()) as *mut *const c_void; *(*child_a).buffers.add(1) = col_buf; // offsets *(*child_a).buffers.add(2) = (col_buf as *const u8).add((n_rows as usize + 1) * std::mem::size_of::()) as *const c_void; } else { (*child_a).n_buffers = 2; (*child_a).buffers = libc::calloc(2, std::mem::size_of::<*const c_void>()) as *mut *const c_void; *(*child_a).buffers.add(1) = col_buf; // zero-copy data } } 0 } // ── Arrow JSON/Table output (replaces arrow_json.c) ────────────────────────── unsafe fn print_arrow_value(desc: *const ArrowColumnDesc, col_data: *const u8, row: u64) { let row = row as usize; match (*desc).col_type { MORLOC_BOOL => { let v = *col_data.add(row); if v != 0 { libc::printf(b"true\0".as_ptr() as 
*const c_char); } else { libc::printf(b"false\0".as_ptr() as *const c_char); } } MORLOC_SINT8 => { libc::printf(b"%d\0".as_ptr() as *const c_char, *(col_data as *const i8).add(row) as i32); } MORLOC_SINT16 => { libc::printf(b"%d\0".as_ptr() as *const c_char, *(col_data as *const i16).add(row) as i32); } MORLOC_SINT32 => { libc::printf(b"%d\0".as_ptr() as *const c_char, *(col_data as *const i32).add(row)); } MORLOC_SINT64 => { libc::printf(b"%ld\0".as_ptr() as *const c_char, *(col_data as *const i64).add(row)); } MORLOC_UINT8 => { libc::printf(b"%u\0".as_ptr() as *const c_char, *col_data.add(row) as u32); } MORLOC_UINT16 => { libc::printf(b"%u\0".as_ptr() as *const c_char, *(col_data as *const u16).add(row) as u32); } MORLOC_UINT32 => { libc::printf(b"%u\0".as_ptr() as *const c_char, *(col_data as *const u32).add(row)); } MORLOC_UINT64 => { libc::printf(b"%lu\0".as_ptr() as *const c_char, *(col_data as *const u64).add(row)); } MORLOC_FLOAT32 => { libc::printf(b"%.7g\0".as_ptr() as *const c_char, *(col_data as *const f32).add(row) as f64); } MORLOC_FLOAT64 => { libc::printf(b"%.15g\0".as_ptr() as *const c_char, *(col_data as *const f64).add(row)); } MORLOC_STRING => { let offsets = col_data as *const i32; let str_data = offsets.add((*desc).length as usize + 1) as *const u8; let start = *offsets.add(row) as usize; let end = *offsets.add(row + 1) as usize; libc::putchar(b'"' as i32); for i in start..end { let c = *str_data.add(i); match c { b'"' => { libc::printf(b"\\\"\0".as_ptr() as *const c_char); } b'\\' => { libc::printf(b"\\\\\0".as_ptr() as *const c_char); } b'\n' => { libc::printf(b"\\n\0".as_ptr() as *const c_char); } b'\r' => { libc::printf(b"\\r\0".as_ptr() as *const c_char); } b'\t' => { libc::printf(b"\\t\0".as_ptr() as *const c_char); } _ if c < 32 => { libc::printf(b"\\u%04x\0".as_ptr() as *const c_char, c as u32); } _ => { libc::putchar(c as i32); } } } libc::putchar(b'"' as i32); } _ => { libc::printf(b"null\0".as_ptr() as *const c_char); } } } 
#[no_mangle] pub unsafe extern "C" fn print_arrow_as_json( data: *const c_void, errmsg: *mut *mut c_char, ) -> bool { use crate::error::{clear_errmsg, set_errmsg, MorlocError}; clear_errmsg(errmsg); let header = data as *const ArrowShmHeader; if header.is_null() { set_errmsg(errmsg, &MorlocError::Other("NULL arrow data".into())); return false; } if (*header).magic != ARROW_SHM_MAGIC { set_errmsg(errmsg, &MorlocError::Other(format!("Invalid arrow SHM magic: 0x{:08x}", (*header).magic))); return false; } let n_cols = (*header).n_columns; let n_rows = (*header).n_rows; libc::putchar(b'[' as i32); for r in 0..n_rows { if r > 0 { libc::putchar(b',' as i32); } libc::putchar(b'{' as i32); for c in 0..n_cols { if c > 0 { libc::putchar(b',' as i32); } let desc = arrow_column_desc(header, c); let name = arrow_column_name(header, c); let col_data = arrow_column_data(header, c); if !name.is_null() { libc::printf(b"\"%.*s\":\0".as_ptr() as *const c_char, (*desc).name_length as i32, name); } if !desc.is_null() && !col_data.is_null() { print_arrow_value(desc, col_data as *const u8, r); } else { libc::printf(b"null\0".as_ptr() as *const c_char); } } libc::putchar(b'}' as i32); } libc::printf(b"]\n\0".as_ptr() as *const c_char); true } #[no_mangle] pub unsafe extern "C" fn print_arrow_as_table( data: *const c_void, errmsg: *mut *mut c_char, ) -> bool { use crate::error::{clear_errmsg, set_errmsg, MorlocError}; clear_errmsg(errmsg); let header = data as *const ArrowShmHeader; if header.is_null() { set_errmsg(errmsg, &MorlocError::Other("NULL arrow data".into())); return false; } if (*header).magic != ARROW_SHM_MAGIC { set_errmsg(errmsg, &MorlocError::Other(format!("Invalid arrow SHM magic: 0x{:08x}", (*header).magic))); return false; } let n_cols = (*header).n_columns; let n_rows = (*header).n_rows; for c in 0..n_cols { if c > 0 { libc::putchar(b'\t' as i32); } let desc = arrow_column_desc(header, c); let name = arrow_column_name(header, c); if !name.is_null() && !desc.is_null() { 
libc::printf(b"%.*s\0".as_ptr() as *const c_char, (*desc).name_length as i32, name); } } libc::putchar(b'\n' as i32); for r in 0..n_rows { for c in 0..n_cols { if c > 0 { libc::putchar(b'\t' as i32); } let desc = arrow_column_desc(header, c); let col_data = arrow_column_data(header, c); if !desc.is_null() && !col_data.is_null() { print_arrow_value(desc, col_data as *const u8, r); } } libc::putchar(b'\n' as i32); } true } ================================================ FILE: data/rust/morloc-runtime/src/cache.rs ================================================ //! File-based packet caching with xxHash keys. //! Replaces cache.c. use std::ffi::{c_char, c_void, CStr, CString}; use std::ptr; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::hash; use crate::shm; // ── hash_voidstar ────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn hash_voidstar( data: *const c_void, schema: *const CSchema, seed: u64, errmsg: *mut *mut c_char, ) -> u64 { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); match hash_voidstar_inner(data as *const u8, &rs, seed) { Ok(h) => h, Err(e) => { set_errmsg(errmsg, &e); 0 } } } fn hash_voidstar_inner( data: *const u8, schema: &crate::schema::Schema, seed: u64, ) -> Result { use crate::schema::SerialType; // SAFETY: data points to voidstar data in SHM with layout described by schema. // All reads (Array headers, element data) are within schema-defined bounds. 
unsafe { match schema.serial_type { SerialType::String | SerialType::Array => { let arr = &*(data as *const shm::Array); let elem_width = if schema.parameters.is_empty() { 1 // string bytes } else { schema.parameters[0].width }; let elem_data = shm::rel2abs(arr.data)?; if schema.is_fixed_width() || schema.serial_type == SerialType::String { let total = elem_width * arr.size; let bytes = std::slice::from_raw_parts(elem_data, total); Ok(hash::xxh64_with_seed(bytes, seed)) } else { let mut h = seed; for i in 0..arr.size { h = hash_voidstar_inner( elem_data.add(i * elem_width), &schema.parameters[0], h, )?; } Ok(h) } } SerialType::Tuple | SerialType::Map => { if schema.is_fixed_width() { let bytes = std::slice::from_raw_parts(data, schema.width); Ok(hash::xxh64_with_seed(bytes, seed)) } else { let mut h = seed; for i in 0..schema.parameters.len() { h = hash_voidstar_inner( data.add(schema.offsets[i]), &schema.parameters[i], h, )?; } Ok(h) } } _ => { let bytes = std::slice::from_raw_parts(data, schema.width); Ok(hash::xxh64_with_seed(bytes, seed)) } } } } // ── hash_morloc_packet ───────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn hash_morloc_packet( packet: *const u8, schema: *const CSchema, seed: u64, hash_out: *mut u64, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); *hash_out = 0; extern "C" { fn read_morloc_packet_header( msg: *const u8, errmsg: *mut *mut c_char, ) -> *const crate::packet::PacketHeader; fn morloc_packet_size(packet: *const u8, errmsg: *mut *mut c_char) -> usize; fn get_morloc_data_packet_value( data: *const u8, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8; } let mut err: *mut c_char = ptr::null_mut(); let header = read_morloc_packet_header(packet, &mut err); if header.is_null() { if !err.is_null() { *errmsg = err; } return false; } let cmd_type = (*header).command_type(); if cmd_type == crate::packet::PACKET_TYPE_CALL { let midx = { (*header).command.call.midx }; *hash_out = 
crate::utility::mix(seed, midx as u64); let offset = { (*header).offset } as usize; let length = { (*header).length } as usize; let arg_data = packet.add(32 + offset); let mut arg_start = 0usize; while arg_start < length { let arg_size = morloc_packet_size(arg_data.add(arg_start), &mut err); if !err.is_null() { *errmsg = err; return false; } let arg_bytes = std::slice::from_raw_parts(arg_data.add(arg_start), arg_size); *hash_out = crate::utility::mix(*hash_out, hash::xxh64_with_seed(arg_bytes, *hash_out)); arg_start += arg_size; } } else if cmd_type == crate::packet::PACKET_TYPE_DATA { let voidstar = get_morloc_data_packet_value(packet, schema, &mut err); if voidstar.is_null() { if !err.is_null() { *errmsg = err; } return false; } let rs = CSchema::to_rust(schema); match hash_voidstar_inner(voidstar, &rs, seed) { Ok(h) => *hash_out = h, Err(e) => { set_errmsg(errmsg, &e); return false; } } } else { set_errmsg( errmsg, &MorlocError::Other(format!("Cannot hash packet with command 0x{:02x}", cmd_type)), ); return false; } true } // ── Cache filename generation ────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn make_cache_filename_ext( key: u64, cache_path: *const c_char, ext: *const c_char, errmsg: *mut *mut c_char, ) -> *mut c_char { clear_errmsg(errmsg); let path = CStr::from_ptr(cache_path).to_string_lossy(); let extension = CStr::from_ptr(ext).to_string_lossy(); let filename = format!("{}/{:016x}{}", path, key, extension); match CString::new(filename) { Ok(cs) => cs.into_raw(), Err(_) => { set_errmsg(errmsg, &MorlocError::Other("CString conversion failed".into())); ptr::null_mut() } } } #[no_mangle] pub unsafe extern "C" fn make_cache_filename( key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char, ) -> *mut c_char { let ext = CString::new(".packet").unwrap(); make_cache_filename_ext(key, cache_path, ext.as_ptr(), errmsg) } // ── Cache operations ─────────────────────────────────────────────────────── #[no_mangle] pub 
unsafe extern "C" fn put_cache_packet( voidstar: *const u8, schema: *const CSchema, key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char, ) -> *mut c_char { clear_errmsg(errmsg); extern "C" { fn make_mpk_data_packet(filename: *const c_char, schema: *const CSchema) -> *mut u8; fn morloc_packet_size(packet: *const u8, errmsg: *mut *mut c_char) -> usize; fn pack_with_schema( mlc: *const c_void, schema: *const CSchema, mpk: *mut *mut c_char, mpk_size: *mut usize, errmsg: *mut *mut c_char, ) -> i32; fn write_atomic( filename: *const c_char, data: *const u8, size: usize, errmsg: *mut *mut c_char, ) -> i32; } let mut err: *mut c_char = ptr::null_mut(); // Generate filenames let pkt_filename = make_cache_filename(key, cache_path, &mut err); if pkt_filename.is_null() { *errmsg = err; return ptr::null_mut(); } let dat_ext = CString::new(".dat").unwrap(); let dat_filename = make_cache_filename_ext(key, cache_path, dat_ext.as_ptr(), &mut err); if dat_filename.is_null() { libc::free(pkt_filename as *mut c_void); *errmsg = err; return ptr::null_mut(); } // Create data packet pointing to the .dat file let data_packet = make_mpk_data_packet(dat_filename, schema); if data_packet.is_null() { libc::free(pkt_filename as *mut c_void); libc::free(dat_filename as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Failed to create data packet".into())); return ptr::null_mut(); } let pkt_size = morloc_packet_size(data_packet, &mut err); // Pack voidstar to msgpack let mut mpk_data: *mut c_char = ptr::null_mut(); let mut mpk_size: usize = 0; let rc = pack_with_schema(voidstar as *const c_void, schema, &mut mpk_data, &mut mpk_size, &mut err); if rc != 0 { libc::free(data_packet as *mut c_void); libc::free(pkt_filename as *mut c_void); libc::free(dat_filename as *mut c_void); *errmsg = err; return ptr::null_mut(); } // Write packet file write_atomic(pkt_filename, data_packet, pkt_size, &mut err); libc::free(data_packet as *mut c_void); if !err.is_null() { libc::free(mpk_data as 
*mut c_void); libc::free(pkt_filename as *mut c_void); libc::free(dat_filename as *mut c_void); *errmsg = err; return ptr::null_mut(); } // Write data file write_atomic(dat_filename, mpk_data as *const u8, mpk_size, &mut err); libc::free(mpk_data as *mut c_void); libc::free(dat_filename as *mut c_void); if !err.is_null() { libc::free(pkt_filename as *mut c_void); *errmsg = err; return ptr::null_mut(); } // Return the packet filename let result = libc::strdup(pkt_filename); libc::free(pkt_filename as *mut c_void); result } #[no_mangle] pub unsafe extern "C" fn get_cache_packet( key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let filename = make_cache_filename(key, cache_path, &mut err); if filename.is_null() { *errmsg = err; return ptr::null_mut(); } extern "C" { fn read_binary_file( filename: *const c_char, file_size: *mut usize, errmsg: *mut *mut c_char, ) -> *mut u8; } let mut file_size: usize = 0; let data = read_binary_file(filename, &mut file_size, &mut err); libc::free(filename as *mut c_void); if data.is_null() { *errmsg = err; } data } #[no_mangle] pub unsafe extern "C" fn del_cache_packet( key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let filename = make_cache_filename(key, cache_path, &mut err); if filename.is_null() { *errmsg = err; return false; } let rc = libc::unlink(filename); if rc != 0 { set_errmsg( errmsg, &MorlocError::Other(format!( "Failed to delete cache file '{}'", CStr::from_ptr(filename).to_string_lossy() )), ); libc::free(filename as *mut c_void); return false; } libc::free(filename as *mut c_void); true } #[no_mangle] pub unsafe extern "C" fn check_cache_packet( key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char, ) -> *mut c_char { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let filename = make_cache_filename(key, 
cache_path, &mut err); if filename.is_null() { *errmsg = err; return ptr::null_mut(); } let mut sb: libc::stat = std::mem::zeroed(); if libc::stat(filename, &mut sb) == 0 { let result = libc::strdup(filename); libc::free(filename as *mut c_void); return result; } libc::free(filename as *mut c_void); ptr::null_mut() // Not an error — cache miss } ================================================ FILE: data/rust/morloc-runtime/src/cli.rs ================================================ //! CLI argument handling and voidstar utility functions. //! Replaces cli.c. use std::ffi::{c_char, c_void, CStr}; use std::ptr; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::packet; use crate::shm; // ── argument_t lifecycle ─────────────────────────────────────────────────── // argument_t is defined in eval.h (C). We use it opaquely via libc pointers. // The struct: { value: *mut c_char, fields: *mut *mut c_char, default_fields: *mut *mut c_char, size: usize } #[repr(C)] pub struct ArgumentT { pub value: *mut c_char, pub fields: *mut *mut c_char, pub default_fields: *mut *mut c_char, pub size: usize, } #[no_mangle] pub unsafe extern "C" fn initialize_positional(value: *mut c_char) -> *mut ArgumentT { let arg = libc::calloc(1, std::mem::size_of::()) as *mut ArgumentT; if arg.is_null() { return ptr::null_mut(); } (*arg).value = if value.is_null() { ptr::null_mut() } else { libc::strdup(value) }; (*arg).size = 0; arg } #[no_mangle] pub unsafe extern "C" fn initialize_unrolled( size: usize, default_value: *mut c_char, fields: *mut *mut c_char, default_fields: *mut *mut c_char, ) -> *mut ArgumentT { let arg = libc::calloc(1, std::mem::size_of::()) as *mut ArgumentT; if arg.is_null() { return ptr::null_mut(); } (*arg).value = if default_value.is_null() { ptr::null_mut() } else { libc::strdup(default_value) }; (*arg).size = size; (*arg).fields = libc::calloc(size, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char; for i in 0..size { 
let f = *fields.add(i); if !f.is_null() { *(*arg).fields.add(i) = libc::strdup(f); } } (*arg).default_fields = libc::calloc(size, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char; for i in 0..size { let d = *default_fields.add(i); if !d.is_null() { *(*arg).default_fields.add(i) = libc::strdup(d); } } arg } #[no_mangle] pub unsafe extern "C" fn free_argument_t(arg: *mut ArgumentT) { if arg.is_null() { return; } if !(*arg).value.is_null() { libc::free((*arg).value as *mut c_void); } if !(*arg).fields.is_null() { for i in 0..(*arg).size { let f = *(*arg).fields.add(i); if !f.is_null() { libc::free(f as *mut c_void); } } libc::free((*arg).fields as *mut c_void); } if !(*arg).default_fields.is_null() { for i in 0..(*arg).size { let d = *(*arg).default_fields.add(i); if !d.is_null() { libc::free(d as *mut c_void); } } libc::free((*arg).default_fields as *mut c_void); } libc::free(arg as *mut c_void); } // ── shfree_by_schema ─────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn shfree_by_schema( ptr: *mut c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); if ptr.is_null() || schema.is_null() { return true; } let rs = CSchema::to_rust(schema); match shfree_by_schema_inner(ptr as *mut u8, &rs) { Ok(_) => true, Err(e) => { set_errmsg(errmsg, &e); false } } } fn shfree_by_schema_inner( ptr: *mut u8, schema: &crate::schema::Schema, ) -> Result<(), MorlocError> { use crate::schema::SerialType; // SAFETY: ptr points to voidstar data in SHM with layout described by schema. // We recursively visit sub-structures and zero metadata before the parent shfree. 
unsafe { match schema.serial_type { SerialType::String | SerialType::Array => { let arr = &*(ptr as *const shm::Array); if arr.data > 0 { if !schema.parameters.is_empty() && !schema.parameters[0].is_fixed_width() { let arr_data = shm::rel2abs(arr.data)?; let elem_width = schema.parameters[0].width; for i in 0..arr.size { shfree_by_schema_inner( arr_data.add(i * elem_width), &schema.parameters[0], )?; } } } } SerialType::Tuple | SerialType::Map => { for i in 0..schema.parameters.len() { let child = ptr.add(schema.offsets[i]); shfree_by_schema_inner(child, &schema.parameters[i])?; } } SerialType::Tensor => { // shape and data are inline, freed by parent shfree } _ => { // fixed-size: no sub-data } } // Zero this node's metadata std::ptr::write_bytes(ptr, 0, schema.width); } Ok(()) } // ── adjust_voidstar_relptrs ──────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn adjust_voidstar_relptrs( data: *mut c_void, schema: *const CSchema, base_rel: shm::RelPtr, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); match adjust_relptrs_inner(data as *mut u8, &rs, base_rel) { Ok(_) => 0, Err(e) => { set_errmsg(errmsg, &e); 1 } } } fn adjust_relptrs_inner( data: *mut u8, schema: &crate::schema::Schema, base_rel: shm::RelPtr, ) -> Result<(), MorlocError> { use crate::schema::SerialType; // SAFETY: data points to a voidstar blob in SHM. We adjust relptrs in-place; // all pointer arithmetic stays within the blob's bounds as defined by schema. 
unsafe { match schema.serial_type { SerialType::String | SerialType::Array => { let arr = &mut *(data as *mut shm::Array); arr.data += base_rel; if !schema.parameters.is_empty() && !schema.parameters[0].is_fixed_width() { let arr_data = shm::rel2abs(arr.data)?; let elem_width = schema.parameters[0].width; for i in 0..arr.size { adjust_relptrs_inner( arr_data.add(i * elem_width), &schema.parameters[0], base_rel, )?; } } } SerialType::Tuple | SerialType::Map => { for i in 0..schema.parameters.len() { adjust_relptrs_inner( data.add(schema.offsets[i]), &schema.parameters[i], base_rel, )?; } } SerialType::Optional => { let tag = *data; if tag != 0 && !schema.parameters.is_empty() { let inner_offset = schema.offsets.first().copied().unwrap_or( shm::align_up(1, schema.parameters[0].alignment().max(1)), ); adjust_relptrs_inner( data.add(inner_offset), &schema.parameters[0], base_rel, )?; } } SerialType::Tensor => { let tensor = &mut *(data as *mut shm::Tensor); if tensor.total_elements > 0 { tensor.shape += base_rel; tensor.data += base_rel; } } _ => {} } } Ok(()) } // ── read_voidstar_binary ─────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn read_voidstar_binary( blob: *const u8, blob_size: usize, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_void { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); let base = match shm::shmalloc(blob_size) { Ok(p) => p, Err(e) => { set_errmsg(errmsg, &e); return ptr::null_mut(); } }; std::ptr::copy_nonoverlapping(blob, base, blob_size); let base_rel = match shm::abs2rel(base) { Ok(r) => r, Err(e) => { let _ = shm::shfree(base); set_errmsg(errmsg, &e); return ptr::null_mut(); } }; if let Err(e) = adjust_relptrs_inner(base, &rs, base_rel) { let _ = shm::shfree(base); set_errmsg(errmsg, &e); return ptr::null_mut(); } base as *mut c_void } // ── load_morloc_data_file ────────────────────────────────────────────────── // This function is complex and calls many C functions 
(read_json_with_schema, // unpack_with_schema). Keep delegating to C for now via extern declarations. #[no_mangle] pub unsafe extern "C" fn load_morloc_data_file( path: *const c_char, data: *mut u8, data_size: usize, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_void { clear_errmsg(errmsg); extern "C" { fn read_json_with_schema( dest: *mut u8, json: *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8; fn unpack_with_schema( mpk: *const c_char, mpk_size: usize, schema: *const CSchema, mlcptr: *mut *mut c_void, errmsg: *mut *mut c_char, ) -> i32; } if data_size == 0 { set_errmsg(errmsg, &MorlocError::Other("Cannot parse 0-length data".into())); return ptr::null_mut(); } let path_str = CStr::from_ptr(path).to_string_lossy(); let mut err: *mut c_char = ptr::null_mut(); // 1. Extension-based dispatch if path_str.ends_with(".json") { let json_buf = libc::realloc(data as *mut c_void, data_size + 1) as *mut u8; if json_buf.is_null() { libc::free(data as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("realloc failed".into())); return ptr::null_mut(); } *json_buf.add(data_size) = 0; let result = read_json_with_schema(ptr::null_mut(), json_buf as *mut c_char, schema, &mut err); if !err.is_null() { libc::free(json_buf as *mut c_void); *errmsg = err; return ptr::null_mut(); } libc::free(json_buf as *mut c_void); return result as *mut c_void; } if path_str.ends_with(".mpk") || path_str.ends_with(".msgpack") { let mut result: *mut c_void = ptr::null_mut(); unpack_with_schema(data as *const c_char, data_size, schema, &mut result, &mut err); libc::free(data as *mut c_void); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } return result; } // 2. 
Check for morloc packet header if data_size >= 32 { let magic = *(data as *const u32); if magic == packet::PACKET_MAGIC { let header_bytes: &[u8; 32] = &*(data as *const [u8; 32]); if let Ok(header) = packet::PacketHeader::from_bytes(header_bytes) { if !header.is_data() { libc::free(data as *mut c_void); set_errmsg(errmsg, &MorlocError::Other(format!("Expected data packet in '{}'", path_str))); return ptr::null_mut(); } let offset = { header.offset } as usize; let length = { header.length } as usize; let payload = data.add(32 + offset); let format = { header.command.data.format }; if format == packet::PACKET_FORMAT_VOIDSTAR { let result = read_voidstar_binary(payload, length, schema, &mut err); libc::free(data as *mut c_void); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } return result; } else if format == packet::PACKET_FORMAT_MSGPACK { let mut result: *mut c_void = ptr::null_mut(); unpack_with_schema(payload as *const c_char, length, schema, &mut result, &mut err); libc::free(data as *mut c_void); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } return result; } else { libc::free(data as *mut c_void); set_errmsg(errmsg, &MorlocError::Other(format!("Unsupported format 0x{:02x} in '{}'", format, path_str))); return ptr::null_mut(); } } } } // 3. 
Try JSON let first_byte = *data; let may_be_json = matches!(first_byte, b'\'' | b'"' | b'[' | b'{' | b't' | b'f' | b'n' | b'\t' | b'\n' | b'\r' | b' ' | b'0'..=b'9' | b'-' ); if (data_size > 1 && may_be_json) || (data_size == 1 && first_byte >= b'0' && first_byte <= b'9') { let json_buf = libc::realloc(data as *mut c_void, data_size + 1) as *mut u8; if !json_buf.is_null() { *json_buf.add(data_size) = 0; let result = read_json_with_schema(ptr::null_mut(), json_buf as *mut c_char, schema, &mut err); if err.is_null() && !result.is_null() { libc::free(json_buf as *mut c_void); return result as *mut c_void; } if !err.is_null() { libc::free(err as *mut c_void); err = ptr::null_mut(); } // Fall through to try msgpack // Note: data pointer may have been invalidated by realloc // Use json_buf as the data pointer going forward let mut result: *mut c_void = ptr::null_mut(); unpack_with_schema(json_buf as *const c_char, data_size, schema, &mut result, &mut err); libc::free(json_buf as *mut c_void); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } return result; } } // 4. Try msgpack let mut result: *mut c_void = ptr::null_mut(); unpack_with_schema(data as *const c_char, data_size, schema, &mut result, &mut err); libc::free(data as *mut c_void); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } result } // ── upload_packet (static helper) ──────────────────────────────────────────── /// Copy a voidstar packet into SHM, adjusting relptrs. /// /// # Safety /// `dest` must point to schema.width writable bytes in SHM. /// `data` must point to a valid voidstar blob within [data, data_end]. 
unsafe fn upload_packet( dest: *mut u8, data: *const u8, data_end: usize, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); match upload_packet_inner(dest, data, data_end, schema, &rs) { Ok(_) => 0, Err(e) => { set_errmsg(errmsg, &e); 1 } } } unsafe fn upload_packet_inner( dest: *mut u8, data: *const u8, data_end: usize, schema: *const CSchema, rs: &crate::schema::Schema, ) -> Result<(), MorlocError> { use crate::schema::SerialType; match rs.serial_type { SerialType::String | SerialType::Array => { if (data as usize + rs.width - 1) <= data_end { return Err(MorlocError::Packet("Data is too small to store an array header".into())); } ptr::copy_nonoverlapping(data, dest, rs.width); let arr = &mut *(dest as *mut shm::Array); let arr_data_offset = arr.data as usize; let arr_data = data.add(arr_data_offset); let elem_width = rs.parameters[0].width; let arr_size = arr.size * elem_width; if (arr_data as usize + arr_size - 1) > data_end { return Err(MorlocError::Packet("Data is too small to contain array values".into())); } let data_ptr = shm::shmemcpy(arr_data, arr_size)?; if !rs.is_fixed_width() { let elem_schema = &rs.parameters[0]; // Need the C schema for each element let elem_c_schema = (*schema).parameters; if !elem_c_schema.is_null() { let elem_cs = *elem_c_schema; for i in 0..arr.size { upload_packet_inner( data_ptr.add(i * elem_width), arr_data.add(i * elem_width), data_end, elem_cs, elem_schema, )?; } } } arr.data = shm::abs2rel(data_ptr)?; } SerialType::Tuple | SerialType::Map => { for i in 0..rs.parameters.len() { let elem_cs = if (*schema).parameters.is_null() { return Err(MorlocError::Packet("NULL parameters in schema".into())); } else { *(*schema).parameters.add(i) }; upload_packet_inner( dest.add(rs.offsets[i]), data.add(rs.offsets[i]), data_end, elem_cs, &rs.parameters[i], )?; } } _ => { if (data as usize + rs.width - 1) > data_end { return Err(MorlocError::Packet("Given data packet is too 
small".into())); } ptr::copy_nonoverlapping(data, dest, rs.width); } } Ok(()) } // ── parse_cli_data_argument_singular ───────────────────────────────────────── unsafe fn parse_cli_data_argument_singular( mut dest: *mut u8, arg: *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); extern "C" { fn read_json_with_schema( dest: *mut u8, json: *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8; fn file_exists(filename: *const c_char) -> bool; fn read_binary_fd(file: *mut libc::FILE, file_size: *mut usize, errmsg: *mut *mut c_char) -> *mut u8; } let rs = CSchema::to_rust(schema); let mut err: *mut c_char = ptr::null_mut(); let mut fd: *mut libc::FILE = ptr::null_mut(); // handle STDIN let stdin_path = b"/dev/stdin\0"; let dash_path = b"-\0"; if libc::strcmp(arg, stdin_path.as_ptr() as *const c_char) == 0 || libc::strcmp(arg, dash_path.as_ptr() as *const c_char) == 0 { fd = libc::fdopen(libc::STDIN_FILENO, b"rb\0".as_ptr() as *const c_char); } else if file_exists(arg) { fd = libc::fopen(arg, b"rb\0".as_ptr() as *const c_char); if fd.is_null() { set_errmsg(errmsg, &MorlocError::Other( format!("The argument '{}' is a filename, but it can't be read", CStr::from_ptr(arg).to_string_lossy()) )); return ptr::null_mut(); } } if fd.is_null() { // Literal JSON data if dest.is_null() { match shm::shcalloc(1, rs.width) { Ok(p) => dest = p, Err(e) => { set_errmsg(errmsg, &e); return ptr::null_mut(); } } } dest = read_json_with_schema(dest, arg, schema, &mut err); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } return dest; } // File or stdin let mut data_size: usize = 0; let data = read_binary_fd(fd, &mut data_size, &mut err); // Don't close stdin if fd != libc::fdopen(libc::STDIN_FILENO, b"rb\0".as_ptr() as *const c_char) { libc::fclose(fd); } if !err.is_null() { if !data.is_null() { libc::free(data as *mut c_void); } *errmsg = err; return ptr::null_mut(); } // Special case: RPTR packets if 
data_size >= 32 { let magic = *(data as *const u32); if magic == packet::PACKET_MAGIC { let header = &*(data as *const packet::PacketHeader); let source = header.command.data.source; let format = header.command.data.format; if source == packet::PACKET_SOURCE_RPTR && format == packet::PACKET_FORMAT_VOIDSTAR { if dest.is_null() { match shm::shcalloc(1, rs.width) { Ok(p) => dest = p, Err(e) => { libc::free(data as *mut c_void); set_errmsg(errmsg, &e); return ptr::null_mut(); } } } let voidstar_ptr = data.add(32 + header.offset as usize); if upload_packet(dest, voidstar_ptr, voidstar_ptr as usize + data_size - 1, schema, &mut err) != 0 { libc::free(data as *mut c_void); *errmsg = err; return ptr::null_mut(); } libc::free(data as *mut c_void); return dest; } } } // All other formats: canonical file loader (takes ownership of data) dest = load_morloc_data_file(arg, data, data_size, schema, &mut err) as *mut u8; if !err.is_null() { *errmsg = err; return ptr::null_mut(); } dest } // ── parse_cli_data_argument_unrolled ───────────────────────────────────────── unsafe fn parse_cli_data_argument_unrolled( mut dest: *mut u8, default_value: *mut c_char, fields: *mut *mut c_char, default_fields: *mut *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); let mut err: *mut c_char = ptr::null_mut(); let mut using_record_default = false; if dest.is_null() { match shm::shcalloc(1, rs.width) { Ok(p) => dest = p, Err(e) => { set_errmsg(errmsg, &e); return ptr::null_mut(); } } } if !default_value.is_null() { dest = parse_cli_data_argument_singular(dest, default_value, schema, &mut err); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } using_record_default = true; } use crate::schema::SerialType; match rs.serial_type { SerialType::Tuple | SerialType::Map => { for i in 0..rs.parameters.len() { let element_dest = dest.add(rs.offsets[i]); let field_val = *fields.add(i); let elem_cs = 
*(*schema).parameters.add(i); if !field_val.is_null() { // Free memory from default for this field shfree_by_schema(element_dest as *mut c_void, elem_cs, &mut err); if !err.is_null() { libc::free(err as *mut c_void); err = ptr::null_mut(); } let result = parse_cli_data_argument_singular( element_dest, field_val, elem_cs, &mut err, ); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } let _ = result; // result writes into element_dest } else if using_record_default { continue; } else { let default_field = *default_fields.add(i); if !default_field.is_null() { let result = parse_cli_data_argument_singular( element_dest, default_field, elem_cs, &mut err, ); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } let _ = result; } else { set_errmsg(errmsg, &MorlocError::Other( format!("Field {} missing with no default or default record", i) )); return ptr::null_mut(); } } } } _ => { set_errmsg(errmsg, &MorlocError::Other("Only record and tuple types may be unrolled".into())); return ptr::null_mut(); } } dest } // ── parse_cli_data_argument ────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn parse_cli_data_argument( dest: *mut u8, arg: *const ArgumentT, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let result = if (*arg).fields.is_null() { parse_cli_data_argument_singular(dest, (*arg).value, schema, &mut err) } else { parse_cli_data_argument_unrolled( dest, (*arg).value, (*arg).fields, (*arg).default_fields, schema, &mut err, ) }; if !err.is_null() { *errmsg = err; return ptr::null_mut(); } if result.is_null() { return ptr::null_mut(); } let relptr = match shm::abs2rel(result) { Ok(r) => r, Err(e) => { set_errmsg(errmsg, &e); return ptr::null_mut(); } }; // Call the Rust make_standard_data_packet FFI crate::packet_ffi::make_standard_data_packet(relptr, schema) } // ── make_call_packet_from_cli 
──────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn make_call_packet_from_cli( dest: *mut u8, mid: u32, args: *mut *mut ArgumentT, // NULL-terminated arg_schema_strs: *mut *mut c_char, // NULL-terminated errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); // Count and parse schemas let mut nschemas: usize = 0; while !(*arg_schema_strs.add(nschemas)).is_null() { nschemas += 1; } let mut schemas: Vec<*mut CSchema> = Vec::with_capacity(nschemas); for i in 0..nschemas { let schema = crate::ffi::parse_schema(*arg_schema_strs.add(i), &mut err); if !err.is_null() { for s in &schemas { CSchema::free(*s); } *errmsg = err; return ptr::null_mut(); } schemas.push(schema); } // Count args let mut nargs: usize = 0; while !(*args.add(nargs)).is_null() { nargs += 1; } // Parse each argument into a data packet let mut packet_args: Vec<*const u8> = Vec::with_capacity(nargs); for i in 0..nargs { let packet = parse_cli_data_argument(dest, *args.add(i), schemas[i], &mut err); if !err.is_null() { for p in &packet_args { libc::free(*p as *mut c_void); } for s in &schemas { CSchema::free(*s); } *errmsg = err; return ptr::null_mut(); } packet_args.push(packet as *const u8); } // Build call packet let call_packet = crate::packet_ffi::make_morloc_local_call_packet( mid, packet_args.as_ptr(), nargs, &mut err, ); for p in &packet_args { libc::free(*p as *mut c_void); } for s in &schemas { CSchema::free(*s); } if !err.is_null() { *errmsg = err; return ptr::null_mut(); } call_packet } ================================================ FILE: data/rust/morloc-runtime/src/cschema.rs ================================================ //! C-compatible Schema type for FFI. //! This module is always compiled (even with no-ffi-exports feature). use std::ffi::{c_char, CStr, CString}; use std::ptr; use crate::schema::{Schema, SerialType}; /// C-compatible Schema struct matching the C `Schema` layout. 
/// C-compatible Schema struct matching the C `Schema` layout.
#[repr(C)]
pub struct CSchema {
    pub serial_type: u32,
    pub size: usize,
    pub width: usize,
    pub offsets: *mut usize,
    pub hint: *mut c_char,
    pub parameters: *mut *mut CSchema,
    pub keys: *mut *mut c_char,
}

impl CSchema {
    /// Deep-copy a Rust `Schema` into a heap-allocated C-layout tree.
    /// Free the result with `CSchema::free`.
    pub fn from_rust(schema: &Schema) -> *mut CSchema {
        let cs = Box::new(CSchema {
            serial_type: schema.serial_type as u32,
            size: schema.size,
            width: schema.width,
            offsets: if schema.offsets.is_empty() {
                ptr::null_mut()
            } else {
                let mut v = schema.offsets.clone().into_boxed_slice();
                let p = v.as_mut_ptr();
                std::mem::forget(v);
                p
            },
            hint: match &schema.hint {
                Some(s) => CString::new(s.as_str()).unwrap_or_default().into_raw(),
                None => ptr::null_mut(),
            },
            parameters: if schema.parameters.is_empty() {
                ptr::null_mut()
            } else {
                // BUG FIX: `free` reconstructs this allocation with
                // Vec::from_raw_parts(p, size, size), which requires the
                // capacity to be exactly `size`. A leaked Vec from collect()
                // does not guarantee capacity == len; into_boxed_slice
                // shrinks-to-fit first (offsets above already did this).
                let mut ptrs: Box<[*mut CSchema]> = schema
                    .parameters
                    .iter()
                    .map(|p| CSchema::from_rust(p))
                    .collect::<Vec<_>>()
                    .into_boxed_slice();
                let p = ptrs.as_mut_ptr();
                std::mem::forget(ptrs);
                p
            },
            keys: if schema.keys.is_empty() {
                ptr::null_mut()
            } else {
                // Same capacity normalization as `parameters` above.
                let mut ptrs: Box<[*mut c_char]> = schema
                    .keys
                    .iter()
                    .map(|k| CString::new(k.as_str()).unwrap_or_default().into_raw())
                    .collect::<Vec<_>>()
                    .into_boxed_slice();
                let p = ptrs.as_mut_ptr();
                std::mem::forget(ptrs);
                p
            },
        });
        Box::into_raw(cs)
    }

    /// Convert a C-allocated CSchema to a Rust Schema by deep-copying all data.
    ///
    /// # Safety
    /// `cs` must be null or a valid pointer to a CSchema allocated by `from_rust`
    /// or equivalent C code. All child pointers must be valid for `cs.size` entries.
    pub unsafe fn to_rust(cs: *const CSchema) -> Schema {
        if cs.is_null() {
            return Schema::primitive(SerialType::Nil);
        }
        let cs = &*cs;
        // SAFETY: SerialType is #[repr(u32)] and cs.serial_type was set from a valid SerialType.
        let serial_type = std::mem::transmute::<u32, SerialType>(cs.serial_type);
        let offsets = if cs.offsets.is_null() || cs.size == 0 {
            Vec::new()
        } else {
            // Only container-like types carry offsets; the count depends on kind.
            let n = match serial_type {
                SerialType::Tuple | SerialType::Map => cs.size,
                SerialType::Optional | SerialType::Tensor => 1,
                _ => 0,
            };
            if n > 0 {
                std::slice::from_raw_parts(cs.offsets, n).to_vec()
            } else {
                Vec::new()
            }
        };
        let parameters = if cs.parameters.is_null() || cs.size == 0 {
            Vec::new()
        } else {
            (0..cs.size)
                .map(|i| CSchema::to_rust(*cs.parameters.add(i)))
                .collect()
        };
        let keys = if cs.keys.is_null() || cs.size == 0 {
            Vec::new()
        } else {
            // NOTE(review): NULL key entries are silently skipped, so keys
            // may end up shorter than cs.size — confirm downstream code
            // tolerates that.
            (0..cs.size)
                .filter_map(|i| {
                    let p = *cs.keys.add(i);
                    if p.is_null() {
                        None
                    } else {
                        Some(CStr::from_ptr(p).to_string_lossy().into_owned())
                    }
                })
                .collect()
        };
        let hint = if cs.hint.is_null() {
            None
        } else {
            Some(CStr::from_ptr(cs.hint).to_string_lossy().into_owned())
        };
        Schema {
            serial_type,
            size: cs.size,
            width: cs.width,
            offsets,
            hint,
            parameters,
            keys,
        }
    }
let serial_type = std::mem::transmute::(cs.serial_type); let offsets = if cs.offsets.is_null() || cs.size == 0 { Vec::new() } else { let n = match serial_type { SerialType::Tuple | SerialType::Map => cs.size, SerialType::Optional | SerialType::Tensor => 1, _ => 0, }; if n > 0 { std::slice::from_raw_parts(cs.offsets, n).to_vec() } else { Vec::new() } }; let parameters = if cs.parameters.is_null() || cs.size == 0 { Vec::new() } else { (0..cs.size) .map(|i| CSchema::to_rust(*cs.parameters.add(i))) .collect() }; let keys = if cs.keys.is_null() || cs.size == 0 { Vec::new() } else { (0..cs.size) .filter_map(|i| { let p = *cs.keys.add(i); if p.is_null() { None } else { Some(CStr::from_ptr(p).to_string_lossy().into_owned()) } }) .collect() }; let hint = if cs.hint.is_null() { None } else { Some(CStr::from_ptr(cs.hint).to_string_lossy().into_owned()) }; Schema { serial_type, size: cs.size, width: cs.width, offsets, hint, parameters, keys, } } /// Free a CSchema and all its children (same logic as ffi::free_schema). /// /// # Safety /// `schema` must be null or a valid pointer previously returned by `from_rust`. pub unsafe fn free(schema: *mut CSchema) { if schema.is_null() { return; } let cs = Box::from_raw(schema); // SAFETY: cs.serial_type was set from a valid SerialType in from_rust. 
let st = std::mem::transmute::(cs.serial_type); if !cs.offsets.is_null() { let n = match st { SerialType::Tuple | SerialType::Map => cs.size, SerialType::Optional | SerialType::Tensor => 1, _ => 0, }; if n > 0 { let _ = Vec::from_raw_parts(cs.offsets, n, n); } } if !cs.hint.is_null() { let _ = CString::from_raw(cs.hint); } if !cs.parameters.is_null() && cs.size > 0 { let ptrs = Vec::from_raw_parts(cs.parameters, cs.size, cs.size); for p in ptrs { CSchema::free(p); } } if !cs.keys.is_null() && cs.size > 0 { let ptrs = Vec::from_raw_parts(cs.keys, cs.size, cs.size); for p in ptrs { if !p.is_null() { let _ = CString::from_raw(p); } } } } } ================================================ FILE: data/rust/morloc-runtime/src/daemon_ffi.rs ================================================ //! C ABI wrappers for daemon subsystems. //! Replaces daemon.c. Uses serde_json, HashMap, VecDeque, and std::thread. use std::collections::HashMap; use std::collections::VecDeque; use std::ffi::{c_char, c_void, CStr, CString}; use std::ptr; use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; use std::sync::{Arc, Condvar, Mutex}; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::hash; use crate::http_ffi::{DaemonMethod, DaemonRequest, HttpRequest}; // -- Constants ---------------------------------------------------------------- const DEFAULT_XXHASH_SEED: u64 = 0; const MAX_LP_MESSAGE: u32 = 64 * 1024 * 1024; // -- Global state ------------------------------------------------------------- static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false); static G_EVAL_TIMEOUT: AtomicI32 = AtomicI32::new(30); // SAFETY: These globals are set once during daemon_run initialization (single-threaded) // and only read afterwards. The daemon is single-threaded for request dispatch. 
// Liveness callback installed by daemon_run; queried by the "health" method.
// NOTE(review): the type here reads `Option bool>` — the angle-bracket
// contents appear to have been stripped by text extraction (presumably
// `Option<extern "C" fn(usize) -> bool>` originally). The same stripping
// affects several `Option`/`Vec`/turbofish spots below — confirm against the
// upstream repository before treating this listing as compilable.
static mut G_POOL_ALIVE_FN: Option bool> = None;
// Number of language pools the daemon manages (parallel to the sockets array).
static mut G_N_POOLS: usize = 0;
// Owned by daemon initialization; read by daemon_dispatch for bind/eval caching.
static mut G_BINDING_STORE: *mut BindingStore = ptr::null_mut();

// -- C-compatible types -------------------------------------------------------

/// Matches morloc_socket_t from call.h
/// One entry per language pool: the command used to start it, the unix socket
/// it listens on, and its process id.
#[repr(C)]
pub struct MorlocSocket {
    pub lang: *mut c_char,
    pub syscmd: *mut *mut c_char,
    pub socket_filename: *mut c_char,
    pub pid: i32,
}

/// Matches daemon_config_t from daemon.h
/// Passed in from C when the daemon starts; fields mirror the C struct layout
/// exactly (repr(C)), so field order must not change.
#[repr(C)]
pub struct DaemonConfig {
    pub unix_socket_path: *const c_char,
    pub tcp_port: i32,
    pub http_port: i32,
    // NOTE(review): callback signatures lost to the same angle-bracket
    // stripping as above — restore from daemon.h.
    pub pool_check_fn: Option,
    pub pool_alive_fn: Option bool>,
    pub n_pools: usize,
    pub eval_timeout: i32,
}

/// Matches daemon_response_t from daemon.h
/// All pointer fields are malloc/strdup-allocated C strings owned by the
/// response; daemon_free_response releases them.
#[repr(C)]
pub struct DaemonResponse {
    pub id: *mut c_char,
    pub success: bool,
    pub result_json: *mut c_char,
    pub error: *mut c_char,
}

// -- Binding store (replaces linear-probe hash table with HashMap) ------------

/// One compiled expression binding: keyed by the xxh64 of the expression text,
/// with zero or more user-assigned names aliasing it.
struct BindingEntry {
    hash: u64,
    expr: String,
    #[allow(dead_code)]
    artifact_dir: String,
    // Optional type signature (never populated in this chunk; see bind()).
    type_sig: Option,
    names: Vec,
}

/// In-memory registry of bound expressions plus an on-disk artifact directory
/// per binding (base_dir/<hash-hex>).
struct BindingStore {
    entries: HashMap,
    /// Index from name -> hash for name-based lookup
    name_index: HashMap,
    base_dir: String,
}

impl BindingStore {
    /// Create a store rooted at `base_dir`, creating the directory if needed.
    /// Directory-creation failure is deliberately ignored (best-effort).
    fn new(base_dir: &str) -> Self {
        let _ = std::fs::create_dir_all(base_dir);
        BindingStore {
            entries: HashMap::new(),
            name_index: HashMap::new(),
            base_dir: base_dir.to_string(),
        }
    }

    /// Look up a binding by its expression hash.
    fn lookup_hash(&self, hash: u64) -> Option<&BindingEntry> {
        self.entries.get(&hash)
    }

    /// Look up a binding by one of its user-assigned names.
    fn lookup_name(&self, name: &str) -> Option<&BindingEntry> {
        let hash = self.name_index.get(name)?;
        self.entries.get(hash)
    }

    /// Alias `name` to the binding with hash `hash`. The name index is updated
    /// even if no entry exists yet for that hash (intentional: bind() inserts
    /// the entry first, but callers may rebind names freely).
    fn add_name(&mut self, hash: u64, name: &str) {
        if let Some(entry) = self.entries.get_mut(&hash) {
            if !entry.names.contains(&name.to_string()) {
                entry.names.push(name.to_string());
            }
        }
        self.name_index.insert(name.to_string(), hash);
    }

    /// Compile-and-cache `expr` by forking `morloc eval --save <hex> <expr>`.
    /// Returns the expression hash on success, None on any failure (pipe/fork
    /// failure or a non-zero exit from the child). If the expression was
    /// already bound, only the optional name alias is added.
    ///
    /// `eval_timeout` (seconds) is applied in the child via RLIMIT_CPU, plus a
    /// fixed 2 GiB RLIMIT_AS address-space cap.
    fn bind(&mut self, expr: &str, name: Option<&str>, eval_timeout: i32) -> Option {
        let hv = hash::xxh64_with_seed(expr.as_bytes(), DEFAULT_XXHASH_SEED);
        if self.entries.contains_key(&hv) {
            if let Some(n) = name {
                self.add_name(hv, n);
            }
            return Some(hv);
        }
        let hash_hex = format!("{:016x}", hv);
        let artifact_dir = format!("{}/{}", self.base_dir, hash_hex);
        // Fork morloc eval --save
        unsafe {
            let mut stdout_pipe = [0i32; 2];
            let mut stderr_pipe = [0i32; 2];
            // NOTE(review): if the second pipe() fails the first pipe's fds
            // leak — confirm whether that is acceptable here.
            if libc::pipe(stdout_pipe.as_mut_ptr()) != 0
                || libc::pipe(stderr_pipe.as_mut_ptr()) != 0
            {
                return None;
            }
            let pid = libc::fork();
            if pid < 0 {
                libc::close(stdout_pipe[0]);
                libc::close(stdout_pipe[1]);
                libc::close(stderr_pipe[0]);
                libc::close(stderr_pipe[1]);
                return None;
            }
            if pid == 0 {
                // Child: wire pipe write-ends to stdout/stderr, apply resource
                // limits, then exec morloc. _exit(127) if exec fails.
                libc::close(stdout_pipe[0]);
                libc::close(stderr_pipe[0]);
                libc::dup2(stdout_pipe[1], libc::STDOUT_FILENO);
                libc::dup2(stderr_pipe[1], libc::STDERR_FILENO);
                libc::close(stdout_pipe[1]);
                libc::close(stderr_pipe[1]);
                if eval_timeout > 0 {
                    // Soft CPU limit at the timeout, hard limit 5s later so the
                    // child first gets SIGXCPU, then SIGKILL.
                    let cpu_limit = libc::rlimit {
                        rlim_cur: eval_timeout as libc::rlim_t,
                        rlim_max: (eval_timeout + 5) as libc::rlim_t,
                    };
                    libc::setrlimit(libc::RLIMIT_CPU, &cpu_limit);
                    // 2 GiB address-space cap.
                    let as_limit = libc::rlimit {
                        rlim_cur: 2 * 1024 * 1024 * 1024,
                        rlim_max: 2 * 1024 * 1024 * 1024,
                    };
                    libc::setrlimit(libc::RLIMIT_AS, &as_limit);
                }
                let cmd = CString::new("morloc").unwrap();
                let arg_eval = CString::new("eval").unwrap();
                let arg_save = CString::new("--save").unwrap();
                let arg_hex = CString::new(hash_hex.as_str()).unwrap();
                let arg_expr = CString::new(expr).unwrap();
                libc::execlp(
                    cmd.as_ptr(),
                    cmd.as_ptr(),
                    arg_eval.as_ptr(),
                    arg_save.as_ptr(),
                    arg_hex.as_ptr(),
                    arg_expr.as_ptr(),
                    ptr::null::(),
                );
                libc::_exit(127);
            }
            // Parent: close write-ends, drain stderr (up to 4095 bytes), reap.
            // NOTE(review): stdout is never read before its read-end is closed
            // below; a child that fills the stdout pipe buffer would block and
            // the stderr drain would never see EOF — confirm `morloc eval
            // --save` writes little/nothing to stdout.
            libc::close(stdout_pipe[1]);
            libc::close(stderr_pipe[1]);
            let mut stderr_buf = vec![0u8; 4096];
            let mut stderr_len: usize = 0;
            loop {
                let n = libc::read(
                    stderr_pipe[0],
                    stderr_buf.as_mut_ptr().add(stderr_len) as *mut c_void,
                    stderr_buf.len() - stderr_len - 1,
                );
                if n <= 0 {
                    break;
                }
                stderr_len += n as usize;
            }
            libc::close(stdout_pipe[0]);
            libc::close(stderr_pipe[0]);
            let mut status: i32 = 0;
            libc::waitpid(pid, &mut status, 0);
            if !libc::WIFEXITED(status) || libc::WEXITSTATUS(status) != 0 {
                // Child failed: surface its stderr on our stderr and bail.
                stderr_buf.truncate(stderr_len);
                let msg = String::from_utf8_lossy(&stderr_buf);
                eprintln!("binding_store_bind: morloc eval --save failed: {}", msg);
                return None;
            }
        }
        // Success: record the binding (type_sig left unset here).
        let entry = BindingEntry {
            hash: hv,
            expr: expr.to_string(),
            artifact_dir,
            type_sig: None,
            names: Vec::new(),
        };
        self.entries.insert(hv, entry);
        if let Some(n) = name {
            self.add_name(hv, n);
        }
        Some(hv)
    }

    /// Serialize all bindings as `{"bindings":[{hash, expr, type?, names}...]}`.
    /// Returns "" if serialization fails (unwrap_or_default).
    fn list_json(&self) -> String {
        #[derive(serde::Serialize)]
        struct BindingInfo {
            hash: String,
            expr: String,
            #[serde(skip_serializing_if = "Option::is_none")]
            r#type: Option,
            names: Vec,
        }
        #[derive(serde::Serialize)]
        struct BindingsList {
            bindings: Vec,
        }
        let bindings: Vec = self
            .entries
            .values()
            .map(|e| BindingInfo {
                hash: format!("{:016x}", e.hash),
                expr: e.expr.clone(),
                r#type: e.type_sig.clone(),
                names: e.names.clone(),
            })
            .collect();
        serde_json::to_string(&BindingsList { bindings }).unwrap_or_default()
    }

    /// Remove a name alias. The underlying entry (and its artifacts) are kept
    /// even when the last name is removed. Returns false if the name is unknown.
    fn unbind(&mut self, name: &str) -> bool {
        let hash = match self.name_index.remove(name) {
            Some(h) => h,
            None => return false,
        };
        if let Some(entry) = self.entries.get_mut(&hash) {
            entry.names.retain(|n| n != name);
        }
        true
    }
}

// -- C-exported binding store functions ---------------------------------------

/// C entry point: allocate a BindingStore rooted at `base_dir` and return it
/// as an opaque pointer.
///
/// # Safety
/// `base_dir` must be a valid NUL-terminated C string.
#[no_mangle]
pub unsafe extern "C" fn binding_store_init(base_dir: *const c_char) -> *mut c_void {
    let dir = CStr::from_ptr(base_dir).to_string_lossy().into_owned();
    let store = Box::new(BindingStore::new(&dir));
    Box::into_raw(store) as *mut c_void
}

/// C entry point: free a store previously returned by binding_store_init.
/// Null is accepted and ignored.
#[no_mangle]
pub unsafe extern "C" fn binding_store_free(store: *mut c_void) {
    if !store.is_null() {
        drop(Box::from_raw(store as *mut BindingStore));
    }
}

// -- Request parsing (serde_json) ---------------------------------------------

/// Wire shape of an incoming daemon request; every field is optional so that
/// method-specific fields can be absent.
#[derive(serde::Deserialize)]
struct JsonRequest {
    id: Option,
    method: Option,
    command: Option,
    args: Option,
    expr: Option,
    name: Option,
}

/// Parse `len` bytes of JSON into a C-allocated DaemonRequest.
///
/// All string fields in the result are libc-strdup'd so the C side can free
/// them uniformly; the struct itself is calloc'd (so unset fields are zeroed).
/// On any error, sets `*errmsg` and returns null. An unknown `method` frees
/// the partially-built request before returning.
///
/// # Safety
/// `json` must point to at least `len` readable bytes; `errmsg` must be null
/// or a valid out-pointer.
#[no_mangle]
pub unsafe extern "C" fn daemon_parse_request(
    json: *const c_char,
    len: usize,
    errmsg: *mut *mut c_char,
) -> *mut DaemonRequest {
    clear_errmsg(errmsg);
    let slice = std::slice::from_raw_parts(json as *const u8, len);
    let text = match std::str::from_utf8(slice) {
        Ok(s) => s,
        Err(_) => {
            set_errmsg(errmsg, &MorlocError::Other("Invalid UTF-8 in request".into()));
            return ptr::null_mut();
        }
    };
    let parsed: JsonRequest = match serde_json::from_str(text) {
        Ok(r) => r,
        Err(e) => {
            set_errmsg(
                errmsg,
                &MorlocError::Other(format!("Failed to parse request JSON: {}", e)),
            );
            return ptr::null_mut();
        }
    };
    // NOTE(review): the turbofish argument was stripped by extraction —
    // presumably size_of::<DaemonRequest>() originally.
    let req = libc::calloc(1, std::mem::size_of::()) as *mut DaemonRequest;
    if req.is_null() {
        set_errmsg(
            errmsg,
            &MorlocError::Other("Failed to allocate daemon_request_t".into()),
        );
        return ptr::null_mut();
    }
    if let Some(id) = &parsed.id {
        let c = CString::new(id.as_str()).unwrap_or_default();
        (*req).id = libc::strdup(c.as_ptr());
    }
    if let Some(method) = &parsed.method {
        // Map the method string onto the DaemonMethod enum; reject unknowns.
        (*req).method = match method.as_str() {
            "call" => DaemonMethod::Call,
            "discover" => DaemonMethod::Discover,
            "health" => DaemonMethod::Health,
            "eval" => DaemonMethod::Eval,
            "typecheck" => DaemonMethod::Typecheck,
            "bind" => DaemonMethod::Bind,
            "bindings" => DaemonMethod::Bindings,
            "unbind" => DaemonMethod::Unbind,
            _ => {
                daemon_free_request(req);
                set_errmsg(
                    errmsg,
                    &MorlocError::Other(format!("Unknown method: {}", method)),
                );
                return ptr::null_mut();
            }
        };
    }
    if let Some(cmd) = &parsed.command {
        let c = CString::new(cmd.as_str()).unwrap_or_default();
        (*req).command = libc::strdup(c.as_ptr());
    }
    if let Some(args) = &parsed.args {
        // args is kept as a re-serialized JSON string; daemon_dispatch parses
        // it again when handling "call".
        let args_str = serde_json::to_string(args).unwrap_or_default();
        let c = CString::new(args_str).unwrap_or_default();
        (*req).args_json = libc::strdup(c.as_ptr());
    }
    if let Some(expr) = &parsed.expr {
        let c = CString::new(expr.as_str()).unwrap_or_default();
        (*req).expr = libc::strdup(c.as_ptr());
    }
    if let Some(name) = &parsed.name {
        let c = CString::new(name.as_str()).unwrap_or_default();
        (*req).name = libc::strdup(c.as_ptr());
    }
    req
}

// -- Response parsing (serde_json)
// -----------------------------------------------------------------------------

/// Wire shape of a pool/daemon JSON response; all fields optional.
#[derive(serde::Deserialize)]
struct JsonResponse {
    id: Option,
    status: Option,
    result: Option,
    error: Option,
}

/// Parse `len` bytes of JSON into a C-allocated DaemonResponse.
///
/// `success` is true only when `status == "ok"`; `result` is re-serialized to
/// a JSON string. All strings are libc-strdup'd, the struct is calloc'd.
/// On error sets `*errmsg` and returns null.
///
/// # Safety
/// `json` must point to at least `len` readable bytes; `errmsg` must be null
/// or a valid out-pointer.
#[no_mangle]
pub unsafe extern "C" fn daemon_parse_response(
    json: *const c_char,
    len: usize,
    errmsg: *mut *mut c_char,
) -> *mut DaemonResponse {
    clear_errmsg(errmsg);
    let slice = std::slice::from_raw_parts(json as *const u8, len);
    let text = match std::str::from_utf8(slice) {
        Ok(s) => s,
        Err(_) => {
            set_errmsg(errmsg, &MorlocError::Other("Invalid UTF-8 in response".into()));
            return ptr::null_mut();
        }
    };
    let parsed: JsonResponse = match serde_json::from_str(text) {
        Ok(r) => r,
        Err(e) => {
            set_errmsg(
                errmsg,
                &MorlocError::Other(format!("Failed to parse response JSON: {}", e)),
            );
            return ptr::null_mut();
        }
    };
    // NOTE(review): turbofish argument stripped by extraction — presumably
    // size_of::<DaemonResponse>() originally.
    let resp = libc::calloc(1, std::mem::size_of::()) as *mut DaemonResponse;
    if resp.is_null() {
        set_errmsg(
            errmsg,
            &MorlocError::Other("Failed to allocate daemon_response_t".into()),
        );
        return ptr::null_mut();
    }
    if let Some(id) = &parsed.id {
        let c = CString::new(id.as_str()).unwrap_or_default();
        (*resp).id = libc::strdup(c.as_ptr());
    }
    // success <=> status field present and exactly "ok".
    (*resp).success = parsed
        .status
        .as_deref()
        .map(|s| s == "ok")
        .unwrap_or(false);
    if let Some(result) = &parsed.result {
        let s = serde_json::to_string(result).unwrap_or_default();
        let c = CString::new(s).unwrap_or_default();
        (*resp).result_json = libc::strdup(c.as_ptr());
    }
    if let Some(error) = &parsed.error {
        let c = CString::new(error.as_str()).unwrap_or_default();
        (*resp).error = libc::strdup(c.as_ptr());
    }
    resp
}

// -- Free functions -----------------------------------------------------------

/// Free a DaemonRequest and every strdup'd field. Null is accepted.
///
/// # Safety
/// `req` must be null or a pointer returned by daemon_parse_request.
#[no_mangle]
pub unsafe extern "C" fn daemon_free_request(req: *mut DaemonRequest) {
    if req.is_null() {
        return;
    }
    if !(*req).id.is_null() {
        libc::free((*req).id as *mut c_void);
    }
    if !(*req).command.is_null() {
        libc::free((*req).command as *mut c_void);
    }
    if !(*req).args_json.is_null() {
        libc::free((*req).args_json as *mut c_void);
    }
    if !(*req).expr.is_null() {
        libc::free((*req).expr as *mut c_void);
    }
    if !(*req).name.is_null() {
        libc::free((*req).name as *mut c_void);
    }
    libc::free(req as *mut c_void);
}

/// Free a DaemonResponse and every strdup'd field. Null is accepted.
///
/// # Safety
/// `resp` must be null or a pointer produced by this module's response
/// constructors (daemon_parse_response, daemon_dispatch, fork_morloc_command).
#[no_mangle]
pub unsafe extern "C" fn daemon_free_response(resp: *mut DaemonResponse) {
    if resp.is_null() {
        return;
    }
    if !(*resp).id.is_null() {
        libc::free((*resp).id as *mut c_void);
    }
    if !(*resp).result_json.is_null() {
        libc::free((*resp).result_json as *mut c_void);
    }
    if !(*resp).error.is_null() {
        libc::free((*resp).error as *mut c_void);
    }
    libc::free(resp as *mut c_void);
}

// -- Response serialization (serde_json) --------------------------------------

/// Serialize a DaemonResponse to a JSON object string (`id?`, `status`,
/// `result?` on success, `error?` on failure). Writes the byte length to
/// `out_len` if non-null. Returns a libc-strdup'd string owned by the caller.
///
/// # Safety
/// `response` must be a valid DaemonResponse pointer; `out_len` null or valid.
#[no_mangle]
pub unsafe extern "C" fn daemon_serialize_response(
    response: *mut DaemonResponse,
    out_len: *mut usize,
) -> *mut c_char {
    let mut map = serde_json::Map::new();
    if !(*response).id.is_null() {
        let id = CStr::from_ptr((*response).id).to_string_lossy();
        map.insert("id".into(), serde_json::Value::String(id.into_owned()));
    }
    map.insert(
        "status".into(),
        serde_json::Value::String(
            if (*response).success { "ok" } else { "error" }.into(),
        ),
    );
    if (*response).success && !(*response).result_json.is_null() {
        let raw = CStr::from_ptr((*response).result_json).to_string_lossy();
        // Try to parse as JSON value; if it fails, store as raw string
        match serde_json::from_str::(&raw) {
            Ok(v) => {
                map.insert("result".into(), v);
            }
            Err(_) => {
                map.insert("result".into(), serde_json::Value::String(raw.into_owned()));
            }
        }
    }
    if !(*response).success && !(*response).error.is_null() {
        let err = CStr::from_ptr((*response).error).to_string_lossy();
        map.insert("error".into(), serde_json::Value::String(err.into_owned()));
    }
    let json_str = serde_json::to_string(&map).unwrap_or_else(|_| "{}".into());
    if !out_len.is_null() {
        *out_len = json_str.len();
    }
    let c = CString::new(json_str).unwrap_or_default();
    libc::strdup(c.as_ptr())
}

// -- Discovery ----------------------------------------------------------------

/// Build the discovery JSON for a manifest by delegating to the C ABI symbol
/// `manifest_to_discovery_json` (ported in manifest_ffi). Caller owns the
/// returned C string.
///
/// # Safety
/// `manifest` must be a valid manifest pointer accepted by that symbol.
#[no_mangle]
pub unsafe extern "C" fn daemon_build_discovery(manifest: *mut c_void) -> *mut c_char {
    extern "C" {
        fn manifest_to_discovery_json(manifest: *const c_void) -> *mut c_char;
    }
    manifest_to_discovery_json(manifest)
}

// -- Eval timeout -------------------------------------------------------------

/// Set the global eval timeout (seconds). Non-positive values reset to the
/// default of 30 seconds.
#[no_mangle]
pub extern "C" fn daemon_set_eval_timeout(timeout_sec: i32) {
    let t = if timeout_sec > 0 { timeout_sec } else { 30 };
    G_EVAL_TIMEOUT.store(t, Ordering::Relaxed);
}

// -- Fork-based eval/typecheck ------------------------------------------------

/// Fork `morloc `, capture stdout/stderr, return a DaemonResponse.
///
/// On exit code 0: success, with stdout (trailing newlines trimmed) as
/// `result_json`. Otherwise: failure, with stderr as the error message, or a
/// signal/exit-code description if stderr is empty. The child is subject to
/// the global eval timeout via RLIMIT_CPU plus a 2 GiB RLIMIT_AS cap.
/// The returned response is calloc'd and owned by the caller.
unsafe fn fork_morloc_command(subcmd: &str, expr: *const c_char) -> *mut DaemonResponse {
    // NOTE(review): turbofish argument stripped — presumably
    // size_of::<DaemonResponse>() originally.
    let resp = libc::calloc(1, std::mem::size_of::()) as *mut DaemonResponse;
    let mut stdout_pipe = [0i32; 2];
    let mut stderr_pipe = [0i32; 2];
    if libc::pipe(stdout_pipe.as_mut_ptr()) != 0 || libc::pipe(stderr_pipe.as_mut_ptr()) != 0 {
        (*resp).success = false;
        let c = CString::new(format!("Failed to create pipes for {}", subcmd)).unwrap_or_default();
        (*resp).error = libc::strdup(c.as_ptr());
        return resp;
    }
    let pid = libc::fork();
    if pid < 0 {
        (*resp).success = false;
        let c = CString::new(format!("Failed to fork for {}", subcmd)).unwrap_or_default();
        (*resp).error = libc::strdup(c.as_ptr());
        libc::close(stdout_pipe[0]);
        libc::close(stdout_pipe[1]);
        libc::close(stderr_pipe[0]);
        libc::close(stderr_pipe[1]);
        return resp;
    }
    if pid == 0 {
        // Child: redirect stdout/stderr into the pipes, apply limits, exec.
        libc::close(stdout_pipe[0]);
        libc::close(stderr_pipe[0]);
        libc::dup2(stdout_pipe[1], libc::STDOUT_FILENO);
        libc::dup2(stderr_pipe[1], libc::STDERR_FILENO);
        libc::close(stdout_pipe[1]);
        libc::close(stderr_pipe[1]);
        let timeout = G_EVAL_TIMEOUT.load(Ordering::Relaxed);
        if timeout > 0 {
            // Soft CPU limit at timeout, hard at timeout+5 (SIGXCPU then SIGKILL).
            let cpu_limit = libc::rlimit {
                rlim_cur: timeout as libc::rlim_t,
                rlim_max: (timeout + 5) as libc::rlim_t,
            };
            libc::setrlimit(libc::RLIMIT_CPU, &cpu_limit);
            let as_limit = libc::rlimit {
                rlim_cur: 2 * 1024 * 1024 * 1024,
                rlim_max: 2 * 1024 * 1024 * 1024,
            };
            libc::setrlimit(libc::RLIMIT_AS, &as_limit);
        }
        let cmd = CString::new("morloc").unwrap();
        let arg_subcmd = CString::new(subcmd).unwrap();
        libc::execlp(
            cmd.as_ptr(),
            cmd.as_ptr(),
            arg_subcmd.as_ptr(),
            expr,
            ptr::null::(),
        );
        libc::_exit(127);
    }
    // Parent: close write-ends, drain both pipes fully, then reap the child.
    libc::close(stdout_pipe[1]);
    libc::close(stderr_pipe[1]);
    let stdout_buf = read_fd_to_vec(stdout_pipe[0]);
    libc::close(stdout_pipe[0]);
    let stderr_buf = read_fd_to_vec(stderr_pipe[0]);
    libc::close(stderr_pipe[0]);
    let mut status: i32 = 0;
    libc::waitpid(pid, &mut status, 0);
    if libc::WIFEXITED(status) && libc::WEXITSTATUS(status) == 0 {
        let mut out = String::from_utf8_lossy(&stdout_buf).into_owned();
        // Trim trailing newlines
        while out.ends_with('\n') || out.ends_with('\r') {
            out.pop();
        }
        (*resp).success = true;
        let c = CString::new(out).unwrap_or_default();
        (*resp).result_json = libc::strdup(c.as_ptr());
    } else {
        (*resp).success = false;
        // Prefer the child's stderr; otherwise describe signal/exit status.
        let errmsg = if !stderr_buf.is_empty() {
            String::from_utf8_lossy(&stderr_buf).into_owned()
        } else if libc::WIFSIGNALED(status) {
            format!("morloc {} killed by signal {}", subcmd, libc::WTERMSIG(status))
        } else {
            let code = if libc::WIFEXITED(status) {
                libc::WEXITSTATUS(status)
            } else {
                -1
            };
            format!("morloc {} exited with code {}", subcmd, code)
        };
        let c = CString::new(errmsg).unwrap_or_default();
        (*resp).error = libc::strdup(c.as_ptr());
    }
    resp
}

/// Read an fd to completion into a Vec.
/// Drain `fd` to EOF (or first error) in 8 KiB chunks.
/// NOTE(review): return type reads `Vec` — angle brackets stripped by
/// extraction; presumably `Vec<u8>` originally.
unsafe fn read_fd_to_vec(fd: i32) -> Vec {
    let mut buf = Vec::with_capacity(65536);
    let mut tmp = [0u8; 8192];
    loop {
        let n = libc::read(fd, tmp.as_mut_ptr() as *mut c_void, tmp.len());
        if n <= 0 {
            break;
        }
        buf.extend_from_slice(&tmp[..n as usize]);
    }
    buf
}

// -- Dispatch -----------------------------------------------------------------

/// Central request dispatcher: routes a parsed DaemonRequest to the handler
/// for its method and returns a caller-owned, calloc'd DaemonResponse.
///
/// Health/Discover/Eval/Typecheck/Bind/Bindings/Unbind are handled inline;
/// Call falls through to manifest lookup, argument parsing, and either pure
/// in-process evaluation (morloc_eval) or a packet round-trip to a language
/// pool over its unix socket.
///
/// # Safety
/// `manifest` must be a valid v2 Manifest pointer, `request` a valid parsed
/// request, and `sockets` an array with an entry for every pool index the
/// manifest references.
#[no_mangle]
pub unsafe extern "C" fn daemon_dispatch(
    manifest: *mut c_void,
    request: *mut DaemonRequest,
    sockets: *mut MorlocSocket,
    _shm_basename: *const c_char,
) -> *mut DaemonResponse {
    // NOTE(review): turbofish argument stripped — presumably
    // size_of::<DaemonResponse>() originally.
    let resp = libc::calloc(1, std::mem::size_of::()) as *mut DaemonResponse;
    // Echo request id
    if !(*request).id.is_null() {
        (*resp).id = libc::strdup((*request).id);
    }
    match (*request).method {
        DaemonMethod::Health => {
            // Report per-pool liveness as a JSON array of booleans (only when
            // the liveness callback was installed).
            (*resp).success = true;
            if let Some(alive_fn) = G_POOL_ALIVE_FN {
                let mut arr = Vec::with_capacity(G_N_POOLS);
                for i in 0..G_N_POOLS {
                    arr.push(serde_json::Value::Bool(alive_fn(i)));
                }
                let json = serde_json::to_string(&arr).unwrap_or_default();
                let c = CString::new(json).unwrap_or_default();
                (*resp).result_json = libc::strdup(c.as_ptr());
            }
            return resp;
        }
        DaemonMethod::Discover => {
            (*resp).success = true;
            (*resp).result_json = daemon_build_discovery(manifest);
            return resp;
        }
        DaemonMethod::Eval => {
            if (*request).expr.is_null() {
                (*resp).success = false;
                let c = CString::new("Missing 'expr' field in eval request").unwrap();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
            // Check binding store for cached expression
            if !G_BINDING_STORE.is_null() {
                let expr_str = CStr::from_ptr((*request).expr).to_string_lossy();
                let store = &*G_BINDING_STORE;
                let hv = hash::xxh64_with_seed(expr_str.as_bytes(), DEFAULT_XXHASH_SEED);
                let _cached = store
                    .lookup_hash(hv)
                    .or_else(|| store.lookup_name(&expr_str));
                // TODO: direct binary execution for bound functions
            }
            // Replace resp with the fork result, carrying the request id over.
            let eval_resp = fork_morloc_command("eval", (*request).expr);
            if !(*request).id.is_null() {
                (*eval_resp).id = libc::strdup((*request).id);
            }
            // NOTE(review): (*resp).id was strdup'd at the top of this
            // function but is not freed before resp itself is freed here —
            // this leaks the duplicated id string. Confirm and free
            // (*resp).id first (same issue in the Typecheck branch).
            libc::free(resp as *mut c_void);
            return eval_resp;
        }
        DaemonMethod::Typecheck => {
            if (*request).expr.is_null() {
                (*resp).success = false;
                let c = CString::new("Missing 'expr' field in typecheck request").unwrap();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
            let tc_resp = fork_morloc_command("typecheck", (*request).expr);
            if !(*request).id.is_null() {
                (*tc_resp).id = libc::strdup((*request).id);
            }
            // NOTE(review): same (*resp).id leak as in the Eval branch.
            libc::free(resp as *mut c_void);
            return tc_resp;
        }
        DaemonMethod::Bind => {
            if (*request).expr.is_null() {
                (*resp).success = false;
                let c = CString::new("Missing 'expr' field in bind request").unwrap();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
            if G_BINDING_STORE.is_null() {
                (*resp).success = false;
                let c = CString::new("Binding store not initialized").unwrap();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
            let store = &mut *G_BINDING_STORE;
            let expr_str = CStr::from_ptr((*request).expr).to_string_lossy().into_owned();
            let name = if (*request).name.is_null() {
                None
            } else {
                Some(CStr::from_ptr((*request).name).to_string_lossy().into_owned())
            };
            let timeout = G_EVAL_TIMEOUT.load(Ordering::Relaxed);
            match store.bind(&expr_str, name.as_deref(), timeout) {
                Some(hv) => {
                    // Success: return {hash, expr, name?, type?}.
                    let mut map = serde_json::Map::new();
                    map.insert(
                        "hash".into(),
                        serde_json::Value::String(format!("{:016x}", hv)),
                    );
                    map.insert("expr".into(), serde_json::Value::String(expr_str));
                    if let Some(n) = &name {
                        map.insert("name".into(), serde_json::Value::String(n.clone()));
                    }
                    if let Some(entry) = store.lookup_hash(hv) {
                        if let Some(ref ts) = entry.type_sig {
                            map.insert("type".into(), serde_json::Value::String(ts.clone()));
                        }
                    }
                    let json = serde_json::to_string(&map).unwrap_or_default();
                    (*resp).success = true;
                    let c = CString::new(json).unwrap_or_default();
                    (*resp).result_json = libc::strdup(c.as_ptr());
                }
                None => {
                    (*resp).success = false;
                    let c = CString::new("Failed to compile and bind expression").unwrap_or_default();
                    (*resp).error = libc::strdup(c.as_ptr());
                }
            }
            return resp;
        }
        DaemonMethod::Bindings => {
            // Always succeeds; empty list if the store is uninitialized.
            (*resp).success = true;
            if G_BINDING_STORE.is_null() {
                let c = CString::new("{\"bindings\":[]}").unwrap();
                (*resp).result_json = libc::strdup(c.as_ptr());
            } else {
                let store = &*G_BINDING_STORE;
                let json = store.list_json();
                let c = CString::new(json).unwrap_or_default();
                (*resp).result_json = libc::strdup(c.as_ptr());
            }
            return resp;
        }
        DaemonMethod::Unbind => {
            // Accept the binding name from either 'command' or 'name'.
            let name_ptr = if !(*request).command.is_null() {
                (*request).command
            } else {
                (*request).name
            };
            if name_ptr.is_null() {
                (*resp).success = false;
                let c = CString::new("Missing binding name").unwrap();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
            if G_BINDING_STORE.is_null() {
                (*resp).success = false;
                let c = CString::new("Binding store not initialized").unwrap();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
            let store = &mut *G_BINDING_STORE;
            let name = CStr::from_ptr(name_ptr).to_string_lossy();
            if store.unbind(&name) {
                (*resp).success = true;
                let c = CString::new("{\"removed\":true}").unwrap();
                (*resp).result_json = libc::strdup(c.as_ptr());
            } else {
                (*resp).success = false;
                let c = CString::new(format!("Binding not found: {}", name)).unwrap_or_default();
                (*resp).error = libc::strdup(c.as_ptr());
            }
            return resp;
        }
        DaemonMethod::Call => {
            // Fall through to call dispatch below
        }
    }
    // DAEMON_CALL
    if (*request).command.is_null() {
        (*resp).success = false;
        let c = CString::new("Missing 'command' field in call request").unwrap();
        (*resp).error = libc::strdup(c.as_ptr());
        return resp;
    }
    // Delegate to the C functions that handle manifest lookup, arg parsing,
    // schema handling, and pool communication. These are all already ported
    // to Rust in other _ffi modules, so we declare them as extern "C".
    extern "C" {
        fn parse_schema(schema: *const c_char, errmsg: *mut *mut c_char) -> *mut CSchema;
        fn free_schema(schema: *mut CSchema);
        fn initialize_positional(value: *mut c_char) -> *mut c_void;
        fn free_argument_t(arg: *mut c_void);
        fn parse_cli_data_argument(
            dest: *mut u8,
            arg: *const c_void,
            schema: *const CSchema,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn make_call_packet_from_cli(
            dest: *mut u8,
            mid: u32,
            args: *mut *mut c_void,
            arg_schema_strs: *mut *mut c_char,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn send_and_receive_over_socket(
            socket_path: *const c_char,
            packet: *const u8,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn get_morloc_data_packet_error_message(
            data: *const u8,
            errmsg: *mut *mut c_char,
        ) -> *mut c_char;
        fn get_morloc_data_packet_value(
            data: *const u8,
            schema: *const CSchema,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
        fn voidstar_to_json_string(
            data: *const c_void,
            schema: *const CSchema,
            errmsg: *mut *mut c_char,
        ) -> *mut c_char;
        fn morloc_eval(
            expr: *mut c_void, // actually *mut MorlocExpression
            return_schema: *mut CSchema,
            arg_voidstar: *mut *mut u8,
            arg_schemas: *mut *mut CSchema,
            nargs: usize,
            errmsg: *mut *mut c_char,
        ) -> *mut u8;
    }
    // The manifest is the canonical v2 C struct from manifest_ffi.rs.
    // No local mirror needed -- import the real type and walk it.
    use crate::manifest_ffi::{Manifest as ManifestC, ManifestArgKind, ManifestCommand};
    let mv = manifest as *const ManifestC;
    let command_name = CStr::from_ptr((*request).command);
    // Linear scan of the manifest command table for a name match.
    let mut cmd: *const ManifestCommand = ptr::null();
    for i in 0..(*mv).n_commands {
        let c = &*(*mv).commands.add(i);
        if CStr::from_ptr(c.name) == command_name {
            cmd = c;
            break;
        }
    }
    if cmd.is_null() {
        (*resp).success = false;
        let msg = format!(
            "Unknown command: {}",
            command_name.to_string_lossy()
        );
        let c = CString::new(msg).unwrap_or_default();
        (*resp).error = libc::strdup(c.as_ptr());
        return resp;
    }
    let cmd = &*cmd;
    let expected_nargs = cmd.n_args;
    // Parse JSON args into argument_t** array
    let mut err: *mut c_char = ptr::null_mut();
    let args: *mut *mut c_void;
    if !(*request).args_json.is_null() {
        // Parse the JSON array
        // NOTE(review): `Vec` element type stripped by extraction —
        // presumably Vec<serde_json::Value> originally.
        let args_str = CStr::from_ptr((*request).args_json).to_string_lossy();
        let parsed_args: Vec = match serde_json::from_str(&args_str) {
            Ok(v) => v,
            Err(e) => {
                (*resp).success = false;
                let c = CString::new(format!("Failed to parse args: {}", e)).unwrap_or_default();
                (*resp).error = libc::strdup(c.as_ptr());
                return resp;
            }
        };
        if parsed_args.len() != expected_nargs {
            (*resp).success = false;
            let c = CString::new(format!(
                "Expected {} arguments, got {}",
                expected_nargs,
                parsed_args.len()
            ))
            .unwrap_or_default();
            (*resp).error = libc::strdup(c.as_ptr());
            return resp;
        }
        // NULL-terminated argument_t* array (hence +1).
        args = libc::calloc(expected_nargs + 1, std::mem::size_of::<*mut c_void>())
            as *mut *mut c_void;
        for (i, val) in parsed_args.iter().enumerate() {
            // Strings are re-quoted so initialize_positional sees valid JSON.
            let val_str = match val {
                serde_json::Value::String(s) => format!("\"{}\"", s),
                other => other.to_string(),
            };
            let c = CString::new(val_str).unwrap_or_default();
            let dup = libc::strdup(c.as_ptr());
            *args.add(i) = initialize_positional(dup);
            libc::free(dup as *mut c_void);
        }
        *args.add(expected_nargs) = ptr::null_mut();
    } else {
        if expected_nargs > 0 {
            // Check if any are positional (required)
            // For simplicity, match the C behavior: require args if n_args > 0
            (*resp).success = false;
            let c = CString::new("Missing 'args' field in call request").unwrap();
            (*resp).error = libc::strdup(c.as_ptr());
            return resp;
        }
        // Zero-arg call: a single NULL terminator.
        args = libc::calloc(1, std::mem::size_of::<*mut c_void>()) as *mut *mut c_void;
        *args = ptr::null_mut();
    }
    if cmd.is_pure {
        // Pure command: evaluate expression tree
        let mut nargs: usize = 0;
        while !(*args.add(nargs)).is_null() {
            nargs += 1;
        }
        // v2: schemas live on each ManifestArg. Walk cmd.args in
        // declaration order, INCLUDING flags (they consume an arg
        // slot in the parsed list and need a corresponding schema
        // entry to keep alignment). For flags, fall back to the
        // boolean schema "b".
        static FLAG_SCHEMA: &[u8] = b"b\0";
        let mut arg_schema_strs: Vec<*mut c_char> = Vec::with_capacity(nargs);
        for i in 0..cmd.n_args {
            let a = &*cmd.args.add(i);
            let s = if a.kind == ManifestArgKind::Flag || a.schema.is_null() {
                FLAG_SCHEMA.as_ptr() as *mut c_char
            } else {
                a.schema
            };
            arg_schema_strs.push(s);
        }
        // Per-argument parallel arrays: parsed schema, data packet, voidstar.
        let arg_schemas_arr =
            libc::calloc(nargs, std::mem::size_of::<*mut CSchema>()) as *mut *mut CSchema;
        let arg_packets = libc::calloc(nargs, std::mem::size_of::<*mut u8>()) as *mut *mut u8;
        let arg_voidstars = libc::calloc(nargs, std::mem::size_of::<*mut u8>()) as *mut *mut u8;
        let mut cleanup_and_fail = false;
        for i in 0..nargs {
            let schema_str = arg_schema_strs.get(i).copied().unwrap_or(ptr::null_mut());
            *arg_schemas_arr.add(i) = parse_schema(schema_str, &mut err);
            if !err.is_null() {
                (*resp).success = false;
                (*resp).error = err;
                cleanup_and_fail = true;
                break;
            }
            *arg_packets.add(i) = parse_cli_data_argument(
                ptr::null_mut(),
                *args.add(i),
                *arg_schemas_arr.add(i),
                &mut err,
            );
            if !err.is_null() {
                (*resp).success = false;
                (*resp).error = err;
                cleanup_and_fail = true;
                break;
            }
            *arg_voidstars.add(i) = get_morloc_data_packet_value(
                *arg_packets.add(i),
                *arg_schemas_arr.add(i),
                &mut err,
            );
            if !err.is_null() {
                (*resp).success = false;
                (*resp).error = err;
                cleanup_and_fail = true;
                break;
            }
        }
        if !cleanup_and_fail {
            let return_schema = parse_schema(cmd.ret.schema, &mut err);
            if !err.is_null() {
                (*resp).success = false;
                (*resp).error = err;
            } else {
                let result_abs = morloc_eval(
                    cmd.expr as *mut c_void,
                    return_schema,
                    arg_voidstars,
                    arg_schemas_arr,
                    nargs,
                    &mut err,
                );
                if !err.is_null() {
                    (*resp).success = false;
                    (*resp).error = err;
                } else {
                    let json = voidstar_to_json_string(
                        result_abs as *const c_void,
                        return_schema as *const CSchema,
                        &mut err,
                    );
                    if !err.is_null() {
                        (*resp).success = false;
                        (*resp).error = err;
                    } else {
                        (*resp).success = true;
                        (*resp).result_json = json;
                    }
                }
                free_schema(return_schema);
            }
        }
        // Cleanup
        // Free per-argument schemas and packets; the voidstar pointers are
        // views into the packets, so only their holder array is freed.
        for i in 0..nargs {
            let s = *arg_schemas_arr.add(i);
            if !s.is_null() {
                free_schema(s);
            }
            let p = *arg_packets.add(i);
            if !p.is_null() {
                libc::free(p as *mut c_void);
            }
        }
        libc::free(arg_schemas_arr as *mut c_void);
        libc::free(arg_packets as *mut c_void);
        libc::free(arg_voidstars as *mut c_void);
    } else {
        // Remote command: send call packet to pool. v2 stores schemas
        // per-arg, but make_call_packet_from_cli wants a NULL-terminated
        // flat array. ManifestCommand exposes a helper that materializes
        // the flat view; the outer pointer array is owned by us and
        // freed below, but the inner C strings remain owned by the
        // ManifestArg objects.
        let arg_schemas_flat = cmd.build_arg_schemas_array();
        let call_packet = make_call_packet_from_cli(
            ptr::null_mut(),
            cmd.mid,
            args,
            arg_schemas_flat,
            &mut err,
        );
        libc::free(arg_schemas_flat as *mut c_void);
        if !err.is_null() {
            (*resp).success = false;
            (*resp).error = err;
        } else {
            // Round-trip the packet through the pool's unix socket.
            let socket_path = (*sockets.add(cmd.pool_index)).socket_filename;
            let result_packet = send_and_receive_over_socket(socket_path, call_packet, &mut err);
            libc::free(call_packet as *mut c_void);
            if !err.is_null() {
                (*resp).success = false;
                (*resp).error = err;
            } else {
                // A packet may carry an application-level error message.
                let packet_error = get_morloc_data_packet_error_message(result_packet, &mut err);
                if !packet_error.is_null() {
                    (*resp).success = false;
                    (*resp).error = libc::strdup(packet_error);
                    libc::free(result_packet as *mut c_void);
                } else if !err.is_null() {
                    (*resp).success = false;
                    (*resp).error = err;
                    libc::free(result_packet as *mut c_void);
                } else {
                    let return_schema = parse_schema(cmd.ret.schema, &mut err);
                    if !err.is_null() {
                        (*resp).success = false;
                        (*resp).error = err;
                        libc::free(result_packet as *mut c_void);
                    } else {
                        let packet_value = get_morloc_data_packet_value(
                            result_packet,
                            return_schema as *const CSchema,
                            &mut err,
                        );
                        if !err.is_null() {
                            (*resp).success = false;
                            (*resp).error = err;
                        } else {
                            let json = voidstar_to_json_string(
                                packet_value as *const c_void,
                                return_schema as *const CSchema,
                                &mut err,
                            );
                            if !err.is_null() {
                                (*resp).success = false;
                                (*resp).error = err;
                            } else {
                                (*resp).success = true;
                                (*resp).result_json = json;
                            }
                        }
                        free_schema(return_schema);
                        libc::free(result_packet as *mut c_void);
                    }
                }
            }
        }
    }
    // Free args
    let mut i = 0;
    while !(*args.add(i)).is_null() {
        free_argument_t(*args.add(i));
        i += 1;
    }
    libc::free(args as *mut c_void);
    resp
}

// -- Length-prefixed message protocol -----------------------------------------

/// Read one length-prefixed message from `fd`: a 4-byte big-endian length
/// (capped at MAX_LP_MESSAGE) followed by the body. Returns a malloc'd,
/// NUL-terminated buffer owned by the caller and writes the body length to
/// `out_len` if non-null; on failure sets `*errmsg` and returns null.
unsafe fn read_lp_message(
    fd: i32,
    out_len: *mut usize,
    errmsg: *mut *mut c_char,
) -> *mut c_char {
    clear_errmsg(errmsg);
    let mut len_buf = [0u8; 4];
    // MSG_WAITALL: block until all 4 prefix bytes arrive (or error/EOF).
    let n = libc::recv(
        fd,
        len_buf.as_mut_ptr() as *mut c_void,
        4,
        libc::MSG_WAITALL,
    );
    if n != 4 {
        set_errmsg(
            errmsg,
            &MorlocError::Other("Failed to read message length prefix".into()),
        );
        return ptr::null_mut();
    }
    // Big-endian (network byte order) length.
    let msg_len = ((len_buf[0] as u32) << 24)
        | ((len_buf[1] as u32) << 16)
        | ((len_buf[2] as u32) << 8)
        | (len_buf[3] as u32);
    if msg_len > MAX_LP_MESSAGE {
        set_errmsg(
            errmsg,
            &MorlocError::Other(format!("Message too large: {} bytes", msg_len)),
        );
        return ptr::null_mut();
    }
    // +1 for the trailing NUL added below.
    let msg = libc::malloc(msg_len as usize + 1) as *mut c_char;
    if msg.is_null() {
        set_errmsg(
            errmsg,
            &MorlocError::Other("Failed to allocate message buffer".into()),
        );
        return ptr::null_mut();
    }
    let mut total: usize = 0;
    while total < msg_len as usize {
        let n = libc::recv(
            fd,
            msg.add(total) as *mut c_void,
            msg_len as usize - total,
            0,
        );
        if n <= 0 {
            libc::free(msg as *mut c_void);
            set_errmsg(
                errmsg,
                &MorlocError::Other(format!(
                    "Failed to read message body (got {} of {} bytes)",
                    total, msg_len
                )),
            );
            return ptr::null_mut();
        }
        total += n as usize;
    }
    *msg.add(msg_len as usize) = 0;
    if !out_len.is_null() {
        *out_len = msg_len as usize;
    }
    msg
}

/// Write one length-prefixed message to `fd`: a 4-byte big-endian length then
/// `len` bytes of `data`. Returns false (with `*errmsg` set) on any short or
/// failed send. Sends use SEND_NOSIGNAL so a closed peer yields an error
/// instead of SIGPIPE.
/// NOTE(review): `len` is a usize but the wire prefix is 32-bit; values above
/// u32::MAX would be silently truncated — confirm callers never exceed
/// MAX_LP_MESSAGE.
unsafe fn write_lp_message(
    fd: i32,
    data: *const c_char,
    len: usize,
    errmsg: *mut *mut c_char,
) -> bool {
    clear_errmsg(errmsg);
    let len_buf: [u8; 4] = [
        ((len >> 24) & 0xFF) as u8,
        ((len >> 16) & 0xFF) as u8,
        ((len >> 8) & 0xFF) as u8,
        (len & 0xFF) as u8,
    ];
    let n = libc::send(
        fd,
        len_buf.as_ptr() as *const c_void,
        4,
        crate::utility::SEND_NOSIGNAL,
    );
    if n != 4 {
        set_errmsg(
            errmsg,
            &MorlocError::Other("Failed to write message length prefix".into()),
        );
        return false;
    }
    let mut total: usize = 0;
    while total < len {
        let n = libc::send(
            fd,
            (data as *const u8).add(total) as *const c_void,
            len - total,
            crate::utility::SEND_NOSIGNAL,
        );
        if n <= 0 {
            set_errmsg(
                errmsg,
                &MorlocError::Other("Failed to write message body".into()),
            );
            return false;
        }
        total += n as usize;
    }
    true
}

// -- Connection handlers ------------------------------------------------------

unsafe fn
handle_lp_connection( client_fd: i32, manifest: *mut c_void, sockets: *mut MorlocSocket, shm_basename: *const c_char, ) { let mut errmsg: *mut c_char = ptr::null_mut(); let mut msg_len: usize = 0; // Peek to distinguish a probe connection (immediate EOF) from a real // client. The router's readiness check connects then closes without // sending data; silently ignore those. let mut peek_buf = [0u8; 1]; let peek_n = libc::recv(client_fd, peek_buf.as_mut_ptr() as *mut c_void, 1, libc::MSG_PEEK); if peek_n == 0 { // Clean EOF — probe connection, silently close. libc::close(client_fd); return; } let msg = read_lp_message(client_fd, &mut msg_len, &mut errmsg); if !errmsg.is_null() { let err_str = CStr::from_ptr(errmsg).to_string_lossy(); eprintln!("daemon: read error: {}", err_str); libc::free(errmsg as *mut c_void); libc::close(client_fd); return; } let req = daemon_parse_request(msg, msg_len, &mut errmsg); libc::free(msg as *mut c_void); if !errmsg.is_null() { let mut err_resp: DaemonResponse = std::mem::zeroed(); err_resp.success = false; err_resp.error = errmsg; let mut resp_len: usize = 0; let resp_json = daemon_serialize_response(&mut err_resp, &mut resp_len); let mut write_err: *mut c_char = ptr::null_mut(); write_lp_message(client_fd, resp_json, resp_len, &mut write_err); libc::free(resp_json as *mut c_void); if !write_err.is_null() { libc::free(write_err as *mut c_void); } libc::free(errmsg as *mut c_void); libc::close(client_fd); return; } let resp = daemon_dispatch(manifest, req, sockets, shm_basename); let mut resp_len: usize = 0; let resp_json = daemon_serialize_response(resp, &mut resp_len); let mut write_err: *mut c_char = ptr::null_mut(); write_lp_message(client_fd, resp_json, resp_len, &mut write_err); if !write_err.is_null() { let err_str = CStr::from_ptr(write_err).to_string_lossy(); eprintln!("daemon: write error: {}", err_str); libc::free(write_err as *mut c_void); } libc::free(resp_json as *mut c_void); daemon_free_request(req); 
daemon_free_response(resp); libc::close(client_fd); } unsafe fn handle_http_connection( client_fd: i32, manifest: *mut c_void, sockets: *mut MorlocSocket, shm_basename: *const c_char, ) { extern "C" { fn http_parse_request(fd: i32, errmsg: *mut *mut c_char) -> *mut HttpRequest; fn http_free_request(req: *mut HttpRequest); fn http_write_response( fd: i32, status: i32, content_type: *const c_char, body: *const c_char, body_len: usize, ) -> bool; fn http_to_daemon_request( req: *mut HttpRequest, errmsg: *mut *mut c_char, ) -> *mut DaemonRequest; } let mut errmsg: *mut c_char = ptr::null_mut(); let http_req = http_parse_request(client_fd, &mut errmsg); if !errmsg.is_null() { let body = b"{\"status\":\"error\",\"error\":\"Bad request\"}\0"; let ct = b"application/json\0"; http_write_response( client_fd, 400, ct.as_ptr() as *const c_char, body.as_ptr() as *const c_char, body.len() - 1, ); libc::free(errmsg as *mut c_void); libc::close(client_fd); return; } let req = http_to_daemon_request(http_req, &mut errmsg); if !errmsg.is_null() { let body = b"{\"status\":\"error\",\"error\":\"Invalid request\"}\0"; let ct = b"application/json\0"; http_write_response( client_fd, 400, ct.as_ptr() as *const c_char, body.as_ptr() as *const c_char, body.len() - 1, ); http_free_request(http_req); libc::free(errmsg as *mut c_void); libc::close(client_fd); return; } http_free_request(http_req); let resp = daemon_dispatch(manifest, req, sockets, shm_basename); let mut resp_len: usize = 0; let resp_json = daemon_serialize_response(resp, &mut resp_len); // Append newline for terminal-friendly output let resp_body = libc::malloc(resp_len + 2) as *mut u8; ptr::copy_nonoverlapping(resp_json as *const u8, resp_body, resp_len); *resp_body.add(resp_len) = b'\n'; *resp_body.add(resp_len + 1) = 0; let status = if (*resp).success { 200 } else { 500 }; let ct = b"application/json\0"; http_write_response( client_fd, status, ct.as_ptr() as *const c_char, resp_body as *const c_char, resp_len + 1, ); 
libc::free(resp_body as *mut c_void); libc::free(resp_json as *mut c_void); daemon_free_request(req); daemon_free_response(resp); libc::close(client_fd); } // -- Thread pool (VecDeque + Condvar instead of linked list + pthread) -------- #[derive(Clone, Copy)] struct DaemonJob { client_fd: i32, conn_type: i32, // 0 = length-prefixed (unix/tcp), 2 = http } struct JobQueue { jobs: VecDeque, } struct WorkerContext { queue: Mutex, cond: Condvar, manifest: *mut c_void, sockets: *mut MorlocSocket, shm_basename: *const c_char, } // SAFETY: WorkerContext is shared between threads but all raw pointers // within it point to read-only or thread-safe C data. unsafe impl Send for WorkerContext {} unsafe impl Sync for WorkerContext {} fn set_socket_timeouts(fd: i32, timeout_sec: i32) { unsafe { let tv = libc::timeval { tv_sec: timeout_sec as _, tv_usec: 0, }; libc::setsockopt( fd, libc::SOL_SOCKET, libc::SO_RCVTIMEO, &tv as *const libc::timeval as *const c_void, std::mem::size_of::() as libc::socklen_t, ); libc::setsockopt( fd, libc::SOL_SOCKET, libc::SO_SNDTIMEO, &tv as *const libc::timeval as *const c_void, std::mem::size_of::() as libc::socklen_t, ); } } // -- Main daemon event loop --------------------------------------------------- const MAX_LISTENERS: usize = 3; #[no_mangle] pub unsafe extern "C" fn daemon_run( config: *mut DaemonConfig, manifest: *mut c_void, sockets: *mut MorlocSocket, n_pools: usize, shm_basename: *const c_char, ) { // Set globals G_POOL_ALIVE_FN = (*config).pool_alive_fn; G_N_POOLS = n_pools; let timeout = if (*config).eval_timeout > 0 { (*config).eval_timeout } else { 30 }; G_EVAL_TIMEOUT.store(timeout, Ordering::Relaxed); // Initialize binding store if G_BINDING_STORE.is_null() { let store = Box::new(BindingStore::new("/tmp/morloc-bindings")); G_BINDING_STORE = Box::into_raw(store); } // Install signal handlers SHUTDOWN_REQUESTED.store(false, Ordering::Relaxed); let handler: libc::sighandler_t = std::mem::transmute::(daemon_signal_handler_fn); 
libc::signal(libc::SIGTERM, handler); libc::signal(libc::SIGINT, handler); let mut fds = [libc::pollfd { fd: -1, events: 0, revents: 0, }; MAX_LISTENERS]; let mut fd_types = [0i32; MAX_LISTENERS]; // 0=unix, 1=tcp, 2=http let mut nfds: usize = 0; // Unix socket if !(*config).unix_socket_path.is_null() { let sock_fd = libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0); if sock_fd < 0 { eprintln!("daemon: failed to create unix socket"); return; } let mut addr: libc::sockaddr_un = std::mem::zeroed(); addr.sun_family = libc::AF_UNIX as libc::sa_family_t; let path_bytes = CStr::from_ptr((*config).unix_socket_path).to_bytes(); let copy_len = path_bytes.len().min(addr.sun_path.len() - 1); ptr::copy_nonoverlapping( path_bytes.as_ptr() as *const c_char, addr.sun_path.as_mut_ptr(), copy_len, ); libc::unlink((*config).unix_socket_path); if libc::bind( sock_fd, &addr as *const libc::sockaddr_un as *const libc::sockaddr, std::mem::size_of::() as libc::socklen_t, ) < 0 { eprintln!("daemon: failed to bind unix socket"); libc::close(sock_fd); return; } libc::listen(sock_fd, 64); fds[nfds].fd = sock_fd; fds[nfds].events = libc::POLLIN as i16; fd_types[nfds] = 0; nfds += 1; } // TCP if (*config).tcp_port > 0 { let tcp_fd = libc::socket(libc::AF_INET, libc::SOCK_STREAM, 0); if tcp_fd < 0 { eprintln!("daemon: failed to create tcp socket"); return; } let opt: i32 = 1; libc::setsockopt( tcp_fd, libc::SOL_SOCKET, libc::SO_REUSEADDR, &opt as *const i32 as *const c_void, std::mem::size_of::() as libc::socklen_t, ); let mut addr: libc::sockaddr_in = std::mem::zeroed(); addr.sin_family = libc::AF_INET as libc::sa_family_t; addr.sin_addr.s_addr = u32::from_be(0x7f000001); // INADDR_LOOPBACK addr.sin_port = ((*config).tcp_port as u16).to_be(); if libc::bind( tcp_fd, &addr as *const libc::sockaddr_in as *const libc::sockaddr, std::mem::size_of::() as libc::socklen_t, ) < 0 { eprintln!( "daemon: failed to bind tcp port {}", (*config).tcp_port ); libc::close(tcp_fd); return; } libc::listen(tcp_fd, 
64); fds[nfds].fd = tcp_fd; fds[nfds].events = libc::POLLIN as i16; fd_types[nfds] = 1; nfds += 1; } // HTTP if (*config).http_port > 0 { let http_fd = libc::socket(libc::AF_INET, libc::SOCK_STREAM, 0); if http_fd < 0 { eprintln!("daemon: failed to create http socket"); return; } let opt: i32 = 1; libc::setsockopt( http_fd, libc::SOL_SOCKET, libc::SO_REUSEADDR, &opt as *const i32 as *const c_void, std::mem::size_of::() as libc::socklen_t, ); let mut addr: libc::sockaddr_in = std::mem::zeroed(); addr.sin_family = libc::AF_INET as libc::sa_family_t; // HTTP router is externally reachable; bind to all interfaces so that // container port mappings (docker -p) can reach it. addr.sin_addr.s_addr = libc::INADDR_ANY.to_be(); addr.sin_port = ((*config).http_port as u16).to_be(); if libc::bind( http_fd, &addr as *const libc::sockaddr_in as *const libc::sockaddr, std::mem::size_of::() as libc::socklen_t, ) < 0 { eprintln!( "daemon: failed to bind http port {}", (*config).http_port ); libc::close(http_fd); return; } libc::listen(http_fd, 64); fds[nfds].fd = http_fd; fds[nfds].events = libc::POLLIN as i16; fd_types[nfds] = 2; nfds += 1; } if nfds == 0 { eprintln!("daemon: no listeners configured, exiting"); return; } // Start worker thread pool let ctx = Arc::new(WorkerContext { queue: Mutex::new(JobQueue { jobs: VecDeque::new(), }), cond: Condvar::new(), manifest, sockets, shm_basename, }); let n_workers = n_pools.saturating_add(4).clamp(4, 32); let mut workers = Vec::with_capacity(n_workers); for _ in 0..n_workers { let ctx = Arc::clone(&ctx); workers.push(std::thread::spawn(move || { daemon_worker_fn(ctx); })); } // Main event loop while !SHUTDOWN_REQUESTED.load(Ordering::Relaxed) { let ready = libc::poll(fds.as_mut_ptr(), nfds as libc::nfds_t, 1000); if ready < 0 { if crate::utility::errno_val() == libc::EINTR { continue; } eprintln!("daemon: poll error"); break; } // Check and restart crashed pools if let Some(check_fn) = (*config).pool_check_fn { check_fn(sockets, 
n_pools); } if ready == 0 { continue; } for i in 0..nfds { if fds[i].revents & libc::POLLIN as i16 == 0 { continue; } let client_fd = libc::accept(fds[i].fd, ptr::null_mut(), ptr::null_mut()); if client_fd < 0 { if crate::utility::errno_val() == libc::EINTR || crate::utility::errno_val() == libc::EAGAIN { continue; } eprintln!("daemon: accept error"); continue; } crate::utility::set_nosigpipe(client_fd); set_socket_timeouts(client_fd, 30); let job = DaemonJob { client_fd, conn_type: fd_types[i], }; let mut q = ctx.queue.lock().unwrap(); q.jobs.push_back(job); ctx.cond.notify_one(); } } // Wake all workers and join ctx.cond.notify_all(); for w in workers { let _ = w.join(); } // Drain remaining jobs { let mut q = ctx.queue.lock().unwrap(); while let Some(job) = q.jobs.pop_front() { libc::close(job.client_fd); } } // Close listener sockets for i in 0..nfds { libc::close(fds[i].fd); } if !(*config).unix_socket_path.is_null() { libc::unlink((*config).unix_socket_path); } } fn daemon_worker_fn(ctx: Arc) { loop { if SHUTDOWN_REQUESTED.load(Ordering::Relaxed) { break; } let job = { let mut q = ctx.queue.lock().unwrap(); loop { if let Some(job) = q.jobs.pop_front() { break Some(job); } if SHUTDOWN_REQUESTED.load(Ordering::Relaxed) { break None; } // Wait with timeout so we recheck shutdown let (guard, _timeout) = ctx .cond .wait_timeout(q, std::time::Duration::from_millis(100)) .unwrap(); q = guard; } }; let job = match job { Some(j) => j, None => continue, }; unsafe { if job.conn_type == 2 { handle_http_connection( job.client_fd, ctx.manifest, ctx.sockets, ctx.shm_basename, ); } else { handle_lp_connection( job.client_fd, ctx.manifest, ctx.sockets, ctx.shm_basename, ); } } } } // Signal handler (must be async-signal-safe) extern "C" fn daemon_signal_handler_fn(_sig: i32) { SHUTDOWN_REQUESTED.store(true, Ordering::Relaxed); } ================================================ FILE: data/rust/morloc-runtime/src/error.rs ================================================ use 
std::ffi::{CString, c_char};

/// Unified error type for the morloc runtime FFI layer.
///
/// Rendered to a C string via `set_errmsg` when crossing the FFI boundary;
/// the `thiserror` attributes define the human-readable message for each
/// variant.
#[derive(Debug, thiserror::Error)]
pub enum MorlocError {
    #[error("shared memory error: {0}")]
    Shm(String),
    #[error("packet error: {0}")]
    Packet(String),
    #[error("schema error: {0}")]
    Schema(String),
    #[error("serialization error: {0}")]
    Serialization(String),
    #[error("IPC error: {0}")]
    Ipc(String),
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    #[error("null pointer")]
    NullPointer,
    #[error("{0}")]
    Other(String),
}

/// Write a MorlocError into the C ERRMSG convention.
///
/// Allocates a NUL-terminated copy of the error's display string and stores
/// it through `errmsg`. A NULL `errmsg` is a silent no-op.
///
/// NOTE(review): if the rendered message contains an interior NUL byte,
/// `CString::new` fails and `*errmsg` is left untouched — presumably still
/// NULL after `clear_errmsg`, so the caller would observe "no error" despite
/// one occurring. Confirm message sources cannot contain NULs.
///
/// # Safety
/// `errmsg` must be a valid, non-dangling pointer to a `*mut c_char` (i.e., `char**`).
/// The caller is responsible for freeing the allocated string via `CString::from_raw`
/// or `libc::free`.
pub unsafe fn set_errmsg(errmsg: *mut *mut c_char, err: &MorlocError) {
    if !errmsg.is_null() {
        if let Ok(cstr) = CString::new(err.to_string()) {
            // into_raw transfers ownership to the C side.
            *errmsg = cstr.into_raw();
        }
    }
}

/// Clear the ERRMSG pointer (must be called at FFI entry).
///
/// Resets `*errmsg` to NULL so a later non-NULL value unambiguously signals
/// an error raised by this call. A NULL `errmsg` is a silent no-op.
///
/// # Safety
/// `errmsg` must be a valid pointer to a `*mut c_char`.
pub unsafe fn clear_errmsg(errmsg: *mut *mut c_char) {
    if !errmsg.is_null() {
        *errmsg = std::ptr::null_mut();
    }
}

================================================ FILE: data/rust/morloc-runtime/src/eval_ffi.rs ================================================
//! Expression evaluator and constructor functions.
//! Replaces eval.c. Uses HashMap instead of linked-list dict_t.
use std::collections::HashMap; use std::ffi::{c_char, c_void, CStr}; use std::ptr; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::manifest_ffi::*; use crate::shm::{self, AbsPtr, RelPtr}; // ── Constructor functions (called by manifest_ffi.rs and daemon.c) ─────────── #[no_mangle] pub unsafe extern "C" fn make_morloc_bound_var( schema_str: *const c_char, varname: *mut c_char, errmsg: *mut *mut c_char, ) -> *mut MorlocExpression { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let schema = crate::ffi::parse_schema(schema_str, &mut err); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression; if expr.is_null() { set_errmsg(errmsg, &MorlocError::Other("Failed to allocate bound variable expression".into())); return ptr::null_mut(); } (*expr).etype = MorlocExpressionType::Bnd; (*expr).schema = schema; (*expr).expr.bnd_expr = varname; expr } #[no_mangle] pub unsafe extern "C" fn make_morloc_literal( schema_str: *const c_char, lit: Primitive, errmsg: *mut *mut c_char, ) -> *mut MorlocExpression { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let schema = crate::ffi::parse_schema(schema_str, &mut err); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } let data = libc::malloc(std::mem::size_of::()) as *mut MorlocData; if data.is_null() { set_errmsg(errmsg, &MorlocError::Other("Failed to allocate literal data".into())); return ptr::null_mut(); } (*data).is_voidstar = false; (*data).data = DataUnion { lit_val: std::mem::ManuallyDrop::new(lit) }; let expr = libc::malloc(std::mem::size_of::()) as *mut MorlocExpression; if expr.is_null() { libc::free(data as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Failed to allocate literal expression".into())); return ptr::null_mut(); } (*expr).etype = MorlocExpressionType::Dat; (*expr).schema = schema; (*expr).expr.data_expr = data; expr } 
#[no_mangle] pub unsafe extern "C" fn make_morloc_pattern( schema_str: *const c_char, pattern: *mut MorlocPattern, errmsg: *mut *mut c_char, ) -> *mut MorlocExpression { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let schema = crate::ffi::parse_schema(schema_str, &mut err); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression; if expr.is_null() { set_errmsg(errmsg, &MorlocError::Other("Failed to allocate pattern expression".into())); return ptr::null_mut(); } (*expr).etype = MorlocExpressionType::Pat; (*expr).schema = schema; (*expr).expr.pattern_expr = pattern; expr } #[no_mangle] pub extern "C" fn make_morloc_pattern_end() -> *mut MorlocPattern { // SAFETY: calloc returns zeroed memory suitable for MorlocPattern. // We initialize all fields before returning. unsafe { let pat = libc::calloc(1, std::mem::size_of::()) as *mut MorlocPattern; if pat.is_null() { return ptr::null_mut(); } (*pat).ptype = MorlocPatternType::End; (*pat).size = 0; (*pat).fields = PatternFields { indices: ptr::null_mut() }; (*pat).selectors = ptr::null_mut(); pat } } // Varargs constructors (make_morloc_container, make_morloc_app, // make_morloc_lambda, make_morloc_interpolation, make_morloc_pattern_idx, // make_morloc_pattern_key) are only used by generated C++ pool code. // They cannot be implemented in stable Rust due to C-variadic ABI. // The C-gcc build path (morloc init) provides them from the original eval.c. // The Rust hybrid build does not call them (only morloc_eval and the // non-varargs constructors are needed). // ── Core evaluator ─────────────────────────────────────────────────────────── type BndVars<'a> = HashMap<&'a str, AbsPtr>; /// Convert key-based pattern selectors to index-based using the schema's key names. /// /// # Safety /// `pattern` and `schema` must be valid, non-null pointers to C-allocated structures. 
/// `schema` keys array must have `schema.size` entries.
unsafe fn convert_keys_to_indices(
    pattern: *mut MorlocPattern,
    schema: *const CSchema,
) -> Result<(), MorlocError> {
    let pat = &mut *pattern;
    let n_params = (*schema).size;
    // Recurse into child selectors first. Single-parameter schemas (and
    // primitives) have no nested structure to rewrite.
    //
    // NOTE(review): recursion pairs selector i with schema parameter i
    // *positionally*, before any ByKey→ByIndex resolution below has mapped
    // key i to its true parameter index. If a ByKey pattern's keys are not in
    // declaration order, the child recursion may use the wrong sub-schema —
    // confirm against the original eval.c behavior.
    if n_params > 1 {
        for i in 0..pat.size {
            let child_schema = *(*schema).parameters.add(i);
            convert_keys_to_indices(*pat.selectors.add(i), child_schema)?;
        }
    }
    if pat.ptype == MorlocPatternType::ByKey {
        // Build the replacement index array; each key name is resolved by a
        // linear scan of the schema's key table.
        // NOTE(review): calloc result is not checked for NULL before use.
        let indices = libc::calloc(n_params, std::mem::size_of::()) as *mut usize;
        for i in 0..pat.size {
            let key = CStr::from_ptr(*pat.fields.keys.add(i)).to_str().unwrap_or("");
            let mut found = false;
            for j in 0..n_params {
                let record_key = CStr::from_ptr(*(*schema).keys.add(j)).to_str().unwrap_or("");
                if key == record_key {
                    found = true;
                    *indices.add(i) = j;
                    break;
                }
            }
            if !found {
                // Unknown key: release the partial index array and fail.
                // (Keys freed in earlier iterations stay freed; the pattern
                // is left in a partially-consumed state on this error path.)
                libc::free(indices as *mut c_void);
                return Err(MorlocError::Other(format!("Pattern contains key missing in schema: {}", key)));
            }
            // Key string resolved — its C string is no longer needed.
            libc::free(*pat.fields.keys.add(i) as *mut c_void);
        }
        // Swap the union from the keys view to the indices view; the old
        // keys array itself must be freed before repointing.
        pat.ptype = MorlocPatternType::ByIndex;
        libc::free(pat.fields.keys as *mut c_void);
        pat.fields.indices = indices;
    }
    Ok(())
}

/// Extract fields from a voidstar value using a pattern, copying them into dest.
///
/// # Safety
/// All pointer arguments must be valid and point to correctly-typed C structures.
/// `value` must point to voidstar data matching `value_schema`.
unsafe fn apply_getter(
    dest: AbsPtr,
    return_index: &mut usize,
    return_schema: *const CSchema,
    pattern: *mut MorlocPattern,
    value_schema: *const CSchema,
    value: AbsPtr,
) -> Result {
    let pat = &*pattern;
    match pat.ptype {
        MorlocPatternType::ByIndex => {
            // Descend into each selected field: offset the source pointer by
            // the field's offset and narrow the schema to the field's type.
            // `dest`/`return_schema` stay fixed; `return_index` tracks which
            // output slot the next End leaf fills.
            for i in 0..pat.size {
                let idx = *pat.fields.indices.add(i);
                apply_getter(
                    dest,
                    return_index,
                    return_schema,
                    *pat.selectors.add(i),
                    *(*value_schema).parameters.add(idx),
                    value.add(*(*value_schema).offsets.add(idx)),
                )?;
            }
        }
        MorlocPatternType::ByKey => {
            // Resolve key names to indices in place, then retry as ByIndex.
            convert_keys_to_indices(pattern, value_schema)?;
            return apply_getter(dest, return_index, return_schema, pattern, value_schema, value);
        }
        MorlocPatternType::End => {
            // Leaf: copy the current value into the next output slot. For a
            // multi-field return schema, slot i lives at offsets[i] with that
            // parameter's width; a scalar return uses dest directly.
            let (element_dest, element_width) = if (*return_schema).size > 1 {
                (dest.add(*(*return_schema).offsets.add(*return_index)), (*(*(*return_schema).parameters.add(*return_index))).width)
            } else {
                (dest, (*return_schema).width)
            };
            *return_index += 1;
            ptr::copy_nonoverlapping(value, element_dest, element_width);
        }
    }
    Ok(dest)
}

/// Copy value into dest, preserving fields not selected by pattern.
///
/// # Safety
/// All pointer arguments must be valid. Schema sizes must match.
unsafe fn apply_setter_copy(
    dest: AbsPtr,
    return_schema: *const CSchema,
    pattern: *mut MorlocPattern,
    value_schema: *const CSchema,
    value: AbsPtr,
) -> Result<(), MorlocError> {
    let pat = &*pattern;
    match pat.ptype {
        MorlocPatternType::ByKey => {
            // Resolve key names to indices in place, then retry as ByIndex.
            convert_keys_to_indices(pattern, value_schema)?;
            return apply_setter_copy(dest, return_schema, pattern, value_schema, value);
        }
        MorlocPatternType::ByIndex => {
            // Setter copy step: walk every field of the value. Fields the
            // pattern selects are recursed into (their leaves are left for
            // apply_setter_set to overwrite); unselected fields are copied
            // verbatim so the result preserves them.
            if (*value_schema).size != (*return_schema).size {
                return Err(MorlocError::Other("Expected setter return and input sizes to be the same".into()));
            }
            for i in 0..(*value_schema).size {
                let new_dest = dest.add(*(*return_schema).offsets.add(i));
                let new_value = value.add(*(*value_schema).offsets.add(i));
                let mut changed = false;
                // Linear scan: is field i selected by any pattern entry?
                for j in 0..pat.size {
                    if i == *pat.fields.indices.add(j) {
                        apply_setter_copy(
                            new_dest,
                            *(*return_schema).parameters.add(i),
                            *pat.selectors.add(j),
                            *(*value_schema).parameters.add(i),
                            new_value,
                        )?;
                        changed = true;
                        break;
                    }
                }
                if !changed {
                    // Untouched field: shallow byte copy at its full width.
                    let w = (*(*(*value_schema).parameters.add(i))).width;
                    ptr::copy_nonoverlapping(new_value, new_dest, w);
                }
            }
        }
        // End leaves are intentionally skipped here — apply_setter_set fills
        // them with the replacement values.
        MorlocPatternType::End => {}
    }
    Ok(())
}

/// Overwrite pattern-selected fields in dest with provided set_values.
///
/// # Safety
/// All pointer arguments must be valid. set_values must have enough entries.
unsafe fn apply_setter_set( dest: AbsPtr, return_schema: *const CSchema, pattern: *mut MorlocPattern, value_schema: *const CSchema, value: AbsPtr, set_schemas: *mut *mut CSchema, set_values: *mut AbsPtr, set_idx: &mut usize, ) -> Result<(), MorlocError> { let pat = &*pattern; match pat.ptype { MorlocPatternType::ByIndex => { for pi in 0..pat.size { let di = *pat.fields.indices.add(pi); apply_setter_set( dest.add(*(*return_schema).offsets.add(di)), *(*return_schema).parameters.add(di), *pat.selectors.add(pi), *(*value_schema).parameters.add(di), value.add(*(*value_schema).offsets.add(di)), set_schemas, set_values, set_idx, )?; } } MorlocPatternType::End => { ptr::copy_nonoverlapping(*set_values.add(*set_idx), dest, (*return_schema).width); *set_idx += 1; } MorlocPatternType::ByKey => { return Err(MorlocError::Other("Key patterns should have been resolved in copy step".into())); } } Ok(()) } /// Recursively evaluate a morloc expression, writing results into SHM. /// /// # Safety /// `expr` must be a valid MorlocExpression pointer (or null for error). /// If `dest` is non-null, it must point to `width` bytes of writable SHM. 
unsafe fn morloc_eval_r( expr: *mut MorlocExpression, dest: AbsPtr, width: usize, bndvars: &mut BndVars, ) -> Result { if expr.is_null() { return Err(MorlocError::Other("Empty expression".into())); } let schema = (*expr).schema; let (dest, width) = if dest.is_null() { let w = (*schema).width; let d = shm::shcalloc(1, w)?; (d, w) } else { if width != (*schema).width { return Err(MorlocError::Other("Unexpected data size".into())); } (dest, width) }; match (*expr).etype { MorlocExpressionType::Dat => { let data = (*expr).expr.data_expr; if (*data).is_voidstar { return Ok((*data).data.voidstar as AbsPtr); } let stype = (*schema).serial_type; if stype == crate::schema::SerialType::String as u32 { // String: allocate in SHM let s = std::mem::ManuallyDrop::into_inner(ptr::read(&(*data).data.lit_val)).s; let str_size = if s.is_null() { 0 } else { libc::strlen(s) }; let str_relptr: RelPtr = if str_size > 0 { let abs = shm::shmemcpy(s as *const u8, str_size)?; shm::abs2rel(abs)? } else { -1isize as RelPtr }; let arr = shm::Array { size: str_size, data: str_relptr }; ptr::copy_nonoverlapping(&arr as *const shm::Array as *const u8, dest, width); } else if stype == crate::schema::SerialType::Array as u32 { let arr_data = (*data).data.array_val; let arr_size = (*arr_data).size; let elem_schema = (*arr_data).schema; let elem_width = (*elem_schema).width; let arr_reldata: RelPtr = if arr_size > 0 { let arr_abs = shm::shcalloc(arr_size, elem_width)?; for i in 0..arr_size { morloc_eval_r( *(*arr_data).values.add(i), arr_abs.add(i * elem_width), elem_width, bndvars, )?; } shm::abs2rel(arr_abs)? 
} else { -1isize as RelPtr }; let arr = shm::Array { size: arr_size, data: arr_reldata }; ptr::copy_nonoverlapping(&arr as *const shm::Array as *const u8, dest, width); } else if stype == crate::schema::SerialType::Tuple as u32 || stype == crate::schema::SerialType::Map as u32 { for i in 0..(*schema).size { let elem_width = (*(*(*schema).parameters.add(i))).width; let elem_dest = dest.add(*(*schema).offsets.add(i)); let element = *(*data).data.tuple_val.add(i); morloc_eval_r(element, elem_dest, elem_width, bndvars)?; } } else { // All primitives: just copy width bytes from the union ptr::copy_nonoverlapping( &(*data).data as *const DataUnion as *const u8, dest, width, ); } } MorlocExpressionType::App => { let app = (*expr).expr.app_expr; let nargs = (*app).nargs; // Evaluate all arguments let mut arg_results: Vec = Vec::with_capacity(nargs); for i in 0..nargs { let r = morloc_eval_r(*(*app).args.add(i), ptr::null_mut(), 0, bndvars)?; arg_results.push(r); } match (*app).atype { MorlocAppExpressionType::Pattern => { if nargs == 1 { let mut return_index: usize = 0; apply_getter( dest, &mut return_index, schema, (*app).function.pattern, (*(*(*app).args)).schema, arg_results[0], )?; } else if nargs > 1 { // Setter: first arg is the value, rest are set values let mut set_schemas: Vec<*mut CSchema> = Vec::with_capacity(nargs - 1); for i in 1..nargs { set_schemas.push((*(*(*app).args.add(i))).schema); } apply_setter_copy( dest, schema, (*app).function.pattern, (*(*(*app).args)).schema, arg_results[0], )?; let mut set_idx: usize = 0; apply_setter_set( dest, schema, (*app).function.pattern, (*(*(*app).args)).schema, arg_results[0], set_schemas.as_mut_ptr(), arg_results[1..].as_ptr() as *mut AbsPtr, &mut set_idx, )?; } else { return Err(MorlocError::Other("No arguments provided to pattern".into())); } } MorlocAppExpressionType::Lambda => { let lam = (*app).function.lambda; // Bind arguments for i in 0..nargs { let var = 
CStr::from_ptr(*(*lam).args.add(i)).to_str().unwrap_or(""); bndvars.insert(var, arg_results[i]); } morloc_eval_r((*lam).body, dest, width, bndvars)?; // Clean up bindings for i in 0..nargs { let var = CStr::from_ptr(*(*lam).args.add(i)).to_str().unwrap_or(""); bndvars.remove(var); } } MorlocAppExpressionType::Format => { let strings = (*app).function.fmt; let mut result_size: usize = 0; let mut string_lengths: Vec = Vec::with_capacity(nargs + 1); for i in 0..=nargs { let len = libc::strlen(*strings.add(i)); string_lengths.push(len); result_size += len; } for i in 0..nargs { let arr = &*(arg_results[i] as *const shm::Array); result_size += arr.size; } let new_string = shm::shmalloc(result_size)?; let result_array = &mut *(dest as *mut shm::Array); result_array.size = result_size; result_array.data = shm::abs2rel(new_string)?; let mut cursor = new_string; for i in 0..=nargs { ptr::copy_nonoverlapping(*strings.add(i) as *const u8, cursor, string_lengths[i]); cursor = cursor.add(string_lengths[i]); if i < nargs { let arr = &*(arg_results[i] as *const shm::Array); if arr.size > 0 { let arr_data = shm::rel2abs(arr.data)?; ptr::copy_nonoverlapping(arr_data, cursor, arr.size); cursor = cursor.add(arr.size); } } } } } } MorlocExpressionType::Bnd => { let varname = CStr::from_ptr((*expr).expr.bnd_expr).to_str().unwrap_or(""); let bnd_value = bndvars.get(varname).copied() .ok_or_else(|| MorlocError::Other(format!("Unbound variable {}", varname)))?; ptr::copy_nonoverlapping(bnd_value, dest, (*schema).width); } MorlocExpressionType::Show => { // Serialize child to JSON string let child = (*expr).expr.unary_expr; let child_schema = (*child).schema; let child_result = morloc_eval_r(child, ptr::null_mut(), 0, bndvars)?; extern "C" { fn voidstar_to_json_string(data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char) -> *mut c_char; } let mut err: *mut c_char = ptr::null_mut(); let json = voidstar_to_json_string(child_result as *const c_void, child_schema, &mut err); 
if !err.is_null() { let msg = CStr::from_ptr(err).to_string_lossy().into_owned(); libc::free(err as *mut c_void); return Err(MorlocError::Other(msg)); } let json_len = libc::strlen(json); let str_relptr: RelPtr = if json_len > 0 { let abs = shm::shmemcpy(json as *const u8, json_len)?; libc::free(json as *mut c_void); shm::abs2rel(abs)? } else { libc::free(json as *mut c_void); -1isize as RelPtr }; let arr = shm::Array { size: json_len, data: str_relptr }; ptr::copy_nonoverlapping(&arr as *const shm::Array as *const u8, dest, width); } MorlocExpressionType::Read => { // Deserialize JSON string to typed data, return optional let child = (*expr).expr.unary_expr; let child_result = morloc_eval_r(child, ptr::null_mut(), 0, bndvars)?; let str_arr = &*(child_result as *const shm::Array); let opt_dest = dest; let inner_schema = *(*schema).parameters; if str_arr.size > 0 { let str_abs = shm::rel2abs(str_arr.data)?; let json_str = libc::malloc(str_arr.size + 1) as *mut c_char; if json_str.is_null() { return Err(MorlocError::Other("Failed to allocate for @read".into())); } ptr::copy_nonoverlapping(str_abs, json_str as *mut u8, str_arr.size); *json_str.add(str_arr.size) = 0; extern "C" { fn read_json_with_schema(dest: *mut u8, json: *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char) -> *mut u8; } let inner_offset = *(*schema).offsets; let mut parse_err: *mut c_char = ptr::null_mut(); let parsed = read_json_with_schema(opt_dest.add(inner_offset), json_str, inner_schema, &mut parse_err); libc::free(json_str as *mut c_void); if !parse_err.is_null() { libc::free(parse_err as *mut c_void); *opt_dest = 0; // None } else { *opt_dest = if parsed.is_null() { 0 } else { 1 }; } } else { *opt_dest = 0; // None } } MorlocExpressionType::Hash => { // Hash child data and return hex string let child = (*expr).expr.unary_expr; let child_schema = (*child).schema; let child_result = morloc_eval_r(child, ptr::null_mut(), 0, bndvars)?; extern "C" { fn mlc_hash(data: *const c_void, 
schema: *const CSchema, errmsg: *mut *mut c_char) -> *mut c_char; } let mut err: *mut c_char = ptr::null_mut(); let hex = mlc_hash(child_result as *const c_void, child_schema, &mut err); if !err.is_null() { let msg = CStr::from_ptr(err).to_string_lossy().into_owned(); libc::free(err as *mut c_void); return Err(MorlocError::Other(msg)); } let hex_len = libc::strlen(hex); let str_relptr: RelPtr = if hex_len > 0 { let abs = shm::shmemcpy(hex as *const u8, hex_len)?; libc::free(hex as *mut c_void); shm::abs2rel(abs)? } else { libc::free(hex as *mut c_void); -1isize as RelPtr }; let arr = shm::Array { size: hex_len, data: str_relptr }; ptr::copy_nonoverlapping(&arr as *const shm::Array as *const u8, dest, width); } MorlocExpressionType::Save => { // Save value to file at path let save = (*expr).expr.save_expr; let value_expr = (*save).value; let path_expr = (*save).path; let fmt = CStr::from_ptr((*save).format).to_str().unwrap_or("voidstar"); let value_schema = (*value_expr).schema; let value_result = morloc_eval_r(value_expr, ptr::null_mut(), 0, bndvars)?; let path_result = morloc_eval_r(path_expr, ptr::null_mut(), 0, bndvars)?; // Extract path string from voidstar Array let path_arr = &*(path_result as *const shm::Array); let path_abs = shm::rel2abs(path_arr.data)?; let path_cstr = libc::malloc(path_arr.size + 1) as *mut c_char; if path_cstr.is_null() { return Err(MorlocError::Other("Failed to allocate for @save path".into())); } ptr::copy_nonoverlapping(path_abs, path_cstr as *mut u8, path_arr.size); *path_cstr.add(path_arr.size) = 0; extern "C" { fn mlc_save(data: *const c_void, schema: *const CSchema, path: *const c_char, errmsg: *mut *mut c_char) -> i32; fn mlc_save_json(data: *const c_void, schema: *const CSchema, path: *const c_char, errmsg: *mut *mut c_char) -> i32; fn mlc_save_voidstar(data: *const c_void, schema: *const CSchema, path: *const c_char, errmsg: *mut *mut c_char) -> i32; } let mut err: *mut c_char = ptr::null_mut(); let rc = match fmt { "json" => 
mlc_save_json(value_result as *const c_void, value_schema, path_cstr, &mut err), "msgpack" => mlc_save(value_result as *const c_void, value_schema, path_cstr, &mut err), _ => mlc_save_voidstar(value_result as *const c_void, value_schema, path_cstr, &mut err), }; libc::free(path_cstr as *mut c_void); if rc != 0 && !err.is_null() { let msg = CStr::from_ptr(err).to_string_lossy().into_owned(); libc::free(err as *mut c_void); return Err(MorlocError::Other(msg)); } // Return unit (zero-fill dest) ptr::write_bytes(dest, 0, width); } MorlocExpressionType::Load => { // Load data from file, return optional let child = (*expr).expr.unary_expr; let child_result = morloc_eval_r(child, ptr::null_mut(), 0, bndvars)?; // Extract path string from voidstar Array let path_arr = &*(child_result as *const shm::Array); let path_abs = shm::rel2abs(path_arr.data)?; let path_cstr = libc::malloc(path_arr.size + 1) as *mut c_char; if path_cstr.is_null() { return Err(MorlocError::Other("Failed to allocate for @load path".into())); } ptr::copy_nonoverlapping(path_abs, path_cstr as *mut u8, path_arr.size); *path_cstr.add(path_arr.size) = 0; extern "C" { fn mlc_load(path: *const c_char, schema: *const CSchema, errmsg: *mut *mut c_char) -> *mut c_void; } let opt_dest = dest; let inner_schema = *(*schema).parameters; let inner_offset = *(*schema).offsets; let mut err: *mut c_char = ptr::null_mut(); let loaded = mlc_load(path_cstr, inner_schema, &mut err); libc::free(path_cstr as *mut c_void); if loaded.is_null() { if !err.is_null() { libc::free(err as *mut c_void); } *opt_dest = 0; // None } else { // Copy loaded voidstar data into the optional's inner slot let inner_width = (*inner_schema).width; ptr::copy_nonoverlapping(loaded as *const u8, opt_dest.add(inner_offset), inner_width); libc::free(loaded as *mut c_void); *opt_dest = 1; // Some } } _ => { return Err(MorlocError::Other("Illegal top expression".into())); } } Ok(dest) } // ── Public entry point 
────���─────────────────────────────────��──────────────── #[no_mangle] pub unsafe extern "C" fn morloc_eval( expr: *mut MorlocExpression, return_schema: *mut CSchema, arg_voidstar: *mut *mut u8, arg_schemas: *mut *mut CSchema, nargs: usize, errmsg: *mut *mut c_char, ) -> AbsPtr { clear_errmsg(errmsg); let mut bndvars: BndVars = HashMap::new(); let new_expr: *mut MorlocExpression; let mut allocated_wrappers: Vec<*mut c_void> = Vec::new(); let eval_expr = match (*expr).etype { MorlocExpressionType::Lam | MorlocExpressionType::Pat => { // Wrap CLI args as voidstar data expressions and apply let arg_exprs = libc::calloc(nargs, std::mem::size_of::<*mut MorlocExpression>()) as *mut *mut MorlocExpression; allocated_wrappers.push(arg_exprs as *mut c_void); for i in 0..nargs { let ae = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression; (*ae).etype = MorlocExpressionType::Dat; (*ae).schema = *arg_schemas.add(i); let ad = libc::calloc(1, std::mem::size_of::()) as *mut MorlocData; (*ad).is_voidstar = true; (*ad).data.voidstar = *arg_voidstar.add(i) as *mut c_void; (*ae).expr.data_expr = ad; *arg_exprs.add(i) = ae; allocated_wrappers.push(ad as *mut c_void); allocated_wrappers.push(ae as *mut c_void); } let app = libc::calloc(1, std::mem::size_of::()) as *mut MorlocAppExpression; (*app).atype = if (*expr).etype == MorlocExpressionType::Lam { (*app).function.lambda = (*expr).expr.lam_expr; MorlocAppExpressionType::Lambda } else { (*app).function.pattern = (*expr).expr.pattern_expr; MorlocAppExpressionType::Pattern }; (*app).args = arg_exprs; (*app).nargs = nargs; allocated_wrappers.push(app as *mut c_void); new_expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression; (*new_expr).etype = MorlocExpressionType::App; (*new_expr).schema = return_schema; (*new_expr).expr.app_expr = app; allocated_wrappers.push(new_expr as *mut c_void); new_expr } _ => expr, }; let result = morloc_eval_r(eval_expr, ptr::null_mut(), 0, &mut bndvars); // Free wrapper nodes 
for p in &allocated_wrappers { libc::free(*p); } match result { Ok(ptr) => ptr, Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() } } } ================================================ FILE: data/rust/morloc-runtime/src/ffi.rs ================================================ //! C ABI wrappers for libmorloc.so //! //! These `extern "C"` functions match the signatures in morloc.h. //! Internally they call the Rust implementations and convert between //! Rust Result and the C ERRMSG convention (char** last arg). use std::ffi::{c_char, c_void, CStr, CString}; use std::ptr; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::schema::{self}; use crate::shm::{self, AbsPtr, BlockHeader, RelPtr, ShmHeader, VolPtr}; pub use crate::cschema::CSchema; // ── Macro for ERRMSG-pattern FFI wrappers ────────────────────────────────── /// Wrap a Rust Result-returning expression into the C ERRMSG convention. /// On success: clears errmsg, returns the value. /// On error: sets errmsg, returns $fail. macro_rules! ffi_try { ($errmsg:expr, $fail:expr, $body:expr) => {{ unsafe { clear_errmsg($errmsg) }; match $body { Ok(val) => val, Err(e) => { unsafe { set_errmsg($errmsg, &e) }; $fail } } }}; } // CSchema type and conversions are in cschema.rs (always compiled). 
// ── SHM functions ────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn shinit( shm_basename: *const c_char, volume_index: usize, shm_size: usize, errmsg: *mut *mut c_char, ) -> *mut ShmHeader { let basename = CStr::from_ptr(shm_basename).to_string_lossy(); ffi_try!(errmsg, ptr::null_mut(), shm::shinit(&basename, volume_index, shm_size)) } #[no_mangle] pub unsafe extern "C" fn shopen( volume_index: usize, errmsg: *mut *mut c_char, ) -> *mut ShmHeader { ffi_try!( errmsg, ptr::null_mut(), shm::shopen(volume_index).and_then(|opt| opt.ok_or(MorlocError::Shm("volume not found".into()))) ) } #[no_mangle] pub unsafe extern "C" fn shclose(errmsg: *mut *mut c_char) -> bool { ffi_try!(errmsg, false, shm::shclose().map(|_| true)) } #[no_mangle] pub unsafe extern "C" fn shm_set_fallback_dir(dir: *const c_char) { if !dir.is_null() { let d = CStr::from_ptr(dir).to_string_lossy(); shm::shm_set_fallback_dir(&d); } } #[no_mangle] pub unsafe extern "C" fn shmalloc(size: usize, errmsg: *mut *mut c_char) -> *mut c_void { ffi_try!(errmsg, ptr::null_mut(), shm::shmalloc(size).map(|p| p as *mut c_void)) } #[no_mangle] pub unsafe extern "C" fn shmemcpy( src: *mut c_void, size: usize, errmsg: *mut *mut c_char, ) -> *mut c_void { ffi_try!( errmsg, ptr::null_mut(), shm::shmemcpy(src as *const u8, size).map(|p| p as *mut c_void) ) } #[no_mangle] pub unsafe extern "C" fn shcalloc( nmemb: usize, size: usize, errmsg: *mut *mut c_char, ) -> *mut c_void { ffi_try!(errmsg, ptr::null_mut(), shm::shcalloc(nmemb, size).map(|p| p as *mut c_void)) } #[no_mangle] pub unsafe extern "C" fn shrealloc( ptr: *mut c_void, size: usize, errmsg: *mut *mut c_char, ) -> *mut c_void { // TODO: implement shrealloc in shm.rs let _ = (ptr, size); set_errmsg(errmsg, &MorlocError::Shm("shrealloc not yet implemented".into())); ptr::null_mut() } #[no_mangle] pub unsafe extern "C" fn shfree(ptr: *mut c_void, errmsg: *mut *mut c_char) -> bool { ffi_try!(errmsg, false, 
shm::shfree(ptr as AbsPtr).map(|_| true)) } #[no_mangle] pub unsafe extern "C" fn shincref(ptr: *mut c_void, errmsg: *mut *mut c_char) -> bool { ffi_try!(errmsg, false, shm::shincref(ptr as AbsPtr).map(|_| true)) } // shfree_by_schema is provided by cli.c #[no_mangle] pub unsafe extern "C" fn total_shm_size() -> usize { shm::total_shm_size() } #[no_mangle] pub unsafe extern "C" fn rel2abs(ptr: RelPtr, errmsg: *mut *mut c_char) -> *mut c_void { ffi_try!(errmsg, ptr::null_mut(), shm::rel2abs(ptr).map(|p| p as *mut c_void)) } #[no_mangle] pub unsafe extern "C" fn abs2rel(ptr: *mut c_void, errmsg: *mut *mut c_char) -> RelPtr { ffi_try!(errmsg, shm::RELNULL, shm::abs2rel(ptr as AbsPtr)) } #[no_mangle] pub unsafe extern "C" fn abs2shm(ptr: *mut c_void, errmsg: *mut *mut c_char) -> *mut ShmHeader { ffi_try!(errmsg, ptr::null_mut(), shm::abs2shm(ptr as AbsPtr)) } #[no_mangle] pub unsafe extern "C" fn abs2blk(ptr: *mut c_void, errmsg: *mut *mut c_char) -> *mut BlockHeader { clear_errmsg(errmsg); if ptr.is_null() { set_errmsg(errmsg, &MorlocError::NullPointer); return ptr::null_mut(); } let blk = (ptr as *mut u8).sub(std::mem::size_of::()) as *mut BlockHeader; if (*blk).magic != shm::BLK_MAGIC { set_errmsg(errmsg, &MorlocError::Shm("Bad block magic".into())); return ptr::null_mut(); } blk } #[no_mangle] pub unsafe extern "C" fn vol2rel(ptr: VolPtr, shm_ptr: *const ShmHeader) -> RelPtr { shm::vol2rel(ptr, &*shm_ptr) } #[no_mangle] pub unsafe extern "C" fn vol2abs(ptr: VolPtr, shm_ptr: *const ShmHeader) -> *mut c_void { shm::vol2abs(ptr, shm_ptr) as *mut c_void } // ── Schema functions ─────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn parse_schema( schema_str: *const c_char, errmsg: *mut *mut c_char, ) -> *mut CSchema { clear_errmsg(errmsg); if schema_str.is_null() { set_errmsg(errmsg, &MorlocError::NullPointer); return ptr::null_mut(); } let s = CStr::from_ptr(schema_str).to_string_lossy(); match schema::parse_schema(&s) { 
Ok(schema) => CSchema::from_rust(&schema), Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() } } } #[no_mangle] pub unsafe extern "C" fn schema_to_string(schema: *const CSchema) -> *mut c_char { if schema.is_null() { return ptr::null_mut(); } let rs = CSchema::to_rust(schema); let s = schema::schema_to_string(&rs); match CString::new(s) { Ok(cs) => cs.into_raw(), Err(_) => ptr::null_mut(), } } #[no_mangle] pub unsafe extern "C" fn free_schema(schema: *mut CSchema) { CSchema::free(schema); } #[no_mangle] pub unsafe extern "C" fn schema_is_fixed_width(schema: *const CSchema) -> bool { if schema.is_null() { return true; } let rs = CSchema::to_rust(schema); rs.is_fixed_width() } #[no_mangle] pub unsafe extern "C" fn schema_alignment(schema: *const CSchema) -> usize { if schema.is_null() { return 1; } let rs = CSchema::to_rust(schema); rs.alignment() } // Hash: morloc_xxh64 is provided by utility.c (via xxhash.h inline) // ── Serialization ────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn pack_with_schema( mlc: *const c_void, schema: *const CSchema, mpkptr: *mut *mut c_char, mpk_size: *mut usize, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); *mpkptr = ptr::null_mut(); *mpk_size = 0; let rs = CSchema::to_rust(schema); match crate::mpack::pack_with_schema(mlc as AbsPtr, &rs) { Ok(data) => { *mpk_size = data.len(); let buf = libc::malloc(data.len()) as *mut u8; if buf.is_null() { set_errmsg(errmsg, &MorlocError::Shm("malloc failed".into())); return 1; } std::ptr::copy_nonoverlapping(data.as_ptr(), buf, data.len()); *mpkptr = buf as *mut c_char; 0 } Err(e) => { set_errmsg(errmsg, &e); 1 } } } #[no_mangle] pub unsafe extern "C" fn pack( mlc: *const c_void, schema_str: *const c_char, mpkptr: *mut *mut c_char, mpk_size: *mut usize, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); let s = CStr::from_ptr(schema_str).to_string_lossy(); let schema = match schema::parse_schema(&s) { Ok(s) => s, Err(e) => { 
set_errmsg(errmsg, &e); return 1; } }; let cs = CSchema::from_rust(&schema); let result = pack_with_schema(mlc, cs, mpkptr, mpk_size, errmsg); free_schema(cs); result } #[no_mangle] pub unsafe extern "C" fn unpack_with_schema( mpk: *const c_char, mpk_size: usize, schema: *const CSchema, mlcptr: *mut *mut c_void, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); *mlcptr = ptr::null_mut(); let data = std::slice::from_raw_parts(mpk as *const u8, mpk_size); let rs = CSchema::to_rust(schema); match crate::mpack::unpack_with_schema(data, &rs) { Ok(ptr) => { *mlcptr = ptr as *mut c_void; 0 } Err(e) => { set_errmsg(errmsg, &e); 1 } } } // quoted, print_voidstar, pretty_print_voidstar, read_json_with_schema // are provided by json.c // ── Schema utility functions needed by C code ────────────────────────────── #[no_mangle] pub unsafe extern "C" fn calculate_voidstar_size( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> usize { clear_errmsg(errmsg); if data.is_null() || schema.is_null() { return 0; } let rs = CSchema::to_rust(schema); match calc_voidstar_size_inner(data as *const u8, &rs) { Ok(size) => size, Err(e) => { set_errmsg(errmsg, &e); 0 } } } pub fn calc_voidstar_size_inner( data: *const u8, schema: &crate::schema::Schema, ) -> Result { use crate::schema::SerialType; use crate::shm::{self, Array, Tensor}; // SAFETY: data points to voidstar data in SHM with layout described by schema. // We only read Array/Tensor headers and follow relptrs to compute total size. 
unsafe { match schema.serial_type { SerialType::String => { let arr = &*(data as *const Array); Ok(std::mem::size_of::() + arr.size) } SerialType::Array => { let arr = &*(data as *const Array); let mut size = std::mem::size_of::(); if arr.size == 0 { return Ok(size); } let elem_schema = &schema.parameters[0]; let elem_width = elem_schema.width; size += elem_schema.alignment().saturating_sub(1); if schema.is_fixed_width() { size += elem_width * arr.size; } else { let elem_data = shm::rel2abs(arr.data)?; for i in 0..arr.size { size += calc_voidstar_size_inner( elem_data.add(i * elem_width), elem_schema, )?; } } Ok(size) } SerialType::Optional => { let tag = *data; let mut size = schema.width; if tag != 0 { let inner_offset = schema.offsets.first().copied().unwrap_or( shm::align_up(1, schema.parameters[0].alignment().max(1)), ); let inner_total = calc_voidstar_size_inner( data.add(inner_offset), &schema.parameters[0], )?; if inner_total > schema.parameters[0].width { size += inner_total - schema.parameters[0].width; } } Ok(size) } SerialType::Tensor => { let tensor = &*(data as *const Tensor); let ndim = schema.offsets.first().copied().unwrap_or(0); let elem_width = schema.parameters[0].width; let mut size = std::mem::size_of::(); size += schema.parameters[0].alignment().saturating_sub(1); size += ndim * std::mem::size_of::(); size += schema.parameters[0].alignment().saturating_sub(1); size += tensor.total_elements * elem_width; Ok(size) } SerialType::Tuple | SerialType::Map => { if schema.is_fixed_width() { Ok(schema.width) } else { let mut size = schema.width; for i in 0..schema.parameters.len() { let elem_total = calc_voidstar_size_inner( data.add(schema.offsets[i]), &schema.parameters[i], )?; if elem_total > schema.parameters[i].width { size += elem_total - schema.parameters[i].width; } } Ok(size) } } _ => Ok(schema.width), } } } #[no_mangle] pub unsafe extern "C" fn get_ptr( schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_void { 
clear_errmsg(errmsg); if schema.is_null() { return ptr::null_mut(); } let rs = CSchema::to_rust(schema); ffi_try!(errmsg, ptr::null_mut(), shm::shmalloc(rs.width).map(|p| p as *mut c_void)) } ================================================ FILE: data/rust/morloc-runtime/src/hash.rs ================================================ //! xxHash64 wrapper using the twox-hash crate. //! Replaces the 1500-line xxhash.h header. use std::hash::Hasher; use twox_hash::XxHash64; const DEFAULT_SEED: u64 = 0; /// Compute xxHash64 of a byte slice with the default seed. pub fn xxh64(data: &[u8]) -> u64 { let mut hasher = XxHash64::with_seed(DEFAULT_SEED); hasher.write(data); hasher.finish() } /// Compute xxHash64 with a custom seed. pub fn xxh64_with_seed(data: &[u8], seed: u64) -> u64 { let mut hasher = XxHash64::with_seed(seed); hasher.write(data); hasher.finish() } /// Mix multiple hash values together (for composite keys). pub fn mix(a: u64, b: u64) -> u64 { // Use xxHash to mix two 64-bit values let mut hasher = XxHash64::with_seed(a); hasher.write(&b.to_le_bytes()); hasher.finish() } #[cfg(test)] mod tests { use super::*; #[test] fn test_xxh64_empty() { let h = xxh64(b""); assert_ne!(h, 0); // xxHash of empty with seed 0 is a known non-zero value } #[test] fn test_xxh64_deterministic() { let a = xxh64(b"hello"); let b = xxh64(b"hello"); assert_eq!(a, b); } #[test] fn test_xxh64_different_inputs() { let a = xxh64(b"hello"); let b = xxh64(b"world"); assert_ne!(a, b); } #[test] fn test_mix_commutative_ish() { // mix is not commutative by design let ab = mix(1, 2); let ba = mix(2, 1); assert_ne!(ab, ba); } } ================================================ FILE: data/rust/morloc-runtime/src/http_ffi.rs ================================================ //! C ABI wrappers for HTTP request/response handling. //! Replaces http.c. 
use std::ffi::{c_char, c_void}; use std::ptr; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; const HTTP_MAX_HEADERS: usize = 8192; const HTTP_MAX_REQUEST: usize = 4 * 1024 * 1024; // ── C-compatible types ─────────────────────────────────────────────────────── #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum HttpMethod { Get = 0, Post = 1, Delete = 2, Options = 3, } #[repr(C)] pub struct HttpRequest { pub method: HttpMethod, pub path: [c_char; 256], pub body: *mut c_char, pub body_len: usize, } #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum DaemonMethod { Call = 0, Discover = 1, Health = 2, Eval = 3, Typecheck = 4, Bind = 5, Bindings = 6, Unbind = 7, } #[repr(C)] pub struct DaemonRequest { pub id: *mut c_char, pub method: DaemonMethod, pub command: *mut c_char, pub args_json: *mut c_char, pub expr: *mut c_char, pub name: *mut c_char, } // ── http_parse_request ─────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn http_parse_request( fd: i32, errmsg: *mut *mut c_char, ) -> *mut HttpRequest { clear_errmsg(errmsg); // Read headers byte by byte until \r\n\r\n let mut header_buf = vec![0u8; HTTP_MAX_HEADERS]; let mut header_len: usize = 0; let mut header_end_pos: Option = None; while header_len < HTTP_MAX_HEADERS - 1 { let n = libc::recv(fd, header_buf.as_mut_ptr().add(header_len) as *mut c_void, 1, 0); if n <= 0 { set_errmsg(errmsg, &MorlocError::Other("Connection closed while reading HTTP headers".into())); return ptr::null_mut(); } header_len += 1; if header_len >= 4 { let tail = &header_buf[header_len - 4..header_len]; if tail == b"\r\n\r\n" { header_end_pos = Some(header_len - 4); break; } } } let header_end = match header_end_pos { Some(p) => p, None => { set_errmsg(errmsg, &MorlocError::Other("HTTP headers too large or malformed".into())); return ptr::null_mut(); } }; let header_str = std::str::from_utf8(&header_buf[..header_len]).unwrap_or(""); // Allocate request let req = 
libc::calloc(1, std::mem::size_of::()) as *mut HttpRequest; if req.is_null() { set_errmsg(errmsg, &MorlocError::Other("Failed to allocate http_request_t".into())); return ptr::null_mut(); } // Parse method if header_str.starts_with("GET ") { (*req).method = HttpMethod::Get; } else if header_str.starts_with("POST ") { (*req).method = HttpMethod::Post; } else if header_str.starts_with("DELETE ") { (*req).method = HttpMethod::Delete; } else if header_str.starts_with("OPTIONS ") { (*req).method = HttpMethod::Options; } else { libc::free(req as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Unsupported HTTP method".into())); return ptr::null_mut(); } // Parse path let first_space = header_str.find(' ').unwrap_or(0) + 1; let path_end = header_str[first_space..].find(' ').map(|p| first_space + p).unwrap_or(first_space); let path = &header_str[first_space..path_end]; // Strip query string let path = path.split('?').next().unwrap_or(path); let path_len = path.len().min(255); ptr::copy_nonoverlapping(path.as_ptr(), (*req).path.as_mut_ptr() as *mut u8, path_len); (*req).path[path_len] = 0; // Find Content-Length let mut content_length: usize = 0; let header_lower = header_str.to_ascii_lowercase(); if let Some(pos) = header_lower.find("content-length:") { let after = &header_str[pos + 15..]; let trimmed = after.trim_start(); if let Some(end) = trimmed.find(|c: char| !c.is_ascii_digit()) { content_length = trimmed[..end].parse().unwrap_or(0); } else { content_length = trimmed.parse().unwrap_or(0); } } // Read body if content_length > 0 { if content_length > HTTP_MAX_REQUEST { libc::free(req as *mut c_void); set_errmsg(errmsg, &MorlocError::Other(format!("HTTP body too large: {} bytes", content_length))); return ptr::null_mut(); } let body = libc::malloc(content_length + 1) as *mut u8; if body.is_null() { libc::free(req as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Failed to allocate HTTP body buffer".into())); return ptr::null_mut(); } // Some body bytes may 
be in header_buf after \r\n\r\n let after_headers = header_end + 4; let already_read = (header_len - after_headers).min(content_length); if already_read > 0 { ptr::copy_nonoverlapping(header_buf.as_ptr().add(after_headers), body, already_read); } let mut total = already_read; while total < content_length { let n = libc::recv(fd, body.add(total) as *mut c_void, content_length - total, 0); if n <= 0 { libc::free(body as *mut c_void); libc::free(req as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Connection closed while reading HTTP body".into())); return ptr::null_mut(); } total += n as usize; } *body.add(content_length) = 0; (*req).body = body as *mut c_char; (*req).body_len = content_length; } req } #[no_mangle] pub unsafe extern "C" fn http_free_request(req: *mut HttpRequest) { if req.is_null() { return; } if !(*req).body.is_null() { libc::free((*req).body as *mut c_void); } libc::free(req as *mut c_void); } // ── http_write_response ────────────────────────────────────────────────────── fn http_status_text(status: i32) -> &'static str { match status { 200 => "OK", 400 => "Bad Request", 404 => "Not Found", 405 => "Method Not Allowed", 500 => "Internal Server Error", _ => "Unknown", } } #[no_mangle] pub unsafe extern "C" fn http_write_response( fd: i32, status: i32, content_type: *const c_char, body: *const c_char, body_len: usize, ) -> bool { let ct = if content_type.is_null() { "application/json" } else { std::ffi::CStr::from_ptr(content_type).to_str().unwrap_or("application/json") }; let header = format!( "HTTP/1.1 {} {}\r\n\ Content-Type: {}\r\n\ Content-Length: {}\r\n\ Connection: close\r\n\ Access-Control-Allow-Origin: *\r\n\ Access-Control-Allow-Methods: GET, POST, OPTIONS\r\n\ Access-Control-Allow-Headers: Content-Type\r\n\ \r\n", status, http_status_text(status), ct, body_len ); let n = libc::send(fd, header.as_ptr() as *const c_void, header.len(), crate::utility::SEND_NOSIGNAL); if n < 0 { return false; } if !body.is_null() && body_len > 0 { let 
mut total: usize = 0; while total < body_len { let n = libc::send(fd, (body as *const u8).add(total) as *const c_void, body_len - total, crate::utility::SEND_NOSIGNAL); if n <= 0 { return false; } total += n as usize; } } true } // ── http_to_daemon_request ─────────────────────────────────────────────────── /// Extract a JSON string value after a key like "expr": "..." fn extract_json_string(body: &str, key: &str) -> Option { let search = format!("\"{}\"", key); let pos = body.find(&search)?; let after = &body[pos + search.len()..]; let after = after.trim_start(); let after = after.strip_prefix(':')?; let after = after.trim_start(); if !after.starts_with('"') { return None; } let after = &after[1..]; // skip opening quote let mut result = String::new(); let mut chars = after.chars(); loop { match chars.next() { Some('\\') => { if let Some(c) = chars.next() { result.push(c); } } Some('"') => break, Some(c) => result.push(c), None => break, } } Some(result) } #[no_mangle] pub unsafe extern "C" fn http_to_daemon_request( req: *mut HttpRequest, errmsg: *mut *mut c_char, ) -> *mut DaemonRequest { clear_errmsg(errmsg); let dreq = libc::calloc(1, std::mem::size_of::()) as *mut DaemonRequest; if dreq.is_null() { set_errmsg(errmsg, &MorlocError::Other("Failed to allocate daemon_request_t".into())); return ptr::null_mut(); } let path = std::ffi::CStr::from_ptr((*req).path.as_ptr()) .to_str().unwrap_or(""); let method = (*req).method; let body_str = if !(*req).body.is_null() && (*req).body_len > 0 { std::str::from_utf8(std::slice::from_raw_parts((*req).body as *const u8, (*req).body_len)) .unwrap_or("") } else { "" }; // GET /health if method == HttpMethod::Get && path == "/health" { (*dreq).method = DaemonMethod::Health; return dreq; } // GET /discover if method == HttpMethod::Get && path == "/discover" { (*dreq).method = DaemonMethod::Discover; return dreq; } // POST /eval if method == HttpMethod::Post && path == "/eval" { (*dreq).method = DaemonMethod::Eval; if let 
Some(expr) = extract_json_string(body_str, "expr") { (*dreq).expr = libc::strdup(expr.as_ptr() as *const c_char); // strdup from Rust string - need null terminated let c = std::ffi::CString::new(expr).unwrap_or_default(); (*dreq).expr = libc::strdup(c.as_ptr()); } if (*dreq).expr.is_null() { libc::free(dreq as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Missing 'expr' field in /eval request body".into())); return ptr::null_mut(); } return dreq; } // POST /typecheck if method == HttpMethod::Post && path == "/typecheck" { (*dreq).method = DaemonMethod::Typecheck; if let Some(expr) = extract_json_string(body_str, "expr") { let c = std::ffi::CString::new(expr).unwrap_or_default(); (*dreq).expr = libc::strdup(c.as_ptr()); } if (*dreq).expr.is_null() { libc::free(dreq as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Missing 'expr' field in /typecheck request body".into())); return ptr::null_mut(); } return dreq; } // POST /bind if method == HttpMethod::Post && path == "/bind" { (*dreq).method = DaemonMethod::Bind; if let Some(expr) = extract_json_string(body_str, "expr") { let c = std::ffi::CString::new(expr).unwrap_or_default(); (*dreq).expr = libc::strdup(c.as_ptr()); } if let Some(name) = extract_json_string(body_str, "name") { let c = std::ffi::CString::new(name).unwrap_or_default(); (*dreq).name = libc::strdup(c.as_ptr()); } if (*dreq).expr.is_null() { libc::free(dreq as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Missing 'expr' field in /bind request body".into())); return ptr::null_mut(); } return dreq; } // GET /bindings if method == HttpMethod::Get && path == "/bindings" { (*dreq).method = DaemonMethod::Bindings; return dreq; } // DELETE /bindings/ if method == HttpMethod::Delete && path.starts_with("/bindings/") { let name = &path[10..]; if name.is_empty() { libc::free(dreq as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Missing binding name in /bindings/ path".into())); return ptr::null_mut(); } (*dreq).method = 
DaemonMethod::Unbind; let c = std::ffi::CString::new(name).unwrap_or_default(); (*dreq).name = libc::strdup(c.as_ptr()); return dreq; } // POST /call/ if method == HttpMethod::Post && path.starts_with("/call/") { let cmd_name = &path[6..]; if cmd_name.is_empty() { libc::free(dreq as *mut c_void); set_errmsg(errmsg, &MorlocError::Other("Missing command name in /call/ path".into())); return ptr::null_mut(); } (*dreq).method = DaemonMethod::Call; let c = std::ffi::CString::new(cmd_name).unwrap_or_default(); (*dreq).command = libc::strdup(c.as_ptr()); // Parse body let trimmed = body_str.trim(); if trimmed.starts_with('[') { let c = std::ffi::CString::new(trimmed).unwrap_or_default(); (*dreq).args_json = libc::strdup(c.as_ptr()); } else if trimmed.starts_with('{') { // Extract "args" array if let Some(args_pos) = trimmed.find("\"args\"") { let after = &trimmed[args_pos + 6..]; let after = after.trim_start().strip_prefix(':').unwrap_or(after).trim_start(); if after.starts_with('[') { // Find matching ] let mut depth = 0i32; let mut in_string = false; let mut end = 0; for (i, ch) in after.chars().enumerate() { if in_string { if ch == '\\' { continue; } if ch == '"' { in_string = false; } } else { if ch == '"' { in_string = true; } else if ch == '[' { depth += 1; } else if ch == ']' { depth -= 1; if depth == 0 { end = i + 1; break; } } } } if end > 0 { let arr = &after[..end]; let c = std::ffi::CString::new(arr).unwrap_or_default(); (*dreq).args_json = libc::strdup(c.as_ptr()); } } } } return dreq; } // OPTIONS (CORS preflight) if method == HttpMethod::Options { (*dreq).method = DaemonMethod::Health; return dreq; } libc::free(dreq as *mut c_void); let method_str = match method { HttpMethod::Get => "GET", HttpMethod::Post => "POST", HttpMethod::Delete => "DELETE", HttpMethod::Options => "OPTIONS", }; set_errmsg(errmsg, &MorlocError::Other(format!("Unknown HTTP endpoint: {} {}", method_str, path))); ptr::null_mut() } ================================================ FILE: 
data/rust/morloc-runtime/src/intrinsics.rs
================================================
//! Intrinsic functions for morloc: save/load/show/read/hash.
//! Replaces intrinsics.c. These are thin wrappers around serialization functions.

use std::ffi::{c_char, c_void, CStr, CString};
use std::ptr;

use crate::cschema::CSchema;
use crate::error::{clear_errmsg, set_errmsg, MorlocError};

// ── mlc_save: serialize to msgpack file ────────────────────────────────────

// Serialize voidstar `data` (layout described by `schema`) to MessagePack and
// write it atomically to `path`. Returns 0 on success; on failure returns 1
// with ownership of the inner error message transferred to *errmsg.
// pack_with_schema and write_atomic are resolved at link time from the other
// runtime translation units.
#[no_mangle]
pub unsafe extern "C" fn mlc_save(
    data: *const c_void,
    schema: *const CSchema,
    path: *const c_char,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    extern "C" {
        fn pack_with_schema(
            mlc: *const c_void,
            schema: *const CSchema,
            mpk: *mut *mut c_char,
            mpk_size: *mut usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
        fn write_atomic(
            filename: *const c_char,
            data: *const u8,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
    }
    let mut err: *mut c_char = ptr::null_mut();
    let mut mpk: *mut c_char = ptr::null_mut();
    let mut mpk_size: usize = 0;
    let rc = pack_with_schema(data, schema, &mut mpk, &mut mpk_size, &mut err);
    if rc != 0 {
        // Propagate the callee's malloc'd message to the caller.
        *errmsg = err;
        return 1;
    }
    let wrc = write_atomic(path, mpk as *const u8, mpk_size, &mut err);
    // The packed buffer is malloc'd by pack_with_schema; free it either way.
    libc::free(mpk as *mut c_void);
    if wrc != 0 {
        *errmsg = err;
        return 1;
    }
    0
}

// ── mlc_save_json: serialize to JSON file ──────────────────────────────────

// Serialize voidstar `data` to a JSON string (via the C-side
// voidstar_to_json_string) and write it atomically to `path`.
// Returns 0 on success, 1 on failure with *errmsg set.
#[no_mangle]
pub unsafe extern "C" fn mlc_save_json(
    data: *const c_void,
    schema: *const CSchema,
    path: *const c_char,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    extern "C" {
        fn voidstar_to_json_string(
            data: *const c_void,
            schema: *const CSchema,
            errmsg: *mut *mut c_char,
        ) -> *mut c_char;
        fn write_atomic(
            filename: *const c_char,
            data: *const u8,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
    }
    let mut err: *mut c_char = ptr::null_mut();
    let json = voidstar_to_json_string(data, schema, &mut err);
    if json.is_null() {
        *errmsg = err;
        return 1;
    }
    let json_len = libc::strlen(json);
    let wrc = write_atomic(path, json as *const u8, json_len, &mut err);
    libc::free(json as *mut c_void);
    if wrc != 0 {
        *errmsg = err;
        return 1;
    }
    0
}

// ── mlc_save_voidstar: serialize to binary voidstar file ───────────────────

// NOTE(review): this function is truncated at the end of the visible chunk;
// the text below is preserved exactly as extracted.
#[no_mangle]
pub unsafe extern "C" fn mlc_save_voidstar(
    data: *const c_void,
    schema: *const CSchema,
    path: *const c_char,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    extern "C" {
        fn flatten_voidstar_to_buffer(
            data: *const c_void,
            schema: *const CSchema,
            out_buf: *mut *mut u8,
            out_size: *mut usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
        fn write_binary_fd(
            fd: i32,
            buf: *const c_char,
            count: usize,
            errmsg: *mut *mut c_char,
        ) -> i32;
    }
    let mut err: *mut c_char = ptr::null_mut();
    // Get directory for temp file
    let path_str = CStr::from_ptr(path).to_string_lossy();
    let parent = std::path::Path::new(path_str.as_ref()).parent();
    let dir = match parent {
        Some(p) if !p.as_os_str().is_empty() => p.to_string_lossy().into_owned(),
        _ => ".".to_string(),
    };
    let tmp_template = format!("{}/morloc-tmp_XXXXXX\0", dir);
    let mut tmp_buf: Vec = tmp_template.into_bytes();
    let fd = libc::mkstemp(tmp_buf.as_mut_ptr() as *mut c_char);
    if fd < 0 {
        set_errmsg(errmsg, &MorlocError::Io(std::io::Error::last_os_error()));
        return 1;
    }
    // Write packet header placeholder
    let header_size = std::mem::size_of::();
    let zeros = vec![0u8; header_size];
    if write_binary_fd(fd, zeros.as_ptr() as *const c_char, header_size, &mut err) != 0 {
        libc::close(fd);
        libc::unlink(tmp_buf.as_ptr() as *const c_char);
        *errmsg = err;
        return 1;
    }
    // Flatten voidstar
    let mut blob: *mut u8 = ptr::null_mut();
    let mut blob_size: usize = 0;
    if flatten_voidstar_to_buffer(data, schema, &mut blob, &mut blob_size, &mut err) != 0 {
        libc::close(fd);
        libc::unlink(tmp_buf.as_ptr() as *const c_char);
        *errmsg = err;
        return 1;
    }
    // Write flattened data
    if write_binary_fd(fd, blob as *const c_char, blob_size, &mut err) != 0 {
        libc::free(blob as *mut c_void);
        libc::close(fd);
        libc::unlink(tmp_buf.as_ptr() as *const c_char);
        *errmsg
= err; return 1; } libc::free(blob as *mut c_void); // Seek back and write real header libc::lseek(fd, 0, libc::SEEK_SET); let header = crate::packet::PacketHeader::data_mesg( crate::packet::PACKET_FORMAT_VOIDSTAR, blob_size as u64, ); let hdr_bytes = header.to_bytes(); write_binary_fd(fd, hdr_bytes.as_ptr() as *const c_char, hdr_bytes.len(), &mut err); libc::fsync(fd); libc::close(fd); // Atomic rename if libc::rename(tmp_buf.as_ptr() as *const c_char, path) != 0 { libc::unlink(tmp_buf.as_ptr() as *const c_char); set_errmsg(errmsg, &MorlocError::Io(std::io::Error::last_os_error())); return 1; } 0 } // ── mlc_load: load from file (auto-detect format) ───────────────────────── #[no_mangle] pub unsafe extern "C" fn mlc_load( path: *const c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_void { clear_errmsg(errmsg); extern "C" { fn file_exists(filename: *const c_char) -> bool; fn read_binary_file( filename: *const c_char, file_size: *mut usize, errmsg: *mut *mut c_char, ) -> *mut u8; fn load_morloc_data_file( path: *const c_char, data: *mut u8, data_size: usize, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_void; } if !file_exists(path) { return ptr::null_mut(); } let mut err: *mut c_char = ptr::null_mut(); let mut file_size: usize = 0; let data = read_binary_file(path, &mut file_size, &mut err); if data.is_null() { if !err.is_null() { let path_str = CStr::from_ptr(path).to_string_lossy(); let err_str = CStr::from_ptr(err).to_string_lossy(); eprintln!("@load warning ({}): {}", path_str, err_str); libc::free(err as *mut libc::c_void); } return ptr::null_mut(); } let result = load_morloc_data_file(path, data, file_size, schema, &mut err); if result.is_null() && !err.is_null() { let path_str = CStr::from_ptr(path).to_string_lossy(); let err_str = CStr::from_ptr(err).to_string_lossy(); eprintln!("@load warning ({}): {}", path_str, err_str); libc::free(err as *mut libc::c_void); } result } // ── mlc_hash: hash voidstar data 
─────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn mlc_hash( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_char { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let hash = crate::cache::hash_voidstar(data, schema, 0, &mut err); if !err.is_null() { *errmsg = err; return ptr::null_mut(); } let hex = format!("{:016x}", hash); match CString::new(hex) { Ok(cs) => cs.into_raw(), Err(_) => { set_errmsg(errmsg, &MorlocError::Other("CString error".into())); ptr::null_mut() } } } // ── mlc_show: serialize to JSON string ───────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn mlc_show( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_char { clear_errmsg(errmsg); extern "C" { fn voidstar_to_json_string( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_char; } voidstar_to_json_string(data, schema, errmsg) } // ── mlc_read: deserialize from JSON string ───────────────────────────────── #[no_mangle] pub unsafe extern "C" fn mlc_read( json_str: *const c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_void { clear_errmsg(errmsg); extern "C" { fn read_json_with_schema( dest: *mut u8, json: *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8; } let json_copy = libc::strdup(json_str); if json_copy.is_null() { set_errmsg(errmsg, &MorlocError::Other("strdup failed".into())); return ptr::null_mut(); } let mut err: *mut c_char = ptr::null_mut(); let result = read_json_with_schema(ptr::null_mut(), json_copy, schema, &mut err); libc::free(json_copy as *mut c_void); if result.is_null() { if !err.is_null() { libc::free(err as *mut c_void); } } result as *mut c_void } // write_voidstar_binary is provided by packet.c (still C) // It will be ported when packet.c is ported to Rust. 
// Unused Rust implementation kept for future use #[allow(dead_code)] unsafe fn _write_voidstar_binary_rust( fd: i32, data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> isize { clear_errmsg(errmsg); extern "C" { fn flatten_voidstar_to_buffer( data: *const c_void, schema: *const CSchema, out_buf: *mut *mut u8, out_size: *mut usize, errmsg: *mut *mut c_char, ) -> i32; fn write_binary_fd( fd: i32, buf: *const c_char, count: usize, errmsg: *mut *mut c_char, ) -> i32; } let mut err: *mut c_char = ptr::null_mut(); let mut blob: *mut u8 = ptr::null_mut(); let mut blob_size: usize = 0; if flatten_voidstar_to_buffer(data, schema, &mut blob, &mut blob_size, &mut err) != 0 { *errmsg = err; return -1; } if write_binary_fd(fd, blob as *const c_char, blob_size, &mut err) != 0 { libc::free(blob as *mut c_void); *errmsg = err; return -1; } libc::free(blob as *mut c_void); blob_size as isize } ================================================ FILE: data/rust/morloc-runtime/src/ipc.rs ================================================ //! Unix domain socket IPC for communication between nexus and language pools. //! Replaces ipc.c. use crate::error::MorlocError; use crate::packet::PacketHeader; use std::io::{Read, Write}; use std::os::unix::net::UnixStream; use std::path::Path; /// Send a packet (header + payload) over a Unix stream socket and receive the response. 
pub fn send_and_receive( socket_path: &Path, header: &PacketHeader, payload: &[u8], ) -> Result<(PacketHeader, Vec), MorlocError> { let mut stream = UnixStream::connect(socket_path).map_err(|e| { MorlocError::Ipc(format!( "failed to connect to {}: {e}", socket_path.display() )) })?; // Send header let header_bytes = header.to_bytes(); stream .write_all(&header_bytes) .map_err(|e| MorlocError::Ipc(format!("failed to send header: {e}")))?; // Send payload if !payload.is_empty() { stream .write_all(payload) .map_err(|e| MorlocError::Ipc(format!("failed to send payload: {e}")))?; } // Read response header let mut resp_header_bytes = [0u8; 32]; stream .read_exact(&mut resp_header_bytes) .map_err(|e| MorlocError::Ipc(format!("failed to read response header: {e}")))?; let resp_header = PacketHeader::from_bytes(&resp_header_bytes)?; // Read response payload let payload_len = resp_header.length as usize; let mut resp_payload = vec![0u8; payload_len]; if payload_len > 0 { stream .read_exact(&mut resp_payload) .map_err(|e| MorlocError::Ipc(format!("failed to read response payload: {e}")))?; } Ok((resp_header, resp_payload)) } /// Read a single packet from a connected stream. 
pub fn read_packet(stream: &mut UnixStream) -> Result<(PacketHeader, Vec), MorlocError> { let mut header_bytes = [0u8; 32]; stream .read_exact(&mut header_bytes) .map_err(|e| MorlocError::Ipc(format!("failed to read packet header: {e}")))?; let header = PacketHeader::from_bytes(&header_bytes)?; // Skip metadata between header and payload let skip = header.offset as usize - 32; if skip > 0 { let mut discard = vec![0u8; skip]; stream .read_exact(&mut discard) .map_err(|e| MorlocError::Ipc(format!("failed to skip metadata: {e}")))?; } let payload_len = header.length as usize; let mut payload = vec![0u8; payload_len]; if payload_len > 0 { stream .read_exact(&mut payload) .map_err(|e| MorlocError::Ipc(format!("failed to read payload: {e}")))?; } Ok((header, payload)) } /// Send a packet over a connected stream. pub fn send_packet( stream: &mut UnixStream, header: &PacketHeader, payload: &[u8], ) -> Result<(), MorlocError> { let header_bytes = header.to_bytes(); stream .write_all(&header_bytes) .map_err(|e| MorlocError::Ipc(format!("failed to send header: {e}")))?; if !payload.is_empty() { stream .write_all(payload) .map_err(|e| MorlocError::Ipc(format!("failed to send payload: {e}")))?; } Ok(()) } ================================================ FILE: data/rust/morloc-runtime/src/ipc_ffi.rs ================================================ //! C ABI wrappers for IPC functions. //! Replaces ipc.c with Rust implementations of Unix domain socket operations. use std::ffi::{c_char, c_void, CStr}; use std::ptr; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; // ── C types from call.h ────────────────────────────────────────────────────── #[repr(C)] pub struct ClientList { pub fd: i32, pub next: *mut ClientList, } // language_daemon_t has fd_set which is 128 bytes on Linux. // We represent it as an opaque struct and use libc calls. 
#[repr(C)] pub struct LanguageDaemon { pub socket_path: *mut c_char, pub tmpdir: *mut c_char, pub shm_basename: *mut c_char, pub shm: *mut crate::shm::ShmHeader, pub shm_default_size: usize, pub server_fd: i32, pub read_fds: libc::fd_set, pub client_fds: *mut ClientList, } const BUFFER_SIZE: usize = 65536; // ── close_socket / close_daemon ────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn close_socket(socket_id: i32) { if socket_id >= 0 { libc::close(socket_id); } } #[no_mangle] pub unsafe extern "C" fn close_daemon(daemon_ptr: *mut *mut LanguageDaemon) { if daemon_ptr.is_null() || (*daemon_ptr).is_null() { return; } let daemon = *daemon_ptr; close_socket((*daemon).server_fd); // Free client list let mut current = (*daemon).client_fds; while !current.is_null() { let next = (*current).next; libc::close((*current).fd); libc::free(current as *mut c_void); current = next; } if !(*daemon).socket_path.is_null() { libc::unlink((*daemon).socket_path); libc::free((*daemon).socket_path as *mut c_void); } if !(*daemon).tmpdir.is_null() { libc::free((*daemon).tmpdir as *mut c_void); } if !(*daemon).shm_basename.is_null() { libc::free((*daemon).shm_basename as *mut c_void); } libc::free(daemon as *mut c_void); *daemon_ptr = ptr::null_mut(); } // ── Socket helpers ─────────────────────────────────────────────────────────── unsafe fn new_socket(errmsg: *mut *mut c_char) -> i32 { clear_errmsg(errmsg); let fd = libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0); if fd < 0 { set_errmsg(errmsg, &MorlocError::Ipc("Error creating socket".into())); return -1; } crate::utility::set_nosigpipe(fd); fd } unsafe fn new_server_addr(socket_path: *const c_char) -> libc::sockaddr_un { let mut addr: libc::sockaddr_un = std::mem::zeroed(); addr.sun_family = libc::AF_UNIX as libc::sa_family_t; let path_bytes = CStr::from_ptr(socket_path).to_bytes(); let copy_len = path_bytes.len().min(addr.sun_path.len() - 1); ptr::copy_nonoverlapping( path_bytes.as_ptr() as *const 
c_char, addr.sun_path.as_mut_ptr(), copy_len, ); addr } unsafe fn new_server(socket_path: *const c_char, errmsg: *mut *mut c_char) -> i32 { let server_fd = new_socket(errmsg); if server_fd < 0 { return -1; } let addr = new_server_addr(socket_path); // Remove any existing socket file libc::unlink(socket_path); if libc::bind(server_fd, &addr as *const libc::sockaddr_un as *const libc::sockaddr, std::mem::size_of::() as u32) < 0 { close_socket(server_fd); set_errmsg(errmsg, &MorlocError::Ipc("Error binding socket".into())); return -1; } if libc::listen(server_fd, 16) < 0 { close_socket(server_fd); set_errmsg(errmsg, &MorlocError::Ipc("Error listening on socket".into())); return -1; } server_fd } // ── start_daemon ───────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn start_daemon( socket_path: *const c_char, tmpdir: *const c_char, shm_basename: *const c_char, shm_default_size: usize, errmsg: *mut *mut c_char, ) -> *mut LanguageDaemon { clear_errmsg(errmsg); let daemon = libc::calloc(1, std::mem::size_of::()) as *mut LanguageDaemon; if daemon.is_null() { set_errmsg(errmsg, &MorlocError::Ipc("Calloc for language_daemon_t failed".into())); return ptr::null_mut(); } (*daemon).server_fd = -1; (*daemon).socket_path = libc::strdup(socket_path); (*daemon).tmpdir = libc::strdup(tmpdir); (*daemon).shm_basename = libc::strdup(shm_basename); if (*daemon).socket_path.is_null() || (*daemon).tmpdir.is_null() || (*daemon).shm_basename.is_null() { close_daemon(&mut (daemon as *mut LanguageDaemon)); set_errmsg(errmsg, &MorlocError::Ipc("strdup failed in start_daemon".into())); return ptr::null_mut(); } (*daemon).shm_default_size = shm_default_size; (*daemon).client_fds = ptr::null_mut(); libc::FD_ZERO(&mut (*daemon).read_fds); // Set fallback dir for file-backed SHM crate::shm::shm_set_fallback_dir(&CStr::from_ptr(tmpdir).to_string_lossy()); // Init shared memory let mut err: *mut c_char = ptr::null_mut(); let shm = 
crate::ffi::shinit(shm_basename, 0, shm_default_size, &mut err); if !err.is_null() { close_daemon(&mut (daemon as *mut LanguageDaemon)); *errmsg = err; return ptr::null_mut(); } (*daemon).shm = shm; // Create server socket (*daemon).server_fd = new_server(socket_path, &mut err); if !err.is_null() { close_daemon(&mut (daemon as *mut LanguageDaemon)); *errmsg = err; return ptr::null_mut(); } // Set non-blocking mode let flags = libc::fcntl((*daemon).server_fd, libc::F_GETFL); if flags == -1 || libc::fcntl((*daemon).server_fd, libc::F_SETFL, flags | libc::O_NONBLOCK) == -1 { let errno_msg = std::ffi::CStr::from_ptr(libc::strerror(crate::utility::errno_val())) .to_string_lossy().into_owned(); close_daemon(&mut (daemon as *mut LanguageDaemon)); set_errmsg(errmsg, &MorlocError::Ipc(format!("Failed to set non-blocking mode: {}", errno_msg))); return ptr::null_mut(); } daemon } // ── stream_from_client_wait ────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn stream_from_client_wait( client_fd: i32, pselect_timeout_us: i32, recv_timeout_us: i32, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); if libc::fcntl(client_fd, libc::F_GETFD) == -1 { set_errmsg(errmsg, &MorlocError::Ipc("Invalid file descriptor".into())); return ptr::null_mut(); } let buffer = libc::calloc(BUFFER_SIZE, 1) as *mut u8; if buffer.is_null() { set_errmsg(errmsg, &MorlocError::Ipc("calloc failed for buffer".into())); return ptr::null_mut(); } let mut read_fds: libc::fd_set = std::mem::zeroed(); let max_fd = client_fd; // Timeout setup let mut ts_loop: libc::timespec = std::mem::zeroed(); let timeout_ptr = if pselect_timeout_us > 0 { ts_loop.tv_sec = (pselect_timeout_us / 1000000) as i64; ts_loop.tv_nsec = ((pselect_timeout_us % 1000000) * 1000) as i64; &ts_loop as *const libc::timespec } else { ptr::null() }; // Signal mask setup let mut mask: libc::sigset_t = std::mem::zeroed(); let mut origmask: libc::sigset_t = std::mem::zeroed(); 
libc::sigemptyset(&mut mask); libc::sigaddset(&mut mask, libc::SIGINT); libc::pthread_sigmask(libc::SIG_SETMASK, &mask, &mut origmask); // Initial receive with timeout let mut ready; loop { libc::FD_ZERO(&mut read_fds); libc::FD_SET(client_fd, &mut read_fds); ready = libc::pselect(max_fd + 1, &mut read_fds, ptr::null_mut(), ptr::null_mut(), timeout_ptr, &origmask); if !(ready < 0 && crate::utility::errno_val() == libc::EINTR) { break; } } libc::pthread_sigmask(libc::SIG_SETMASK, &origmask, ptr::null_mut()); if ready == 0 { libc::free(buffer as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Timeout waiting for initial data".into())); return ptr::null_mut(); } if ready < 0 { libc::free(buffer as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("pselect error".into())); return ptr::null_mut(); } if !libc::FD_ISSET(client_fd, &read_fds) { libc::free(buffer as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Bad client file descriptor".into())); return ptr::null_mut(); } let recv_length = libc::recv(client_fd, buffer as *mut c_void, BUFFER_SIZE, 0); if recv_length == 0 { libc::free(buffer as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Connection closed by peer".into())); return ptr::null_mut(); } if recv_length < 0 && crate::utility::errno_val() != libc::EWOULDBLOCK && crate::utility::errno_val() != libc::EAGAIN { libc::free(buffer as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Recv error".into())); return ptr::null_mut(); } // Get packet size from header let mut packet_err: *mut c_char = ptr::null_mut(); let packet_length = crate::packet_ffi::morloc_packet_size(buffer, &mut packet_err); if !packet_err.is_null() { libc::free(buffer as *mut c_void); *errmsg = packet_err; return ptr::null_mut(); } let result = libc::calloc(packet_length, 1) as *mut u8; if result.is_null() { libc::free(buffer as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("calloc failure".into())); return ptr::null_mut(); } let copy_length = (recv_length as 
usize).min(packet_length); ptr::copy_nonoverlapping(buffer, result, copy_length); let mut data_ptr = result.add(copy_length); libc::free(buffer as *mut c_void); let attempts = 10; while (data_ptr as usize - result as usize) < packet_length { let mut packet_received = false; for attempt in 0..attempts { libc::FD_ZERO(&mut read_fds); libc::FD_SET(client_fd, &mut read_fds); let recv_timeout_ptr = if recv_timeout_us > 0 { let total_us = recv_timeout_us as i64 * (attempt as i64 + 1); ts_loop.tv_sec = total_us / 1000000; ts_loop.tv_nsec = (total_us % 1000000) * 1000; &ts_loop as *const libc::timespec } else { ptr::null() }; libc::pthread_sigmask(libc::SIG_SETMASK, &mask, ptr::null_mut()); ready = libc::pselect(max_fd + 1, &mut read_fds, ptr::null_mut(), ptr::null_mut(), recv_timeout_ptr, &origmask); libc::pthread_sigmask(libc::SIG_SETMASK, &origmask, ptr::null_mut()); if ready == 0 { libc::free(result as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Timeout waiting for remaining data".into())); return ptr::null_mut(); } if ready < 0 && crate::utility::errno_val() != libc::EINTR { libc::free(result as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("pselect error".into())); return ptr::null_mut(); } if ready <= 0 { continue; } if libc::FD_ISSET(client_fd, &read_fds) { let remaining = packet_length - (data_ptr as usize - result as usize); let recv_size = remaining.min(BUFFER_SIZE); let n = libc::recv(client_fd, data_ptr as *mut c_void, recv_size, 0); if n > 0 { data_ptr = data_ptr.add(n as usize); packet_received = true; break; } if n == 0 { libc::free(result as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Connection closed early".into())); return ptr::null_mut(); } if n < 0 && crate::utility::errno_val() != libc::EWOULDBLOCK && crate::utility::errno_val() != libc::EAGAIN { libc::free(result as *mut c_void); set_errmsg(errmsg, &MorlocError::Ipc("Recv error".into())); return ptr::null_mut(); } } } if !packet_received { libc::free(result as *mut c_void); 
set_errmsg(errmsg, &MorlocError::Ipc("Failed to retrieve packet".into())); return ptr::null_mut(); } } result } #[no_mangle] pub unsafe extern "C" fn stream_from_client( client_fd: i32, errmsg: *mut *mut c_char, ) -> *mut u8 { stream_from_client_wait(client_fd, 0, 0, errmsg) } // ── send_and_receive_over_socket ───────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn send_and_receive_over_socket_wait( socket_path: *const c_char, packet: *const u8, pselect_timeout_us: i32, recv_timeout_us: i32, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let client_fd = new_socket(&mut err); if client_fd < 0 { *errmsg = err; return ptr::null_mut(); } let addr = new_server_addr(socket_path); // Connect with retry (matching C WAIT macro behavior) let mut retcode; let mut attempts = 0; loop { retcode = libc::connect(client_fd, &addr as *const libc::sockaddr_un as *const libc::sockaddr, std::mem::size_of::() as u32); if retcode == 0 { break; } attempts += 1; if attempts > 300 { // ~30 seconds with 100ms sleep close_socket(client_fd); set_errmsg(errmsg, &MorlocError::Ipc(format!( "Failed to connect to pipe '{}'", CStr::from_ptr(socket_path).to_string_lossy() ))); return ptr::null_mut(); } libc::usleep(100_000); // 100ms } let packet_size = crate::packet_ffi::morloc_packet_size(packet, &mut err); if !err.is_null() { close_socket(client_fd); *errmsg = err; return ptr::null_mut(); } // Send packet in loop let mut total_sent: usize = 0; while total_sent < packet_size { let bytes_sent = libc::send( client_fd, packet.add(total_sent) as *const c_void, packet_size - total_sent, crate::utility::SEND_NOSIGNAL, ); if bytes_sent <= 0 { close_socket(client_fd); set_errmsg(errmsg, &MorlocError::Ipc(format!( "Failed to send data to '{}'", CStr::from_ptr(socket_path).to_string_lossy() ))); return ptr::null_mut(); } total_sent += bytes_sent as usize; } let result = stream_from_client_wait(client_fd, 
pselect_timeout_us, recv_timeout_us, &mut err); if !err.is_null() { close_socket(client_fd); *errmsg = err; return ptr::null_mut(); } close_socket(client_fd); result } #[no_mangle] pub unsafe extern "C" fn send_and_receive_over_socket( socket_path: *const c_char, packet: *const u8, errmsg: *mut *mut c_char, ) -> *mut u8 { send_and_receive_over_socket_wait(socket_path, packet, 0, 0, errmsg) } // ── send_packet_to_foreign_server ──────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn send_packet_to_foreign_server( client_fd: i32, packet: *mut u8, errmsg: *mut *mut c_char, ) -> usize { clear_errmsg(errmsg); let mut err: *mut c_char = ptr::null_mut(); let size = crate::packet_ffi::morloc_packet_size(packet, &mut err); if !err.is_null() { *errmsg = err; return 0; } let mut total_sent: usize = 0; while total_sent < size { let bytes_sent = libc::send( client_fd, packet.add(total_sent) as *const c_void, size - total_sent, crate::utility::SEND_NOSIGNAL, ); if bytes_sent <= 0 { set_errmsg(errmsg, &MorlocError::Ipc(format!( "Failed to send over client {}", client_fd ))); return 0; } total_sent += bytes_sent as usize; } total_sent } // ── wait_for_client ────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn wait_for_client_with_timeout( daemon: *mut LanguageDaemon, timeout_us: i32, errmsg: *mut *mut c_char, ) -> i32 { clear_errmsg(errmsg); libc::FD_ZERO(&mut (*daemon).read_fds); libc::FD_SET((*daemon).server_fd, &mut (*daemon).read_fds); let mut max_fd = (*daemon).server_fd; // Add existing client fds let mut client = (*daemon).client_fds; while !client.is_null() { libc::FD_SET((*client).fd, &mut (*daemon).read_fds); if (*client).fd > max_fd { max_fd = (*client).fd; } client = (*client).next; } // Timeout let mut ts: libc::timespec = std::mem::zeroed(); let timeout_ptr = if timeout_us > 0 { ts.tv_sec = (timeout_us / 1000000) as i64; ts.tv_nsec = ((timeout_us % 1000000) * 1000) as i64; &ts as *const 
libc::timespec } else { ptr::null() }; let mut emptymask: libc::sigset_t = std::mem::zeroed(); libc::sigemptyset(&mut emptymask); let ready = libc::pselect(max_fd + 1, &mut (*daemon).read_fds, ptr::null_mut(), ptr::null_mut(), timeout_ptr, &emptymask); if ready < 0 { if crate::utility::errno_val() == libc::EINTR { return 0; } set_errmsg(errmsg, &MorlocError::Ipc("pselect error".into())); return -1; } if ready == 0 { return 0; } // Check for new connection if libc::FD_ISSET((*daemon).server_fd, &(*daemon).read_fds) { let selected_fd = libc::accept((*daemon).server_fd, ptr::null_mut(), ptr::null_mut()); if selected_fd >= 0 { crate::utility::set_nosigpipe(selected_fd); libc::fcntl(selected_fd, libc::F_SETFL, libc::O_NONBLOCK); let new_client = libc::calloc(1, std::mem::size_of::()) as *mut ClientList; (*new_client).fd = selected_fd; (*new_client).next = ptr::null_mut(); if (*daemon).client_fds.is_null() { (*daemon).client_fds = new_client; } else { let mut last = (*daemon).client_fds; while !(*last).next.is_null() { last = (*last).next; } (*last).next = new_client; } } // Ignore EAGAIN/EWOULDBLOCK on accept } if (*daemon).client_fds.is_null() { return 0; // spurious wakeup } // Dequeue first client let client_node = (*daemon).client_fds; let return_fd = (*client_node).fd; (*daemon).client_fds = (*client_node).next; libc::free(client_node as *mut c_void); return_fd } #[no_mangle] pub unsafe extern "C" fn wait_for_client( daemon: *mut LanguageDaemon, errmsg: *mut *mut c_char, ) -> i32 { wait_for_client_with_timeout(daemon, 0, errmsg) } ================================================ FILE: data/rust/morloc-runtime/src/json.rs ================================================ //! JSON <-> Voidstar conversion. //! //! # Safety model //! //! All SHM pointer operations are encapsulated in `ShmWriter` (write) and //! `ShmReader` (read). Each has a single `unsafe fn new()` constructor; //! all subsequent reads/writes through the handle are safe methods. //! 
The only remaining `unsafe` blocks are `libc::snprintf` for float //! formatting and constructing readers/writers at known-valid offsets. use crate::error::MorlocError; use crate::schema::{Schema, SerialType}; use crate::shm::{self, AbsPtr, Array, RELNULL}; // ── Safe SHM abstractions ──────────────────────────────────────────────────── /// Write handle to a region of shared memory. /// /// # Safety invariant /// `ptr` was obtained from `shmalloc` and points to at least `len` writable bytes. struct ShmWriter { ptr: *mut u8, #[cfg(debug_assertions)] len: usize, } impl ShmWriter { /// # Safety /// `ptr` must point to `len` bytes of valid, writable SHM. unsafe fn new(ptr: *mut u8, len: usize) -> Self { let _ = len; ShmWriter { ptr, #[cfg(debug_assertions)] len } } fn as_ptr(&self) -> *mut u8 { self.ptr } fn write_bytes(&self, offset: usize, src: &[u8]) { debug_assert!(offset + src.len() <= { #[cfg(debug_assertions)] { self.len } #[cfg(not(debug_assertions))] { usize::MAX } }); unsafe { std::ptr::copy_nonoverlapping(src.as_ptr(), self.ptr.add(offset), src.len()); } } fn zero(&self, offset: usize, count: usize) { unsafe { std::ptr::write_bytes(self.ptr.add(offset), 0, count); } } fn write_val(&self, offset: usize, val: T) { unsafe { (self.ptr.add(offset) as *mut T).write_unaligned(val); } } fn write_array_header(&self, offset: usize, size: usize, data_rel: shm::RelPtr) { let arr = Array { size, data: data_rel }; unsafe { std::ptr::copy_nonoverlapping( &arr as *const Array as *const u8, self.ptr.add(offset), std::mem::size_of::(), ); } } fn sub(&self, offset: usize, sub_len: usize) -> ShmWriter { unsafe { ShmWriter::new(self.ptr.add(offset), sub_len) } } } /// Read handle to SHM data. /// /// # Safety invariant /// `ptr` was obtained from `rel2abs`/`shmalloc` and points to valid readable SHM. struct ShmReader { ptr: *const u8 } impl ShmReader { /// # Safety /// `ptr` must point to valid, readable shared memory. 
unsafe fn new(ptr: *const u8) -> Self { ShmReader { ptr } } fn read_val(&self, offset: usize) -> T { unsafe { (self.ptr.add(offset) as *const T).read_unaligned() } } fn read_u8(&self, offset: usize) -> u8 { self.read_val(offset) } fn read_array(&self, offset: usize) -> Array { unsafe { *(self.ptr.add(offset) as *const Array) } } fn read_str(&self, offset: usize, len: usize) -> &str { unsafe { std::str::from_utf8(std::slice::from_raw_parts(self.ptr.add(offset), len)).unwrap_or("") } } fn at(&self, offset: usize) -> ShmReader { unsafe { ShmReader::new(self.ptr.add(offset)) } } } // ── JSON -> Voidstar ─────────────────────────────────────────────────────── pub fn read_json_with_schema(json_str: &str, schema: &Schema) -> Result { read_json_with_schema_dest(None, json_str, schema) } pub fn read_json_with_schema_dest( dest: Option, json_str: &str, schema: &Schema, ) -> Result { let value: serde_json::Value = serde_json::from_str(json_str) .map_err(|e| MorlocError::Serialization(format!("JSON parse error: {}", e)))?; json_to_voidstar(&value, schema, dest) } fn alloc(dest: Option, size: usize) -> Result { let ptr = match dest { Some(p) => p, None => shm::shmalloc(size)? }; // SAFETY: ptr from shmalloc or caller-provided valid SHM of sufficient size Ok(unsafe { ShmWriter::new(ptr, size) }) } fn json_to_voidstar( value: &serde_json::Value, schema: &Schema, dest: Option, ) -> Result { match schema.serial_type { SerialType::Nil => { let w = alloc(dest, 1)?; w.write_val::(0, 0); Ok(w.as_ptr()) } SerialType::Bool => { let b = value.as_bool().ok_or_else(|| err("expected bool"))?; let w = alloc(dest, 1)?; w.write_val::(0, b as u8); Ok(w.as_ptr()) } SerialType::Sint8 => { let w = alloc(dest, 1)?; w.write_val::(0, as_i64(value)? as i8); Ok(w.as_ptr()) } SerialType::Sint16 => { let w = alloc(dest, 2)?; w.write_val::(0, as_i64(value)? as i16); Ok(w.as_ptr()) } SerialType::Sint32 => { let w = alloc(dest, 4)?; w.write_val::(0, as_i64(value)? 
as i32); Ok(w.as_ptr()) } SerialType::Sint64 => { let w = alloc(dest, 8)?; w.write_val::(0, as_i64(value)?); Ok(w.as_ptr()) } SerialType::Uint8 => { let w = alloc(dest, 1)?; w.write_val::(0, as_u64(value)? as u8); Ok(w.as_ptr()) } SerialType::Uint16 => { let w = alloc(dest, 2)?; w.write_val::(0, as_u64(value)? as u16); Ok(w.as_ptr()) } SerialType::Uint32 => { let w = alloc(dest, 4)?; w.write_val::(0, as_u64(value)? as u32); Ok(w.as_ptr()) } SerialType::Uint64 => { let w = alloc(dest, 8)?; w.write_val::(0, as_u64(value)?); Ok(w.as_ptr()) } SerialType::Float32 => { let w = alloc(dest, 4)?; w.write_val::(0, as_f64(value)? as f32); Ok(w.as_ptr()) } SerialType::Float64 => { let w = alloc(dest, 8)?; w.write_val::(0, as_f64(value)?); Ok(w.as_ptr()) } SerialType::String => { let s = value.as_str().ok_or_else(|| err("expected string"))?; let bytes = s.as_bytes(); let hdr = std::mem::size_of::(); let (w, data_rel) = if dest.is_some() { let w = alloc(dest, hdr)?; let data_rel = if bytes.is_empty() { RELNULL } else { shm::abs2rel(shm::shmemcpy(bytes.as_ptr(), bytes.len())?)? }; (w, data_rel) } else { let w = alloc(None, hdr + bytes.len())?; w.write_bytes(hdr, bytes); // SAFETY: data is hdr bytes into the same shmalloc block let data_rel = shm::abs2rel(unsafe { w.as_ptr().add(hdr) })?; (w, data_rel) }; w.write_array_header(0, bytes.len(), data_rel); Ok(w.as_ptr()) } SerialType::Array => { let arr_val = value.as_array().ok_or_else(|| err("expected array"))?; let es = schema.parameters.first().ok_or_else(|| err("array has no element type"))?; let n = arr_val.len(); let ew = es.width; let hdr = std::mem::size_of::(); let (hw, data_ptr) = if dest.is_some() { let hw = alloc(dest, hdr)?; let dp = if n > 0 { shm::shmalloc(n * ew)? 
} else { std::ptr::null_mut() }; (hw, dp) } else { let w = alloc(None, hdr + n * ew)?; // SAFETY: data is hdr bytes into the same shmalloc block let dp = unsafe { w.as_ptr().add(hdr) }; (w, dp) }; let data_rel = if data_ptr.is_null() { RELNULL } else { shm::abs2rel(data_ptr)? }; for (i, elem) in arr_val.iter().enumerate() { // SAFETY: data_ptr + i * ew is within the data allocation let ep = unsafe { data_ptr.add(i * ew) }; json_to_voidstar(elem, es, Some(ep))?; } hw.write_array_header(0, n, data_rel); Ok(hw.as_ptr()) } SerialType::Tuple | SerialType::Map => { let fields = extract_fields(value, schema)?; if fields.len() != schema.parameters.len() { return Err(err(&format!("expected {} fields, got {}", schema.parameters.len(), fields.len()))); } let w = alloc(dest, schema.width)?; w.zero(0, schema.width); for (i, (fv, fs)) in fields.iter().zip(schema.parameters.iter()).enumerate() { let sub = w.sub(schema.offsets[i], fs.width); json_to_voidstar(fv, fs, Some(sub.as_ptr()))?; } Ok(w.as_ptr()) } SerialType::Optional => { let inner = schema.parameters.first().ok_or_else(|| err("optional has no inner type"))?; let off = shm::align_up(1, inner.alignment().max(1)); let total = off + inner.width; let w = alloc(dest, total)?; if value.is_null() { w.zero(0, total); } else { w.write_val::(0, 1); json_to_voidstar(value, inner, Some(w.sub(off, inner.width).as_ptr()))?; } Ok(w.as_ptr()) } SerialType::Tensor => Err(err("Tensor JSON parsing not yet implemented")), } } fn extract_fields(value: &serde_json::Value, schema: &Schema) -> Result, MorlocError> { if schema.serial_type == SerialType::Map && value.is_object() { let obj = value.as_object().unwrap(); Ok(schema.keys.iter().map(|k| obj.get(k).cloned().unwrap_or(serde_json::Value::Null)).collect()) } else { value.as_array().ok_or_else(|| err("expected array for tuple/map")).cloned() } } // ── Voidstar -> JSON ─────────────────────────────────────────────────────── pub fn voidstar_to_json_string(ptr: AbsPtr, schema: &Schema) -> 
Result { let mut buf = String::new(); // SAFETY: ptr from shmalloc/rel2abs — valid SHM let r = unsafe { ShmReader::new(ptr) }; to_json(&r, schema, &mut buf)?; Ok(buf) } pub fn print_voidstar(ptr: AbsPtr, schema: &Schema) -> Result<(), MorlocError> { println!("{}", voidstar_to_json_string(ptr, schema)?); Ok(()) } pub fn pretty_print_voidstar(ptr: AbsPtr, schema: &Schema) -> Result<(), MorlocError> { let json = voidstar_to_json_string(ptr, schema)?; let v: serde_json::Value = serde_json::from_str(&json).map_err(|e| err(&e.to_string()))?; match &v { // Print strings as raw text (unescaped, no quotes) serde_json::Value::String(s) => println!("{}", s), // Print numbers and bools as plain values serde_json::Value::Number(n) => println!("{}", n), serde_json::Value::Bool(b) => println!("{}", b), serde_json::Value::Null => println!("null"), // Print arrays and objects as indented JSON _ => println!("{}", serde_json::to_string_pretty(&v).map_err(|e| err(&e.to_string()))?), } Ok(()) } fn to_json(r: &ShmReader, schema: &Schema, buf: &mut String) -> Result<(), MorlocError> { match schema.serial_type { SerialType::Nil => buf.push_str("null"), SerialType::Bool => buf.push_str(if r.read_u8(0) != 0 { "true" } else { "false" }), SerialType::Sint8 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Sint16 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Sint32 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Sint64 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Uint8 => buf.push_str(&r.read_u8(0).to_string()), SerialType::Uint16 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Uint32 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Uint64 => buf.push_str(&(r.read_val::(0)).to_string()), SerialType::Float32 => write_float(buf, r.read_val::(0) as f64, b"%.7g\0"), SerialType::Float64 => write_float(buf, r.read_val::(0), b"%.15g\0"), SerialType::String => { let arr = r.read_array(0); if arr.size == 0 || arr.data == 
RELNULL { buf.push_str("\"\""); } else { // SAFETY: arr.data resolved to valid SHM string bytes let dr = unsafe { ShmReader::new(shm::rel2abs(arr.data)?) }; json_escape(dr.read_str(0, arr.size), buf); } } SerialType::Array => { let arr = r.read_array(0); let es = &schema.parameters[0]; buf.push('['); if arr.size > 0 && arr.data != RELNULL { let data = shm::rel2abs(arr.data)?; for i in 0..arr.size { if i > 0 { buf.push(','); } // SAFETY: data + i * es.width within array data block let er = unsafe { ShmReader::new(data.add(i * es.width)) }; to_json(&er, es, buf)?; } } buf.push(']'); } SerialType::Tuple => { buf.push('['); for (i, fs) in schema.parameters.iter().enumerate() { if i > 0 { buf.push(','); } to_json(&r.at(schema.offsets[i]), fs, buf)?; } buf.push(']'); } SerialType::Map => { buf.push('{'); for (i, fs) in schema.parameters.iter().enumerate() { if i > 0 { buf.push(','); } if i < schema.keys.len() { buf.push('"'); buf.push_str(&schema.keys[i]); buf.push_str("\":"); } to_json(&r.at(schema.offsets[i]), fs, buf)?; } buf.push('}'); } SerialType::Optional => { if r.read_u8(0) == 0 { buf.push_str("null"); } else { let inner = &schema.parameters[0]; to_json(&r.at(shm::align_up(1, inner.alignment().max(1))), inner, buf)?; } } SerialType::Tensor => { // SAFETY: reading Tensor struct from SHM let tensor = unsafe { &*(r.ptr as *const shm::Tensor) }; if tensor.total_elements == 0 { buf.push_str("[]"); } else { let ndim = schema.offsets.first().copied().unwrap_or(1); let sp = shm::rel2abs(tensor.shape)?; // SAFETY: sp points to ndim i64 values in SHM let shape: Vec = (0..ndim).map(|i| unsafe { *((sp as *const i64).add(i)) } as usize).collect(); let dp = shm::rel2abs(tensor.data)?; let es = &schema.parameters[0]; tensor_to_json(buf, dp, &shape, tensor.total_elements, es)?; } } } Ok(()) } fn tensor_to_json( buf: &mut String, data: *const u8, shape: &[usize], stride: usize, es: &Schema, ) -> Result<(), MorlocError> { buf.push('['); if shape.len() == 1 { for i in 0..shape[0] 
{ if i > 0 { buf.push(','); } // SAFETY: data + i * es.width within tensor data let r = unsafe { ShmReader::new(data.add(i * es.width)) }; to_json(&r, es, buf)?; } } else { let inner = stride / shape[0]; for i in 0..shape[0] { if i > 0 { buf.push(','); } tensor_to_json(buf, data.wrapping_add(i * inner * es.width), &shape[1..], inner, es)?; } } buf.push(']'); Ok(()) } // ── Helpers ──────────────────────────────────────────────────────────────── fn json_escape(s: &str, buf: &mut String) { buf.push('"'); for ch in s.chars() { match ch { '"' => buf.push_str("\\\""), '\\' => buf.push_str("\\\\"), '/' => buf.push_str("\\/"), '\x08' => buf.push_str("\\b"), '\x0c' => buf.push_str("\\f"), '\n' => buf.push_str("\\n"), '\r' => buf.push_str("\\r"), '\t' => buf.push_str("\\t"), c if c < '\x20' => buf.push_str(&format!("\\u{:04x}", c as u32)), c => buf.push(c), } } buf.push('"'); } fn err(msg: &str) -> MorlocError { MorlocError::Serialization(msg.into()) } fn as_i64(v: &serde_json::Value) -> Result { v.as_i64().ok_or_else(|| err("expected integer")) } fn as_u64(v: &serde_json::Value) -> Result { v.as_u64().ok_or_else(|| err("expected unsigned integer")) } fn as_f64(v: &serde_json::Value) -> Result { v.as_f64().ok_or_else(|| err("expected number")) } fn write_float(buf: &mut String, f: f64, fmt: &[u8]) { if f.is_nan() || f.is_infinite() { buf.push_str("null"); return; } let mut cbuf = [0u8; 64]; // SAFETY: snprintf writes to stack-local buffer with explicit size limit let n = unsafe { libc::snprintf(cbuf.as_mut_ptr() as *mut libc::c_char, cbuf.len(), fmt.as_ptr() as *const libc::c_char, f) }; if n > 0 && (n as usize) < cbuf.len() { buf.push_str(std::str::from_utf8(&cbuf[..n as usize]).unwrap_or("0")); } else { buf.push_str("0"); } } #[cfg(test)] mod tests { use super::*; use crate::schema::parse_schema; fn setup() { crate::init_test_shm(); } #[test] fn test_int() { setup(); let s = parse_schema("i4").unwrap(); let p = read_json_with_schema("42", &s).unwrap(); 
assert_eq!(voidstar_to_json_string(p, &s).unwrap(), "42"); } #[test] fn test_string() { setup(); let s = parse_schema("s").unwrap(); let p = read_json_with_schema("\"hello\"", &s).unwrap(); assert_eq!(voidstar_to_json_string(p, &s).unwrap(), "\"hello\""); } #[test] fn test_bool() { setup(); let s = parse_schema("b").unwrap(); let p = read_json_with_schema("true", &s).unwrap(); assert_eq!(voidstar_to_json_string(p, &s).unwrap(), "true"); } #[test] fn test_array() { setup(); let s = parse_schema("ai4").unwrap(); let p = read_json_with_schema("[1,2,3]", &s).unwrap(); assert_eq!(voidstar_to_json_string(p, &s).unwrap(), "[1,2,3]"); } #[test] fn test_opt_some(){ setup(); let s = parse_schema("?i4").unwrap(); let p = read_json_with_schema("5", &s).unwrap(); assert_eq!(voidstar_to_json_string(p, &s).unwrap(), "5"); } #[test] fn test_opt_null(){ setup(); let s = parse_schema("?i4").unwrap(); let p = read_json_with_schema("null", &s).unwrap(); assert_eq!(voidstar_to_json_string(p, &s).unwrap(), "null"); } } ================================================ FILE: data/rust/morloc-runtime/src/json_ffi.rs ================================================ //! C ABI wrappers for JSON functions. //! Replaces json.c's core functions with calls to Rust json.rs. //! Arrow output and json_buf API are also implemented here. 
use std::ffi::{c_char, c_void, CStr, CString}; use std::ptr; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; // ── quoted ───────────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn quoted(input: *const c_char) -> *mut c_char { if input.is_null() { return ptr::null_mut(); } let s = CStr::from_ptr(input); let bytes = s.to_bytes(); let len = bytes.len(); // Simple wrapping: "input" (matching C behavior — no escaping) let buf = libc::calloc(len + 3, 1) as *mut c_char; if buf.is_null() { return ptr::null_mut(); } *buf = b'"' as c_char; std::ptr::copy_nonoverlapping(bytes.as_ptr(), buf.add(1) as *mut u8, len); *buf.add(len + 1) = b'"' as c_char; buf } // ── read_json_with_schema ────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn read_json_with_schema( dest: *mut u8, json_str: *mut c_char, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); if json_str.is_null() || schema.is_null() { set_errmsg(errmsg, &MorlocError::NullPointer); return ptr::null_mut(); } let rs = CSchema::to_rust(schema); let json = CStr::from_ptr(json_str).to_string_lossy(); let dest_opt = if dest.is_null() { None } else { Some(dest) }; match crate::json::read_json_with_schema_dest(dest_opt, &json, &rs) { Ok(ptr) => ptr, Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() } } } // ── voidstar_to_json_string ──────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn voidstar_to_json_string( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> *mut c_char { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); match crate::json::voidstar_to_json_string(data as *mut u8, &rs) { Ok(s) => { match CString::new(s) { Ok(cs) => cs.into_raw(), Err(_) => { set_errmsg(errmsg, &MorlocError::Other("CString conversion failed".into())); ptr::null_mut() } } } Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() } 
} } // ── print_voidstar ───────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn print_voidstar( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); match crate::json::print_voidstar(data as *mut u8, &rs) { Ok(_) => true, Err(e) => { set_errmsg(errmsg, &e); false } } } #[no_mangle] pub unsafe extern "C" fn pretty_print_voidstar( data: *const c_void, schema: *const CSchema, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let rs = CSchema::to_rust(schema); match crate::json::pretty_print_voidstar(data as *mut u8, &rs) { Ok(_) => true, Err(e) => { set_errmsg(errmsg, &e); false } } } // ── json_buf API (used by daemon.c for discovery JSON) ───────────────────── /// Dynamic JSON string builder. pub struct JsonBuf { buf: String, needs_comma: Vec, } #[no_mangle] pub extern "C" fn json_buf_new() -> *mut JsonBuf { Box::into_raw(Box::new(JsonBuf { buf: String::with_capacity(256), needs_comma: Vec::new(), })) } #[no_mangle] pub unsafe extern "C" fn json_buf_free(jb: *mut JsonBuf) { if !jb.is_null() { let _ = Box::from_raw(jb); } } #[no_mangle] pub unsafe extern "C" fn json_buf_finish(jb: *mut JsonBuf) -> *mut c_char { if jb.is_null() { return ptr::null_mut(); } let jb = Box::from_raw(jb); match CString::new(jb.buf) { Ok(cs) => cs.into_raw(), Err(_) => ptr::null_mut(), } } unsafe fn jb_maybe_comma(jb: &mut JsonBuf) { if let Some(needs) = jb.needs_comma.last_mut() { if *needs { jb.buf.push(','); } *needs = true; } } #[no_mangle] pub unsafe extern "C" fn json_write_obj_start(jb: *mut JsonBuf) { if jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); jb.buf.push('{'); jb.needs_comma.push(false); } #[no_mangle] pub unsafe extern "C" fn json_write_obj_end(jb: *mut JsonBuf) { if jb.is_null() { return; } let jb = &mut *jb; jb.needs_comma.pop(); jb.buf.push('}'); } #[no_mangle] pub unsafe extern "C" fn json_write_arr_start(jb: *mut 
JsonBuf) { if jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); jb.buf.push('['); jb.needs_comma.push(false); } #[no_mangle] pub unsafe extern "C" fn json_write_arr_end(jb: *mut JsonBuf) { if jb.is_null() { return; } let jb = &mut *jb; jb.needs_comma.pop(); jb.buf.push(']'); } #[no_mangle] pub unsafe extern "C" fn json_write_key(jb: *mut JsonBuf, key: *const c_char) { if jb.is_null() || key.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); let s = CStr::from_ptr(key).to_string_lossy(); jb.buf.push('"'); jb.buf.push_str(&s); jb.buf.push_str("\":"); // Don't set needs_comma — the value will follow immediately if let Some(needs) = jb.needs_comma.last_mut() { *needs = false; } } #[no_mangle] pub unsafe extern "C" fn json_write_string(jb: *mut JsonBuf, val: *const c_char) { if jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); if val.is_null() { jb.buf.push_str("null"); } else { let s = CStr::from_ptr(val).to_string_lossy(); // JSON-escape the string jb.buf.push('"'); for ch in s.chars() { match ch { '"' => jb.buf.push_str("\\\""), '\\' => jb.buf.push_str("\\\\"), '\n' => jb.buf.push_str("\\n"), '\r' => jb.buf.push_str("\\r"), '\t' => jb.buf.push_str("\\t"), c if c < '\x20' => { jb.buf.push_str(&format!("\\u{:04x}", c as u32)); } c => jb.buf.push(c), } } jb.buf.push('"'); } } #[no_mangle] pub unsafe extern "C" fn json_write_int(jb: *mut JsonBuf, val: i64) { if jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); jb.buf.push_str(&val.to_string()); } #[no_mangle] pub unsafe extern "C" fn json_write_uint(jb: *mut JsonBuf, val: u64) { if jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); jb.buf.push_str(&val.to_string()); } #[no_mangle] pub unsafe extern "C" fn json_write_bool(jb: *mut JsonBuf, val: bool) { if jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); jb.buf.push_str(if val { "true" } else { "false" }); } #[no_mangle] pub unsafe extern "C" fn json_write_null(jb: *mut JsonBuf) { if 
jb.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); jb.buf.push_str("null"); } #[no_mangle] pub unsafe extern "C" fn json_write_raw(jb: *mut JsonBuf, raw: *const c_char) { if jb.is_null() || raw.is_null() { return; } let jb = &mut *jb; jb_maybe_comma(jb); let s = CStr::from_ptr(raw).to_string_lossy(); jb.buf.push_str(&s); } // ── Arrow JSON output ────────────────────────────────────────────────────── // Arrow output is complex and depends on the Arrow C Data Interface. // These are implemented in C (arrow_json.c) and linked via the hybrid build. // The functions below are stubs that will be overridden by the C implementations // when we create a separate arrow_json.c file. // For now, remove the Rust stubs and let C json.c's implementations be used // from a separate compilation unit. // print_arrow_as_json and print_arrow_as_table are provided by the C // arrow_json code (extracted from json.c, kept in build as arrow_json.c) #[allow(dead_code)] unsafe fn _print_arrow_as_json_stub( data: *const c_void, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); // Use the arrow.c implementation which handles the Arrow C Data Interface extern "C" { fn arrow_column_desc( header: *const c_void, col_idx: usize, ) -> *const c_void; fn arrow_column_data( header: *const c_void, col_idx: usize, ) -> *const c_void; fn arrow_column_name( header: *const c_void, col_idx: usize, ) -> *const c_char; } // Read arrow_shm_header fields // arrow_shm_header_t: { magic: u32, n_columns: u32, n_rows: u64, ... } let header = data as *const u8; let n_columns = *(header.add(4) as *const u32) as usize; let n_rows = *(header.add(8) as *const u64) as usize; // Build JSON array of objects print!("["); for row in 0..n_rows { if row > 0 { print!(","); } print!("{{"); for col in 0..n_columns { if col > 0 { print!(","); } let name = arrow_column_name(data, col); let name_str = if name.is_null() { "?" 
} else { CStr::from_ptr(name).to_str().unwrap_or("?") }; print!("\"{}\":", name_str); let desc = arrow_column_desc(data, col); if desc.is_null() { print!("null"); continue; } // desc is arrow_column_desc_t: { type: u8, length: u32, null_count: u32, name_offset, data_offset } let col_type = *(desc as *const u8); let col_data = arrow_column_data(data, col); // Print value based on type // Types: 0=nil, 1=bool, 2=i8, ..., 11=f64, 13=string match col_type { 4 => { // i32 let vals = col_data as *const i32; print!("{}", *vals.add(row)); } 5 => { // i64 let vals = col_data as *const i64; print!("{}", *vals.add(row)); } 11 => { // f64 let vals = col_data as *const f64; let mut cbuf = [0u8; 64]; let fmt = b"%.15g\0"; let n = libc::snprintf(cbuf.as_mut_ptr() as *mut c_char, 64, fmt.as_ptr() as *const c_char, *vals.add(row)); if n > 0 { let s = std::str::from_utf8(&cbuf[..n as usize]).unwrap_or("0"); print!("{}", s); } } 13 => { // string // Arrow strings: offsets array + data buffer // For simplicity, use arrow_column_data which gives the data pointer // This is a simplified implementation — full Arrow string handling // requires offset arrays print!("\"\""); } _ => { print!("null"); } } } print!("}}"); } println!("]"); true } #[allow(dead_code)] unsafe fn _print_arrow_as_table_stub( _data: *const c_void, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); // Stub — Arrow table output is rarely used // The full implementation would print TSV-formatted columns set_errmsg(errmsg, &MorlocError::Other("Arrow table output not yet implemented in Rust".into())); false } ================================================ FILE: data/rust/morloc-runtime/src/lib.rs ================================================ pub mod error; pub mod schema; pub mod packet; pub mod shm; pub mod hash; pub mod ipc; pub mod json; pub mod mpack; // FFI and utility modules export #[no_mangle] extern "C" symbols. 
// When the "no-ffi-exports" feature is active (nexus build), these modules // are not compiled, preventing symbol conflicts with libmorloc.so. // CSchema type is always available (used by nexus for Rust<->C conversion) pub mod cschema; pub mod ffi; pub mod utility; pub mod cache; pub mod intrinsics; pub mod voidstar; pub mod json_ffi; pub mod packet_ffi; pub mod ipc_ffi; pub mod http_ffi; pub mod slurm_ffi; pub mod manifest_ffi; pub mod eval_ffi; pub mod arrow_ffi; pub mod pool_ffi; pub mod daemon_ffi; pub mod router_ffi; pub mod cli; /// Shared test SHM initialization. Call from all test modules. #[cfg(test)] pub(crate) fn init_test_shm() { use std::sync::Once; static INIT: Once = Once::new(); INIT.call_once(|| { let tmpdir = std::env::temp_dir(); let test_dir = tmpdir.join(format!("morloc_test_{}", std::process::id())); let _ = std::fs::create_dir_all(&test_dir); shm::shm_set_fallback_dir(test_dir.to_str().unwrap()); let basename = format!("morloc_test_{}", std::process::id()); shm::shinit(&basename, 0, 0x100000).unwrap(); // 1MB }); } // Re-export core types at crate root pub use error::MorlocError; pub use schema::{Schema, SerialType}; pub use packet::{PacketHeader, PACKET_MAGIC}; pub use shm::{RelPtr, VolPtr, AbsPtr, Array, Tensor}; ================================================ FILE: data/rust/morloc-runtime/src/manifest_ffi.rs ================================================ //! C ABI wrappers for manifest parsing and discovery JSON. //! //! This file mirrors the manifest data model into raw `#[repr(C)]` //! structs that the daemon and slurm subsystems read via FFI from C //! code. It is **not** the canonical Rust deserializer of the manifest //! schema -- that lives in //! `data/rust/morloc-nexus/src/manifest.rs`, which has full doc //! comments describing the v2 manifest schema, every field's purpose, //! and which slots are reserved for future expansion. //! //! The split exists for two reasons: //! //! 1. 
**C ABI stability.** The C structs here have the original v1 //! field layout (flat `arg_schemas`, `return_schema`, `return_type`, //! `return_desc`, `build_dir`, `version`) so that downstream C //! callers (the daemon, the slurm bridge, any third-party FFI //! consumers) don't break when the JSON schema evolves. The //! `parse_manifest` function below reads the new v2 JSON shape and //! populates these legacy C struct fields, acting as a translation //! layer. //! //! 2. **Build-time decoupling.** The morloc-runtime crate needs to //! consume manifests without depending on the morloc-nexus crate. //! Sharing a Rust-level data model would create a circular //! dependency between the two crates. //! //! When the v2 schema gains new fields (constraints, metadata, etc.), //! the canonical Rust model in `morloc-nexus/src/manifest.rs` is //! updated first. This file gets new C struct fields only when a C-side //! consumer needs them; otherwise the new JSON keys are silently //! ignored here, which is the correct forward-compatible behavior. 
use std::ffi::{c_char, c_void, CStr, CString}; use std::ptr; use crate::cschema::CSchema; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; // -- C-compatible types matching eval.h --------------------------------------- #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum MorlocExpressionType { Dat = 0, App = 1, Lam = 2, Bnd = 3, Pat = 4, Fmt = 5, Show = 6, Read = 7, Hash = 8, Save = 9, Load = 10, } #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum MorlocAppExpressionType { Pattern = 0, Lambda = 1, Format = 2, } #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum MorlocPatternType { ByKey = 0, ByIndex = 1, End = 2, } #[repr(C)] pub union PatternFields { pub indices: *mut usize, pub keys: *mut *mut c_char, } #[repr(C)] pub struct MorlocPattern { pub ptype: MorlocPatternType, pub size: usize, pub fields: PatternFields, pub selectors: *mut *mut MorlocPattern, } #[repr(C)] pub union Primitive { pub s: *mut c_char, pub z: u8, pub b: bool, pub i1: i8, pub i2: i16, pub i4: i32, pub i8_: i64, pub u1: u8, pub u2: u16, pub u4: u32, pub u8_: u64, pub f4: f32, pub f8: f64, } #[repr(C)] pub struct MorlocDataArray { pub schema: *mut CSchema, pub size: usize, pub values: *mut *mut MorlocExpression, } #[repr(C)] // Primitive contains a pointer (s: *mut c_char), so DataUnion uses ManuallyDrop #[repr(C)] pub union DataUnion { pub lit_val: std::mem::ManuallyDrop, pub tuple_val: *mut *mut MorlocExpression, pub array_val: *mut MorlocDataArray, pub voidstar: *mut c_void, } #[repr(C)] pub struct MorlocData { pub is_voidstar: bool, pub data: DataUnion, } #[repr(C)] pub union AppFunction { pub pattern: *mut MorlocPattern, pub lambda: *mut MorlocLamExpression, pub fmt: *mut *mut c_char, } #[repr(C)] pub struct MorlocAppExpression { pub atype: MorlocAppExpressionType, pub function: AppFunction, pub args: *mut *mut MorlocExpression, pub nargs: usize, } #[repr(C)] pub struct MorlocLamExpression { pub nargs: usize, pub args: *mut *mut c_char, pub body: *mut 
MorlocExpression, } #[repr(C)] pub struct MorlocSaveExpression { pub format: *mut c_char, pub value: *mut MorlocExpression, pub path: *mut MorlocExpression, } #[repr(C)] pub union ExprUnion { pub app_expr: *mut MorlocAppExpression, pub lam_expr: *mut MorlocLamExpression, pub bnd_expr: *mut c_char, pub interpolation: *mut *mut c_char, pub pattern_expr: *mut MorlocPattern, pub data_expr: *mut MorlocData, pub unary_expr: *mut MorlocExpression, pub save_expr: *mut MorlocSaveExpression, } #[repr(C)] pub struct MorlocExpression { pub etype: MorlocExpressionType, pub schema: *mut CSchema, pub expr: ExprUnion, } // -- C-ABI mirror of morloc-manifest v2 --------------------------------------- // // These #[repr(C)] structs are the in-memory layout that daemon_ffi / // router_ffi / slurm_ffi consume via raw pointers. They mirror the // shape of morloc_manifest's Rust types one-for-one (modulo C-string // encoding) -- when the Rust schema gains a new field, it's added here // too as a parallel C field. There is no longer any "translation" or // reshape layer; parse_manifest below is a near-1:1 walker. // // Conventions: // - C-string fields are owned by the manifest and freed by free_manifest. // - Array fields use a (pointer, count) pair (e.g. desc + n_desc). // - The "constraints" and "metadata" extension slots described in // morloc-manifest's docs are mirrored here so daemon-side constraint // enforcement can later read them without another C ABI break. // - metadata is serialized as JSON text (`metadata_json`) because the // C side has no natural map type and the slot is reserved for now. #[repr(C)] pub struct ManifestBuild { pub path: *mut c_char, pub time: i64, pub morloc_version: *mut c_char, } #[repr(C)] pub struct ManifestConstraint { /// Constraint discriminator (e.g. "kind", "min", "regex"). pub ctype: *mut c_char, /// JSON-encoded payload for the constraint, or NULL when the /// constraint type carries no value (e.g. "non_empty"). 
pub value_json: *mut c_char, } #[repr(C)] pub struct ManifestPool { pub lang: *mut c_char, pub exec: *mut *mut c_char, // NULL-terminated pub socket: *mut c_char, /// JSON-encoded pool-level metadata. Reserved. pub metadata_json: *mut c_char, } #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq)] pub enum ManifestArgKind { Pos = 0, Opt = 1, Flag = 2, Grp = 3, } #[repr(C)] pub struct ManifestGrpEntry { pub key: *mut c_char, pub arg: *mut ManifestArg, } #[repr(C)] pub struct ManifestArg { pub kind: ManifestArgKind, /// Per-arg morloc serialization schema. NULL for flags. Group /// entries also have NULL here (the group's top-level schema /// covers them). pub schema: *mut c_char, /// User-facing type name. NULL for flags. pub type_desc: *mut c_char, pub metavar: *mut c_char, pub quoted: bool, pub short_opt: c_char, pub long_opt: *mut c_char, pub long_rev: *mut c_char, pub default_val: *mut c_char, /// NULL-terminated array of description lines. pub desc: *mut *mut c_char, pub n_desc: usize, /// Array of ManifestConstraint owned by this arg. pub constraints: *mut ManifestConstraint, pub n_constraints: usize, /// Group sub-fields (only meaningful when kind == Grp). pub grp_short: c_char, pub grp_long: *mut c_char, pub entries: *mut ManifestGrpEntry, pub n_entries: usize, /// JSON-encoded per-arg metadata. Reserved. 
// NOTE(review): this chunk opens mid-struct -- the field below closes a
// struct whose header is above this view. Throughout this file several
// generic argument lists appear stripped by extraction (e.g.
// `size_of::()`, `parse::()`); left byte-identical and flagged -- TODO
// restore the type parameters from version control.
pub metadata_json: *mut c_char, }

/// C-ABI descriptor for a command's return value (v2 layout).
/// All `*mut c_char` fields are owned libc allocations (see free_return).
#[repr(C)]
pub struct ManifestReturn {
    pub schema: *mut c_char,
    pub type_desc: *mut c_char,
    pub desc: *mut *mut c_char,
    pub n_desc: usize,
    pub constraints: *mut ManifestConstraint,
    pub n_constraints: usize,
    pub metadata_json: *mut c_char,
}

/// C-ABI descriptor for a named command group (help-text grouping).
#[repr(C)]
pub struct ManifestCmdGroup {
    pub name: *mut c_char,
    pub desc: *mut *mut c_char,
    pub n_desc: usize,
    pub metadata_json: *mut c_char,
}

/// C-ABI descriptor for one manifest command: identity, pool routing,
/// CLI args, return descriptor, constraints, and (for pure commands)
/// a pre-built expression tree.
#[repr(C)]
pub struct ManifestCommand {
    pub name: *mut c_char,
    pub is_pure: bool,
    pub mid: u32,
    pub pool_index: usize,
    pub needed_pools: *mut usize,
    pub n_needed_pools: usize,
    pub desc: *mut *mut c_char,
    pub n_desc: usize,
    pub args: *mut ManifestArg,
    pub n_args: usize,
    /// Return-value descriptor as a sub-struct (replaces v1's flat
    /// return_schema/return_type/return_desc fields).
    pub ret: ManifestReturn,
    pub constraints: *mut ManifestConstraint,
    pub n_constraints: usize,
    pub expr: *mut MorlocExpression,
    pub group: *mut c_char,
    pub metadata_json: *mut c_char,
}

/// C-ABI descriptor for an optional service section (daemon endpoint).
#[repr(C)]
pub struct ManifestService {
    pub stype: *mut c_char,
    pub host: *mut c_char,
    pub port: i32,
    pub socket: *mut c_char,
    pub metadata_json: *mut c_char,
}

/// Top-level C-ABI manifest: build info, pools, commands, groups, and
/// an optional service block. Freed as a unit by free_manifest.
#[repr(C)]
pub struct Manifest {
    pub name: *mut c_char,
    pub build: ManifestBuild,
    pub pools: *mut ManifestPool,
    pub n_pools: usize,
    pub commands: *mut ManifestCommand,
    pub n_commands: usize,
    pub groups: *mut ManifestCmdGroup,
    pub n_groups: usize,
    pub service: *mut ManifestService,
    pub metadata_json: *mut c_char,
}

impl ManifestCommand {
    /// Build a transient NULL-terminated array of schema strings for
    /// the command's args, in declaration order. The caller owns the
    /// outer array allocation but NOT the inner C strings (they
    /// remain owned by the ManifestArg objects). Use
    /// `libc::free(arr as *mut c_void)` to release the outer array
    /// when done.
    ///
    /// The array has one entry per arg INCLUDING flags. Flags have a
    /// per-arg `schema` field of NULL on the v2 ManifestArg, but the
    /// legacy callers (e.g. make_call_packet_from_cli) expect a slot
    /// per arg position to keep index alignment with the parallel
    /// args array; we substitute "b" (the boolean schema) for flags
    /// so dispatch reads the flag value as a Bool, matching v1
    /// behavior.
    pub unsafe fn build_arg_schemas_array(&self) -> *mut *mut c_char {
        let n = self.n_args;
        // calloc(n + 1, ...) leaves the final slot zeroed; we also set
        // it to NULL explicitly below for clarity.
        let arr = libc::calloc(n + 1, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char;
        for i in 0..n {
            let arg = &*self.args.add(i);
            *arr.add(i) = if arg.kind == ManifestArgKind::Flag || arg.schema.is_null() {
                // Flag schema fallback: point at a static "b" literal.
                // This pointer is NOT freed by the caller (it's a global
                // string literal); since the caller only frees the OUTER
                // array and the per-arg owners free their own schemas,
                // this is safe.
                static FLAG_SCHEMA: &[u8] = b"b\0";
                FLAG_SCHEMA.as_ptr() as *mut c_char
            } else {
                arg.schema
            };
        }
        *arr.add(n) = ptr::null_mut();
        arr
    }
}

// -- Helpers ------------------------------------------------------------------

/// Duplicate a Rust &str into a libc-owned C string.
/// Returns NULL if the input contains an interior NUL byte.
unsafe fn c_strdup(s: &str) -> *mut c_char {
    match CString::new(s) {
        Ok(cs) => libc::strdup(cs.as_ptr()),
        Err(_) => ptr::null_mut(),
    }
}

/// Like c_strdup but maps None to a NULL pointer.
unsafe fn nullable_strdup(s: Option<&str>) -> *mut c_char {
    match s {
        Some(s) => c_strdup(s),
        None => ptr::null_mut(),
    }
}

// -- Expression builder (using serde_json::Value) -----------------------------

/// Recursively build a C MorlocPattern from its JSON encoding.
/// Recognized "type" values: "end" (leaf), "idx" (select by index),
/// "key" (select by record key). Each selector carries a "sub" pattern.
unsafe fn build_pattern(jp: &serde_json::Value) -> Result<*mut MorlocPattern, MorlocError> {
    let ptype = jp.get("type").and_then(|v| v.as_str()).ok_or_else(|| MorlocError::Other("Pattern missing 'type' field".into()))?;
    if ptype == "end" {
        // make_morloc_pattern_end - call C function
        extern "C" {
            fn make_morloc_pattern_end() -> *mut MorlocPattern;
        }
        return Ok(make_morloc_pattern_end());
    }
    let sels = jp.get("selectors").and_then(|v| v.as_array());
    let n = sels.map(|a| a.len()).unwrap_or(0);
    // NOTE(review): stripped generic here -- presumably size_of::<MorlocPattern>().
    let pat = libc::calloc(1, std::mem::size_of::()) as *mut MorlocPattern;
    (*pat).size = n;
    (*pat).selectors = libc::calloc(n, std::mem::size_of::<*mut MorlocPattern>()) as *mut *mut MorlocPattern;
    if ptype == "idx" {
        (*pat).ptype = MorlocPatternType::ByIndex;
        // NOTE(review): stripped generic -- presumably size_of::<usize>().
        (*pat).fields.indices = libc::calloc(n, std::mem::size_of::()) as *mut usize;
        if let Some(sels) = sels {
            for (i, sel) in sels.iter().enumerate() {
                // JSON numbers arrive as f64; truncate to usize.
                *(*pat).fields.indices.add(i) = sel.get("index").and_then(|v| v.as_f64()).unwrap_or(0.0) as usize;
                *(*pat).selectors.add(i) = build_pattern(sel.get("sub").unwrap_or(&serde_json::Value::Null))?;
            }
        }
    } else if ptype == "key" {
        (*pat).ptype = MorlocPatternType::ByKey;
        (*pat).fields.keys = libc::calloc(n, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char;
        if let Some(sels) = sels {
            for (i, sel) in sels.iter().enumerate() {
                let key = sel.get("key").and_then(|v| v.as_str()).unwrap_or("");
                *(*pat).fields.keys.add(i) = c_strdup(key);
                *(*pat).selectors.add(i) = build_pattern(sel.get("sub").unwrap_or(&serde_json::Value::Null))?;
            }
        }
    } else {
        return Err(MorlocError::Other(format!("Unknown pattern type: {}", ptype)));
    }
    Ok(pat)
}

/// Recursively build a C MorlocExpression tree from its tagged JSON
/// encoding. Dispatches on the "tag" field; each arm allocates the
/// corresponding C node via libc or a C constructor. Errors reported
/// through the C constructors' errmsg out-params are converted to
/// MorlocError::Other.
///
/// NOTE(review): several `parse::()` / `size_of::()` calls below have
/// stripped generic parameters (extraction damage) -- left as-is.
unsafe fn build_expr(je: &serde_json::Value) -> Result<*mut MorlocExpression, MorlocError> {
    let tag = je.get("tag").and_then(|v| v.as_str()).ok_or_else(|| MorlocError::Other("Expression missing 'tag' field".into()))?;
    extern "C" {
        fn parse_schema(s: *const c_char, errmsg: *mut *mut c_char) -> *mut CSchema;
        fn make_morloc_literal(schema: *const c_char, prim: Primitive, errmsg: *mut *mut c_char) -> *mut MorlocExpression;
        fn make_morloc_bound_var(schema: *const c_char, var: *mut c_char, errmsg: *mut *mut c_char) -> *mut MorlocExpression;
        fn make_morloc_pattern(schema: *const c_char, pat: *mut MorlocPattern, errmsg: *mut *mut c_char) -> *mut MorlocExpression;
    }
    let mut err: *mut c_char = ptr::null_mut();
    match tag {
        "lit" => {
            // Numeric/bool/unit literal; "lit_type" selects the Primitive
            // union field, "value" holds the textual value.
            let schema = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let lt = je.get("lit_type").and_then(|v| v.as_str()).unwrap_or("");
            let val = je.get("value").and_then(|v| v.as_str()).unwrap_or("0");
            let mut prim: Primitive = std::mem::zeroed();
            match lt {
                "f4" => prim.f4 = val.parse::().unwrap_or(0.0),
                "f8" => prim.f8 = val.parse::().unwrap_or(0.0),
                "i1" => prim.i1 = val.parse::().unwrap_or(0),
                "i2" => prim.i2 = val.parse::().unwrap_or(0),
                "i4" => prim.i4 = val.parse::().unwrap_or(0),
                "i8" => prim.i8_ = val.parse::().unwrap_or(0),
                "u1" => prim.u1 = val.parse::().unwrap_or(0),
                "u2" => prim.u2 = val.parse::().unwrap_or(0),
                "u4" => prim.u4 = val.parse::().unwrap_or(0),
                "u8" => prim.u8_ = val.parse::().unwrap_or(0),
                "b" => prim.b = val != "0",
                "z" => prim.z = 0,
                _ => return Err(MorlocError::Other(format!("Unknown lit_type: {}", lt))),
            }
            let c_schema = CString::new(schema).unwrap_or_default();
            let result = make_morloc_literal(c_schema.as_ptr(), prim, &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            Ok(result)
        }
        "str" => {
            // String literal: the Primitive union carries an owned C copy.
            let schema = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let val = je.get("value").and_then(|v| v.as_str()).unwrap_or("");
            let mut prim: Primitive = std::mem::zeroed();
            prim.s = c_strdup(val);
            let c_schema = CString::new(schema).unwrap_or_default();
            let result = make_morloc_literal(c_schema.as_ptr(), prim, &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            Ok(result)
        }
        "container" => {
            // Array/tuple/map literal: recursively build elements, then
            // wrap them in a MorlocData node shaped by the parsed schema.
            let schema_str = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let elems = je.get("elements").and_then(|v| v.as_array());
            let n = elems.map(|a| a.len()).unwrap_or(0);
            let c_schema_str = CString::new(schema_str).unwrap_or_default();
            let schema = parse_schema(c_schema_str.as_ptr(), &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            let values = libc::calloc(n, std::mem::size_of::<*mut MorlocExpression>()) as *mut *mut MorlocExpression;
            if let Some(elems) = elems {
                for (i, elem) in elems.iter().enumerate() {
                    *values.add(i) = build_expr(elem)?;
                }
            }
            let data = libc::calloc(1, std::mem::size_of::()) as *mut MorlocData;
            (*data).is_voidstar = false;
            let schema_type = (*schema).serial_type;
            // MORLOC_ARRAY = 14, MORLOC_TUPLE = 15, MORLOC_MAP = 16
            if schema_type == 14 {
                // Array
                let arr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocDataArray;
                // Element schema is the array schema's first parameter.
                (*arr).schema = if (*schema).size > 0 && !(*schema).parameters.is_null() { *(*schema).parameters } else { ptr::null_mut() };
                (*arr).size = n;
                (*arr).values = values;
                (*data).data.array_val = arr;
            } else if schema_type == 15 || schema_type == 16 {
                // Tuple or Map
                (*data).data.tuple_val = values;
            } else {
                libc::free(values as *mut c_void);
                libc::free(data as *mut c_void);
                CSchema::free(schema);
                return Err(MorlocError::Other("Container schema is not a container type".into()));
            }
            let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression;
            (*expr).etype = MorlocExpressionType::Dat;
            (*expr).schema = schema;
            (*expr).expr.data_expr = data;
            Ok(expr)
        }
        "app" => {
            // Application: the "func" must evaluate to a pattern, lambda,
            // or interpolation node; anything else is rejected.
            let schema_str = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let jargs = je.get("args").and_then(|v| v.as_array());
            let n = jargs.map(|a| a.len()).unwrap_or(0);
            let c_schema_str = CString::new(schema_str).unwrap_or_default();
            let schema = parse_schema(c_schema_str.as_ptr(), &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            let func = build_expr(je.get("func").unwrap_or(&serde_json::Value::Null))?;
            let args = libc::calloc(n, std::mem::size_of::<*mut MorlocExpression>()) as *mut *mut MorlocExpression;
            if let Some(jargs) = jargs {
                for (i, a) in jargs.iter().enumerate() {
                    *args.add(i) = build_expr(a)?;
                }
            }
            let app = libc::calloc(1, std::mem::size_of::()) as *mut MorlocAppExpression;
            match (*func).etype {
                MorlocExpressionType::Pat => {
                    (*app).atype = MorlocAppExpressionType::Pattern;
                    (*app).function.pattern = (*func).expr.pattern_expr;
                }
                MorlocExpressionType::Lam => {
                    (*app).atype = MorlocAppExpressionType::Lambda;
                    (*app).function.lambda = (*func).expr.lam_expr;
                }
                MorlocExpressionType::Fmt => {
                    (*app).atype = MorlocAppExpressionType::Format;
                    (*app).function.fmt = (*func).expr.interpolation;
                }
                _ => {
                    return Err(MorlocError::Other(format!("Invalid function in app expression (type={:?})", (*func).etype)));
                }
            }
            (*app).args = args;
            (*app).nargs = n;
            let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression;
            (*expr).etype = MorlocExpressionType::App;
            (*expr).schema = schema;
            (*expr).expr.app_expr = app;
            Ok(expr)
        }
        "lambda" => {
            // Lambda: bound variable names plus a body expression.
            // Lambdas carry no schema of their own (schema = NULL).
            let jvars = je.get("vars").and_then(|v| v.as_array());
            let n = jvars.map(|a| a.len()).unwrap_or(0);
            let body = build_expr(je.get("body").unwrap_or(&serde_json::Value::Null))?;
            let vars = libc::calloc(n, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char;
            if let Some(jvars) = jvars {
                for (i, v) in jvars.iter().enumerate() {
                    *vars.add(i) = c_strdup(v.as_str().unwrap_or(""));
                }
            }
            let lam = libc::calloc(1, std::mem::size_of::()) as *mut MorlocLamExpression;
            (*lam).nargs = n;
            (*lam).args = vars;
            (*lam).body = body;
            let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression;
            (*expr).etype = MorlocExpressionType::Lam;
            (*expr).schema = ptr::null_mut();
            (*expr).expr.lam_expr = lam;
            Ok(expr)
        }
        "bound" => {
            // Reference to a lambda-bound variable by name.
            let schema = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let var = je.get("var").and_then(|v| v.as_str()).unwrap_or("");
            let c_schema = CString::new(schema).unwrap_or_default();
            let c_var = c_strdup(var);
            let result = make_morloc_bound_var(c_schema.as_ptr(), c_var, &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            Ok(result)
        }
        "show" | "read" | "hash" | "load" => {
            // Unary builtins: all share the same shape (schema + child).
            let schema_str = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let c_schema_str = CString::new(schema_str).unwrap_or_default();
            let schema = parse_schema(c_schema_str.as_ptr(), &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            let child = build_expr(je.get("child").unwrap_or(&serde_json::Value::Null))?;
            let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression;
            (*expr).etype = match tag {
                "show" => MorlocExpressionType::Show,
                "read" => MorlocExpressionType::Read,
                "hash" => MorlocExpressionType::Hash,
                "load" => MorlocExpressionType::Load,
                _ => unreachable!(),
            };
            (*expr).schema = schema;
            (*expr).expr.unary_expr = child;
            Ok(expr)
        }
        "save" => {
            // Save builtin: value + path sub-expressions plus an output
            // format string (defaults to "voidstar").
            let schema_str = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let c_schema_str = CString::new(schema_str).unwrap_or_default();
            let schema = parse_schema(c_schema_str.as_ptr(), &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            let fmt_str = je.get("format").and_then(|v| v.as_str()).unwrap_or("voidstar");
            let c_fmt = CString::new(fmt_str).unwrap_or_default();
            let value = build_expr(je.get("value").unwrap_or(&serde_json::Value::Null))?;
            let path = build_expr(je.get("path").unwrap_or(&serde_json::Value::Null))?;
            let save = libc::calloc(1, std::mem::size_of::()) as *mut MorlocSaveExpression;
            // Ownership of the format string transfers to the C struct.
            (*save).format = c_fmt.into_raw();
            (*save).value = value;
            (*save).path = path;
            let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression;
            (*expr).etype = MorlocExpressionType::Save;
            (*expr).schema = schema;
            (*expr).expr.save_expr = save;
            Ok(expr)
        }
        "interpolation" => {
            // String interpolation template: NULL-terminated array of
            // literal segments.
            let schema_str = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let jstrs = je.get("strings").and_then(|v| v.as_array());
            let n = jstrs.map(|a| a.len()).unwrap_or(0);
            let c_schema_str = CString::new(schema_str).unwrap_or_default();
            let schema = parse_schema(c_schema_str.as_ptr(), &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            let strings = libc::calloc(n + 1, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char;
            if let Some(jstrs) = jstrs {
                for (i, s) in jstrs.iter().enumerate() {
                    *strings.add(i) = c_strdup(s.as_str().unwrap_or(""));
                }
            }
            let expr = libc::calloc(1, std::mem::size_of::()) as *mut MorlocExpression;
            (*expr).etype = MorlocExpressionType::Fmt;
            (*expr).schema = schema;
            (*expr).expr.interpolation = strings;
            Ok(expr)
        }
        "pattern" => {
            // Accessor pattern: delegate structure building to
            // build_pattern, then wrap via the C constructor.
            let schema_str = je.get("schema").and_then(|v| v.as_str()).unwrap_or("");
            let pat = build_pattern(je.get("pattern").unwrap_or(&serde_json::Value::Null))?;
            let c_schema = CString::new(schema_str).unwrap_or_default();
            let result = make_morloc_pattern(c_schema.as_ptr(), pat, &mut err);
            if !err.is_null() {
                let msg = CStr::from_ptr(err).to_string_lossy().into_owned();
                libc::free(err as *mut c_void);
                return Err(MorlocError::Other(msg));
            }
            Ok(result)
        }
        _ => Err(MorlocError::Other(format!("Unknown expression tag: {}", tag))),
    }
}

// -- build_manifest_expr ------------------------------------------------------

/// C entry point: parse an expression JSON string and build the
/// corresponding MorlocExpression tree. On failure, writes a message
/// into `errmsg` and returns NULL.
#[no_mangle]
pub unsafe extern "C" fn build_manifest_expr(
    json_str: *const c_char,
    errmsg: *mut *mut c_char,
) -> *mut MorlocExpression {
    clear_errmsg(errmsg);
    let s = CStr::from_ptr(json_str).to_string_lossy();
    let jv: serde_json::Value = match serde_json::from_str(&s) {
        Ok(v) => v,
        Err(e) => {
            set_errmsg(errmsg, &MorlocError::Other(format!("Failed to parse expression JSON: {}", e)));
            return ptr::null_mut();
        }
    };
    match build_expr(&jv) {
        Ok(expr) => expr,
        Err(e) => {
            set_errmsg(errmsg, &e);
            ptr::null_mut()
        }
    }
}

// -- C-ABI population from morloc-manifest Rust types -------------------------
//
// parse_manifest is now a thin shell over morloc_manifest::parse_manifest
// (which does all the JSON parsing, version checking, and serde
// validation). The walker functions below convert the Rust-native
// structs into owned C structs, mirroring the v2 schema field-for-field.
/// Copy one Rust-side Constraint into its C-ABI counterpart.
/// The optional JSON value is serialized to an owned C string.
unsafe fn populate_constraint(dst: *mut ManifestConstraint, src: &morloc_manifest::Constraint) {
    (*dst).ctype = c_strdup(&src.ctype);
    (*dst).value_json = match &src.value {
        Some(v) => c_strdup(&v.to_string()),
        None => ptr::null_mut(),
    };
}

/// Convert a slice of constraints into an owned C array.
/// Returns (NULL, 0) for an empty slice.
/// NOTE(review): stripped generic below -- presumably
/// size_of::<ManifestConstraint>().
unsafe fn populate_constraints(
    src: &[morloc_manifest::Constraint],
) -> (*mut ManifestConstraint, usize) {
    if src.is_empty() {
        return (ptr::null_mut(), 0);
    }
    let arr = libc::calloc(src.len(), std::mem::size_of::()) as *mut ManifestConstraint;
    for (i, c) in src.iter().enumerate() {
        populate_constraint(arr.add(i), c);
    }
    (arr, src.len())
}

/// Serialize a Metadata map to an owned C JSON string ("{}" when
/// empty or when serialization fails).
unsafe fn populate_metadata(src: &morloc_manifest::Metadata) -> *mut c_char {
    if src.is_empty() {
        c_strdup("{}")
    } else {
        let s = serde_json::to_string(src).unwrap_or_else(|_| "{}".into());
        c_strdup(&s)
    }
}

/// Convert a Vec to a NULL-terminated array of C strings,
/// and return (pointer, count). Caller owns the allocation.
unsafe fn populate_str_vec(src: &[String]) -> (*mut *mut c_char, usize) {
    let n = src.len();
    let arr = libc::calloc(n + 1, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char;
    for (i, s) in src.iter().enumerate() {
        *arr.add(i) = c_strdup(s);
    }
    *arr.add(n) = ptr::null_mut();
    (arr, n)
}

/// Copy one Rust-side Arg variant into a C ManifestArg. `dst` is
/// assumed zero-initialized (calloc'd), so fields not set by a given
/// variant stay NULL/0. Group entries recurse via populate_arg.
unsafe fn populate_arg(dst: *mut ManifestArg, src: &morloc_manifest::Arg) {
    use morloc_manifest::Arg;
    match src {
        Arg::Positional { schema, type_desc, metavar, quoted, desc, constraints, .. } => {
            (*dst).kind = ManifestArgKind::Pos;
            (*dst).schema = nullable_strdup(schema.as_deref());
            (*dst).type_desc = nullable_strdup(type_desc.as_deref());
            (*dst).metavar = nullable_strdup(metavar.as_deref());
            (*dst).quoted = *quoted;
            let (d, n) = populate_str_vec(desc);
            (*dst).desc = d;
            (*dst).n_desc = n;
            let (cs, nc) = populate_constraints(constraints);
            (*dst).constraints = cs;
            (*dst).n_constraints = nc;
            (*dst).metadata_json = c_strdup("{}");
        }
        Arg::Optional {
            schema, type_desc, metavar, quoted, short_opt, long_opt, default_val, desc, constraints, ..
        } => {
            (*dst).kind = ManifestArgKind::Opt;
            (*dst).schema = nullable_strdup(schema.as_deref());
            (*dst).type_desc = nullable_strdup(type_desc.as_deref());
            (*dst).metavar = nullable_strdup(metavar.as_deref());
            (*dst).quoted = *quoted;
            // Only the first byte of the short-option string is kept
            // (0 when absent).
            (*dst).short_opt = short_opt
                .as_ref()
                .and_then(|s| s.as_bytes().first().copied())
                .map(|b| b as c_char)
                .unwrap_or(0);
            (*dst).long_opt = nullable_strdup(long_opt.as_deref());
            (*dst).default_val = nullable_strdup(default_val.as_deref());
            let (d, n) = populate_str_vec(desc);
            (*dst).desc = d;
            (*dst).n_desc = n;
            let (cs, nc) = populate_constraints(constraints);
            (*dst).constraints = cs;
            (*dst).n_constraints = nc;
            (*dst).metadata_json = c_strdup("{}");
        }
        Arg::Flag { short_opt, long_opt, long_rev, default_val, desc, .. } => {
            (*dst).kind = ManifestArgKind::Flag;
            (*dst).short_opt = short_opt
                .as_ref()
                .and_then(|s| s.as_bytes().first().copied())
                .map(|b| b as c_char)
                .unwrap_or(0);
            (*dst).long_opt = nullable_strdup(long_opt.as_deref());
            (*dst).long_rev = nullable_strdup(long_rev.as_deref());
            (*dst).default_val = nullable_strdup(default_val.as_deref());
            let (d, n) = populate_str_vec(desc);
            (*dst).desc = d;
            (*dst).n_desc = n;
            (*dst).metadata_json = c_strdup("{}");
        }
        Arg::Group {
            schema, type_desc, metavar, desc, group_opt, entries, constraints, ..
        } => {
            (*dst).kind = ManifestArgKind::Grp;
            (*dst).schema = nullable_strdup(schema.as_deref());
            (*dst).type_desc = nullable_strdup(type_desc.as_deref());
            (*dst).metavar = nullable_strdup(metavar.as_deref());
            let (d, n) = populate_str_vec(desc);
            (*dst).desc = d;
            (*dst).n_desc = n;
            if let Some(g) = group_opt {
                (*dst).grp_short = g
                    .short_opt
                    .as_ref()
                    .and_then(|s| s.as_bytes().first().copied())
                    .map(|b| b as c_char)
                    .unwrap_or(0);
                (*dst).grp_long = nullable_strdup(g.long_opt.as_deref());
            }
            if !entries.is_empty() {
                (*dst).n_entries = entries.len();
                // NOTE(review): stripped generic -- presumably
                // size_of::<ManifestGrpEntry>().
                (*dst).entries = libc::calloc(
                    entries.len(),
                    std::mem::size_of::(),
                ) as *mut ManifestGrpEntry;
                for (i, ge) in entries.iter().enumerate() {
                    let ge_dst = &mut *(*dst).entries.add(i);
                    ge_dst.key = c_strdup(&ge.key);
                    // NOTE(review): stripped generic -- presumably
                    // size_of::<ManifestArg>().
                    let sub_arg = libc::calloc(1, std::mem::size_of::()) as *mut ManifestArg;
                    populate_arg(sub_arg, &ge.arg);
                    ge_dst.arg = sub_arg;
                }
            }
            let (cs, nc) = populate_constraints(constraints);
            (*dst).constraints = cs;
            (*dst).n_constraints = nc;
            (*dst).metadata_json = c_strdup("{}");
        }
    }
}

/// Copy the Rust-side Return descriptor into the embedded C sub-struct.
unsafe fn populate_return(dst: *mut ManifestReturn, src: &morloc_manifest::Return) {
    (*dst).schema = c_strdup(&src.schema);
    (*dst).type_desc = c_strdup(&src.type_desc);
    let (d, n) = populate_str_vec(&src.desc);
    (*dst).desc = d;
    (*dst).n_desc = n;
    let (cs, nc) = populate_constraints(&src.constraints);
    (*dst).constraints = cs;
    (*dst).n_constraints = nc;
    (*dst).metadata_json = populate_metadata(&src.metadata);
}

/// Copy one Rust-side Command into a zero-initialized C ManifestCommand.
/// Builds the expression tree only for pure commands; returns Err if
/// expression construction fails.
unsafe fn populate_command(dst: *mut ManifestCommand, src: &morloc_manifest::Command) -> Result<(), MorlocError> {
    (*dst).name = c_strdup(&src.name);
    (*dst).is_pure = src.is_pure();
    (*dst).mid = src.mid;
    (*dst).pool_index = src.pool_index;
    if !src.needed_pools.is_empty() {
        (*dst).n_needed_pools = src.needed_pools.len();
        // NOTE(review): stripped generic -- presumably size_of::<usize>().
        (*dst).needed_pools = libc::calloc(
            src.needed_pools.len(),
            std::mem::size_of::(),
        ) as *mut usize;
        for (i, p) in src.needed_pools.iter().enumerate() {
            *(*dst).needed_pools.add(i) = *p;
        }
    }
    let (d, n) = populate_str_vec(&src.desc);
    (*dst).desc = d;
    (*dst).n_desc = n;
    if !src.args.is_empty() {
        (*dst).n_args = src.args.len();
        // NOTE(review): stripped generic -- presumably size_of::<ManifestArg>().
        (*dst).args = libc::calloc(
            src.args.len(),
            std::mem::size_of::(),
        ) as *mut ManifestArg;
        for (i, a) in src.args.iter().enumerate() {
            populate_arg((*dst).args.add(i), a);
        }
    }
    populate_return(&mut (*dst).ret, &src.ret);
    let (cs, nc) = populate_constraints(&src.constraints);
    (*dst).constraints = cs;
    (*dst).n_constraints = nc;
    (*dst).metadata_json = populate_metadata(&src.metadata);
    if src.is_pure() {
        if let Some(expr_val) = &src.expr {
            match build_expr(expr_val) {
                Ok(e) => (*dst).expr = e,
                Err(e) => return Err(e),
            }
        }
    }
    (*dst).group = match &src.group {
        Some(g) => c_strdup(g),
        None => ptr::null_mut(),
    };
    Ok(())
}

/// Copy one Rust-side Pool (language, exec argv, socket, metadata)
/// into its C counterpart; exec becomes a NULL-terminated argv array.
unsafe fn populate_pool(dst: *mut ManifestPool, src: &morloc_manifest::Pool) {
    (*dst).lang = c_strdup(&src.lang);
    let n = src.exec.len();
    (*dst).exec = libc::calloc(n + 1, std::mem::size_of::<*mut c_char>()) as *mut *mut c_char;
    for (i, e) in src.exec.iter().enumerate() {
        *(*dst).exec.add(i) = c_strdup(e);
    }
    *(*dst).exec.add(n) = ptr::null_mut();
    (*dst).socket = c_strdup(&src.socket);
    (*dst).metadata_json = populate_metadata(&src.metadata);
}

/// Copy one Rust-side CmdGroup into its C counterpart.
unsafe fn populate_cmd_group(dst: *mut ManifestCmdGroup, src: &morloc_manifest::CmdGroup) {
    (*dst).name = c_strdup(&src.name);
    let (d, n) = populate_str_vec(&src.desc);
    (*dst).desc = d;
    (*dst).n_desc = n;
    (*dst).metadata_json = populate_metadata(&src.metadata);
}

/// Copy the optional Rust-side Service block into its C counterpart
/// (missing port maps to 0, missing strings to NULL).
unsafe fn populate_service(dst: *mut ManifestService, src: &morloc_manifest::Service) {
    (*dst).stype = nullable_strdup(src.service_type.as_deref());
    (*dst).host = nullable_strdup(src.host.as_deref());
    (*dst).port = src.port.unwrap_or(0);
    (*dst).socket = nullable_strdup(src.socket.as_deref());
    (*dst).metadata_json = populate_metadata(&src.metadata);
}

// -- parse_manifest -----------------------------------------------------------
//
// Reads a manifest JSON string, parses it via the canonical
// morloc-manifest crate (which performs the
// version staleness check),
// and converts the resulting Rust struct into owned C structs for
// daemon-side consumers. There is no separate JSON walker here -- the
// shape and validation rules live in one place (the morloc-manifest
// crate).

/// C entry point: parse a manifest JSON string into an owned C
/// Manifest. On failure, writes a message into `errmsg` and returns
/// NULL. The result must be released with free_manifest.
///
/// NOTE(review): when populate_command fails mid-loop, the
/// partially-built Manifest `m` is NOT freed before returning NULL --
/// looks like a leak; consider calling free_manifest(m). TODO confirm.
/// Stripped generics below -- presumably size_of::<Manifest>(),
/// size_of::<ManifestPool>(), etc.
#[no_mangle]
pub unsafe extern "C" fn parse_manifest(
    text: *const c_char,
    errmsg: *mut *mut c_char,
) -> *mut Manifest {
    clear_errmsg(errmsg);
    let s = CStr::from_ptr(text).to_string_lossy();
    let parsed = match morloc_manifest::parse_manifest(&s) {
        Ok(p) => p,
        Err(e) => {
            set_errmsg(errmsg, &MorlocError::Other(e));
            return ptr::null_mut();
        }
    };
    let m = libc::calloc(1, std::mem::size_of::()) as *mut Manifest;
    (*m).name = c_strdup(&parsed.name);
    // build sub-object
    (*m).build.path = c_strdup(&parsed.build.path);
    (*m).build.time = parsed.build.time;
    (*m).build.morloc_version = c_strdup(&parsed.build.morloc_version);
    // pools
    if !parsed.pools.is_empty() {
        (*m).n_pools = parsed.pools.len();
        (*m).pools = libc::calloc(
            parsed.pools.len(),
            std::mem::size_of::(),
        ) as *mut ManifestPool;
        for (i, p) in parsed.pools.iter().enumerate() {
            populate_pool((*m).pools.add(i), p);
        }
    }
    // commands
    if !parsed.commands.is_empty() {
        (*m).n_commands = parsed.commands.len();
        (*m).commands = libc::calloc(
            parsed.commands.len(),
            std::mem::size_of::(),
        ) as *mut ManifestCommand;
        for (i, c) in parsed.commands.iter().enumerate() {
            if let Err(e) = populate_command((*m).commands.add(i), c) {
                set_errmsg(errmsg, &e);
                return ptr::null_mut();
            }
        }
    }
    // groups
    if !parsed.groups.is_empty() {
        (*m).n_groups = parsed.groups.len();
        (*m).groups = libc::calloc(
            parsed.groups.len(),
            std::mem::size_of::(),
        ) as *mut ManifestCmdGroup;
        for (i, g) in parsed.groups.iter().enumerate() {
            populate_cmd_group((*m).groups.add(i), g);
        }
    }
    // service
    if let Some(svc) = parsed.service {
        (*m).service = libc::calloc(1, std::mem::size_of::()) as *mut ManifestService;
        populate_service((*m).service, &svc);
    }
    (*m).metadata_json = populate_metadata(&parsed.metadata);
    m
}

// -- read_manifest ------------------------------------------------------------

/// C entry point: read a manifest file from `path` and delegate to
/// parse_manifest. I/O errors are reported through `errmsg`.
#[no_mangle]
pub unsafe extern "C" fn read_manifest(
    path: *const c_char,
    errmsg: *mut *mut c_char,
) -> *mut Manifest {
    clear_errmsg(errmsg);
    let path_str = CStr::from_ptr(path).to_string_lossy();
    match std::fs::read_to_string(path_str.as_ref()) {
        Ok(text) => {
            let c_text = CString::new(text).unwrap_or_default();
            parse_manifest(c_text.as_ptr(), errmsg)
        }
        Err(e) => {
            set_errmsg(errmsg, &MorlocError::Io(e));
            ptr::null_mut()
        }
    }
}

// -- free_manifest ------------------------------------------------------------
//
// Walks the v2 C structs and releases every owned C string + sub-allocation.
// Helper functions mirror the populate_* helpers above for symmetry.

/// Free a NULL-terminated array of owned C strings (and the array itself).
/// NULL input is a no-op.
unsafe fn free_str_array(arr: *mut *mut c_char) {
    if arr.is_null() {
        return;
    }
    let mut j = 0;
    while !(*arr.add(j)).is_null() {
        libc::free(*arr.add(j) as *mut c_void);
        j += 1;
    }
    libc::free(arr as *mut c_void);
}

/// Free an array of `n` constraints, including each element's owned
/// strings. NULL input is a no-op.
unsafe fn free_constraints(ptr: *mut ManifestConstraint, n: usize) {
    if ptr.is_null() {
        return;
    }
    for i in 0..n {
        let c = &*ptr.add(i);
        if !c.ctype.is_null() {
            libc::free(c.ctype as *mut c_void);
        }
        if !c.value_json.is_null() {
            libc::free(c.value_json as *mut c_void);
        }
    }
    libc::free(ptr as *mut c_void);
}

/// Free the owned contents of one ManifestArg (recursing into group
/// entries). Does NOT free `arg` itself -- the containing array owner
/// does that.
unsafe fn free_arg(arg: &ManifestArg) {
    if !arg.schema.is_null() { libc::free(arg.schema as *mut c_void); }
    if !arg.type_desc.is_null() { libc::free(arg.type_desc as *mut c_void); }
    if !arg.metavar.is_null() { libc::free(arg.metavar as *mut c_void); }
    if !arg.long_opt.is_null() { libc::free(arg.long_opt as *mut c_void); }
    if !arg.long_rev.is_null() { libc::free(arg.long_rev as *mut c_void); }
    if !arg.default_val.is_null() { libc::free(arg.default_val as *mut c_void); }
    free_str_array(arg.desc);
    free_constraints(arg.constraints, arg.n_constraints);
    if !arg.grp_long.is_null() { libc::free(arg.grp_long as *mut c_void); }
    if !arg.entries.is_null() {
        for i in 0..arg.n_entries {
            let ge = &*arg.entries.add(i);
            if !ge.key.is_null() {
                libc::free(ge.key as *mut c_void);
            }
            if !ge.arg.is_null() {
                // Sub-args are individually heap-allocated (see
                // populate_arg), so free contents then the node.
                free_arg(&*ge.arg);
                libc::free(ge.arg as *mut c_void);
            }
        }
        libc::free(arg.entries as *mut c_void);
    }
    if !arg.metadata_json.is_null() { libc::free(arg.metadata_json as *mut c_void); }
}

/// Free the owned contents of an embedded ManifestReturn sub-struct.
unsafe fn free_return(ret: &ManifestReturn) {
    if !ret.schema.is_null() { libc::free(ret.schema as *mut c_void); }
    if !ret.type_desc.is_null() { libc::free(ret.type_desc as *mut c_void); }
    free_str_array(ret.desc);
    free_constraints(ret.constraints, ret.n_constraints);
    if !ret.metadata_json.is_null() { libc::free(ret.metadata_json as *mut c_void); }
}

/// C entry point: release an entire Manifest produced by
/// parse_manifest/read_manifest. NULL input is a no-op.
#[no_mangle]
pub unsafe extern "C" fn free_manifest(manifest: *mut Manifest) {
    if manifest.is_null() {
        return;
    }
    let m = &*manifest;
    if !m.name.is_null() { libc::free(m.name as *mut c_void); }
    // build sub-object
    if !m.build.path.is_null() { libc::free(m.build.path as *mut c_void); }
    if !m.build.morloc_version.is_null() { libc::free(m.build.morloc_version as *mut c_void); }
    // pools
    for i in 0..m.n_pools {
        let pool = &*m.pools.add(i);
        if !pool.lang.is_null() { libc::free(pool.lang as *mut c_void); }
        free_str_array(pool.exec);
        if !pool.socket.is_null() { libc::free(pool.socket as *mut c_void); }
        if !pool.metadata_json.is_null() { libc::free(pool.metadata_json as *mut c_void); }
    }
    if !m.pools.is_null() { libc::free(m.pools as *mut c_void); }
    // commands
    for i in 0..m.n_commands {
        let cmd = &*m.commands.add(i);
        if !cmd.name.is_null() { libc::free(cmd.name as *mut c_void); }
        if !cmd.needed_pools.is_null() { libc::free(cmd.needed_pools as *mut c_void); }
        free_str_array(cmd.desc);
        for j in 0..cmd.n_args {
            free_arg(&*cmd.args.add(j));
        }
        if !cmd.args.is_null() { libc::free(cmd.args as *mut c_void); }
        free_return(&cmd.ret);
        free_constraints(cmd.constraints, cmd.n_constraints);
        if !cmd.group.is_null() { libc::free(cmd.group as *mut c_void); }
        if !cmd.metadata_json.is_null() { libc::free(cmd.metadata_json as *mut c_void); }
        // Note: cmd.expr is owned by the C side and freed by its own
        // free function in eval_ffi.rs (not in scope here).
    }
    if !m.commands.is_null() { libc::free(m.commands as *mut c_void); }
    // groups
    for i in 0..m.n_groups {
        let g = &*m.groups.add(i);
        if !g.name.is_null() { libc::free(g.name as *mut c_void); }
        free_str_array(g.desc);
        if !g.metadata_json.is_null() { libc::free(g.metadata_json as *mut c_void); }
    }
    if !m.groups.is_null() { libc::free(m.groups as *mut c_void); }
    // service
    if !m.service.is_null() {
        let svc = &*m.service;
        if !svc.stype.is_null() { libc::free(svc.stype as *mut c_void); }
        if !svc.host.is_null() { libc::free(svc.host as *mut c_void); }
        if !svc.socket.is_null() { libc::free(svc.socket as *mut c_void); }
        if !svc.metadata_json.is_null() { libc::free(svc.metadata_json as *mut c_void); }
        libc::free(m.service as *mut c_void);
    }
    if !m.metadata_json.is_null() { libc::free(m.metadata_json as *mut c_void); }
    libc::free(manifest as *mut c_void);
}

// -- manifest_to_discovery_json -----------------------------------------------
//
// Emits a v2-shape JSON describing the manifest's commands. Used by the
// daemon/router code for discovery RPC. The output mirrors the morloc
// compiler's manifest format closely (no v1 legacy field names).
/// C entry point: serialize a Manifest into a discovery JSON string
/// using the C-side json_buf writer. Returns NULL only for a NULL
/// manifest; the returned string is allocated by json_buf_finish.
#[no_mangle]
pub unsafe extern "C" fn manifest_to_discovery_json(manifest: *const Manifest) -> *mut c_char {
    if manifest.is_null() {
        return ptr::null_mut();
    }
    let m = &*manifest;
    extern "C" {
        fn json_buf_new() -> *mut c_void;
        fn json_buf_finish(jb: *mut c_void) -> *mut c_char;
        fn json_write_obj_start(jb: *mut c_void);
        fn json_write_obj_end(jb: *mut c_void);
        fn json_write_arr_start(jb: *mut c_void);
        fn json_write_arr_end(jb: *mut c_void);
        fn json_write_key(jb: *mut c_void, key: *const c_char);
        fn json_write_string(jb: *mut c_void, val: *const c_char);
    }
    let jb = json_buf_new();
    json_write_obj_start(jb);
    // Shared key literals (NUL-terminated byte strings).
    let name_key = b"name\0".as_ptr() as *const c_char;
    let type_key = b"type\0".as_ptr() as *const c_char;
    let kind_key = b"kind\0".as_ptr() as *const c_char;
    let schema_key = b"schema\0".as_ptr() as *const c_char;
    json_write_key(jb, name_key);
    json_write_string(
        jb,
        if m.name.is_null() { b"unknown\0".as_ptr() as *const c_char } else { m.name },
    );
    // Surface the morloc compiler version that built this manifest.
    if !m.build.morloc_version.is_null() {
        let mv_key = b"morloc_version\0".as_ptr() as *const c_char;
        json_write_key(jb, mv_key);
        json_write_string(jb, m.build.morloc_version);
    }
    let commands_key = b"commands\0".as_ptr() as *const c_char;
    json_write_key(jb, commands_key);
    json_write_arr_start(jb);
    for i in 0..m.n_commands {
        let cmd = &*m.commands.add(i);
        json_write_obj_start(jb);
        json_write_key(jb, name_key);
        json_write_string(jb, cmd.name);
        json_write_key(jb, type_key);
        json_write_string(
            jb,
            if cmd.is_pure { b"pure\0".as_ptr() as *const c_char } else { b"remote\0".as_ptr() as *const c_char },
        );
        // Return descriptor (nested return object).
        if !cmd.ret.type_desc.is_null() || !cmd.ret.schema.is_null() {
            let ret_key = b"return\0".as_ptr() as *const c_char;
            json_write_key(jb, ret_key);
            json_write_obj_start(jb);
            if !cmd.ret.type_desc.is_null() {
                json_write_key(jb, type_key);
                json_write_string(jb, cmd.ret.type_desc);
            }
            if !cmd.ret.schema.is_null() {
                json_write_key(jb, schema_key);
                json_write_string(jb, cmd.ret.schema);
            }
            json_write_obj_end(jb);
        }
        // Args. Each arg's schema (if any) is on the arg itself; no
        // parallel array, no flag-skipping bug.
        let args_key = b"args\0".as_ptr() as *const c_char;
        json_write_key(jb, args_key);
        json_write_arr_start(jb);
        for a in 0..cmd.n_args {
            let arg = &*cmd.args.add(a);
            json_write_obj_start(jb);
            json_write_key(jb, kind_key);
            match arg.kind {
                ManifestArgKind::Pos => json_write_string(jb, b"pos\0".as_ptr() as *const c_char),
                ManifestArgKind::Opt => json_write_string(jb, b"opt\0".as_ptr() as *const c_char),
                ManifestArgKind::Flag => json_write_string(jb, b"flag\0".as_ptr() as *const c_char),
                ManifestArgKind::Grp => json_write_string(jb, b"grp\0".as_ptr() as *const c_char),
            }
            if !arg.metavar.is_null() {
                json_write_key(jb, b"metavar\0".as_ptr() as *const c_char);
                json_write_string(jb, arg.metavar);
            }
            if !arg.type_desc.is_null() {
                json_write_key(jb, type_key);
                json_write_string(jb, arg.type_desc);
            }
            if !arg.schema.is_null() {
                json_write_key(jb, schema_key);
                json_write_string(jb, arg.schema);
            }
            if !arg.default_val.is_null() {
                json_write_key(jb, b"default\0".as_ptr() as *const c_char);
                json_write_string(jb, arg.default_val);
            }
            if !arg.long_opt.is_null() {
                json_write_key(jb, b"long\0".as_ptr() as *const c_char);
                json_write_string(jb, arg.long_opt);
            }
            if arg.short_opt != 0 {
                // Build a 2-byte NUL-terminated C string on the stack.
                let short_str = [arg.short_opt as u8, 0];
                json_write_key(jb, b"short\0".as_ptr() as *const c_char);
                json_write_string(jb, short_str.as_ptr() as *const c_char);
            }
            // Only the first (non-empty) description line is surfaced.
            if arg.n_desc > 0 && !arg.desc.is_null() && !(*arg.desc).is_null() {
                let first = *arg.desc;
                if *first != 0 {
                    let desc_key = b"desc\0".as_ptr() as *const c_char;
                    json_write_key(jb, desc_key);
                    json_write_string(jb, first);
                }
            }
            json_write_obj_end(jb);
        }
        json_write_arr_end(jb);
        if cmd.n_desc > 0 && !cmd.desc.is_null() && !(*cmd.desc).is_null() {
            let first = *cmd.desc;
            if *first != 0 {
                json_write_key(jb, b"desc\0".as_ptr() as *const c_char);
                json_write_string(jb, first);
            }
        }
        if !cmd.group.is_null() {
            json_write_key(jb, b"group\0".as_ptr() as *const c_char);
            json_write_string(jb, cmd.group);
        }
        json_write_obj_end(jb);
    }
    json_write_arr_end(jb);
    if m.n_groups > 0 {
        let groups_key = b"groups\0".as_ptr() as *const c_char;
        json_write_key(jb, groups_key);
        json_write_arr_start(jb);
        for i in 0..m.n_groups {
            let g = &*m.groups.add(i);
            json_write_obj_start(jb);
            json_write_key(jb, name_key);
            json_write_string(jb, g.name);
            if g.n_desc > 0 && !g.desc.is_null() && !(*g.desc).is_null() {
                json_write_key(jb, b"desc\0".as_ptr() as *const c_char);
                json_write_string(jb, *g.desc);
            }
            json_write_obj_end(jb);
        }
        json_write_arr_end(jb);
    }
    json_write_obj_end(jb);
    json_buf_finish(jb)
}



================================================
FILE: data/rust/morloc-runtime/src/mpack.rs
================================================
//! MessagePack <-> Voidstar conversion.
//!
//! Replaces serialize.c + mpack.c. Uses the `rmp` crate for MessagePack I/O.
//! The voidstar binary format is morloc-specific (Array/Tensor structs with relptrs).

use crate::error::MorlocError;
use crate::schema::{Schema, SerialType};
use crate::shm::{self, AbsPtr, Array, RELNULL};

// ── Voidstar -> MessagePack ────────────────────────────────────────────────

/// Serialize voidstar data to MessagePack bytes.
/// NOTE(review): return type has a stripped generic -- presumably
/// Result<Vec<u8>, MorlocError>.
pub fn pack_with_schema(ptr: AbsPtr, schema: &Schema) -> Result, MorlocError> {
    let mut buf = Vec::with_capacity(256);
    pack_data(ptr, schema, &mut buf)?;
    Ok(buf)
}

/// Recursively encode one voidstar value (at `ptr`, described by
/// `schema`) into `buf` as MessagePack.
/// NOTE(review): `&mut Vec` has a stripped generic -- presumably
/// &mut Vec<u8>.
fn pack_data(ptr: AbsPtr, schema: &Schema, buf: &mut Vec) -> Result<(), MorlocError> {
    // SAFETY: ptr points to voidstar data in SHM with layout described by schema.
    // All reads are within bounds defined by schema.width, Array headers, etc.
    unsafe {
        match schema.serial_type {
            SerialType::Nil => {
                rmp::encode::write_nil(buf)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack nil: {}", e)))?;
            }
            SerialType::Bool => {
                let v = *ptr != 0;
                rmp::encode::write_bool(buf, v)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack bool: {}", e)))?;
            }
            SerialType::Uint8 => {
                rmp::encode::write_uint(buf, *ptr as u64)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack uint: {}", e)))?;
            }
            SerialType::Uint16 => {
                rmp::encode::write_uint(buf, *(ptr as *const u16) as u64)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack uint: {}", e)))?;
            }
            SerialType::Uint32 => {
                rmp::encode::write_uint(buf, *(ptr as *const u32) as u64)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack uint: {}", e)))?;
            }
            SerialType::Uint64 => {
                rmp::encode::write_uint(buf, *(ptr as *const u64))
                    .map_err(|e| MorlocError::Serialization(format!("msgpack uint: {}", e)))?;
            }
            SerialType::Sint8 => {
                rmp::encode::write_sint(buf, *(ptr as *const i8) as i64)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack sint: {}", e)))?;
            }
            SerialType::Sint16 => {
                rmp::encode::write_sint(buf, *(ptr as *const i16) as i64)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack sint: {}", e)))?;
            }
            SerialType::Sint32 => {
                rmp::encode::write_sint(buf, *(ptr as *const i32) as i64)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack sint: {}", e)))?;
            }
            SerialType::Sint64 => {
                rmp::encode::write_sint(buf, *(ptr as *const i64))
                    .map_err(|e| MorlocError::Serialization(format!("msgpack sint: {}", e)))?;
            }
            SerialType::Float32 => {
                // f32 values are widened to f64 on the wire.
                let f = *(ptr as *const f32) as f64;
                rmp::encode::write_f64(buf, f)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack float: {}", e)))?;
            }
            SerialType::Float64 => {
                let f = *(ptr as *const f64);
                rmp::encode::write_f64(buf, f)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack float: {}", e)))?;
            }
            SerialType::String => {
                // Strings are Array headers whose data is raw bytes in SHM.
                let arr = &*(ptr as *const Array);
                let data = shm::rel2abs(arr.data)?;
                let bytes = std::slice::from_raw_parts(data, arr.size);
                rmp::encode::write_str_len(buf, arr.size as u32)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack str: {}", e)))?;
                buf.extend_from_slice(bytes);
            }
            SerialType::Array => {
                let arr = &*(ptr as *const Array);
                let elem_schema = &schema.parameters[0];
                let elem_width = elem_schema.width;
                rmp::encode::write_array_len(buf, arr.size as u32)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack array: {}", e)))?;
                // RELNULL data with size 0 means an empty array; skip reads.
                if arr.size > 0 && arr.data != RELNULL {
                    let data = shm::rel2abs(arr.data)?;
                    for i in 0..arr.size {
                        let elem_ptr = data.add(i * elem_width);
                        pack_data(elem_ptr, elem_schema, buf)?;
                    }
                }
            }
            SerialType::Tuple | SerialType::Map => {
                // Tuples and maps are encoded positionally as msgpack arrays;
                // field offsets come from the schema.
                rmp::encode::write_array_len(buf, schema.parameters.len() as u32)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack tuple: {}", e)))?;
                for (i, field_schema) in schema.parameters.iter().enumerate() {
                    let field_ptr = ptr.add(schema.offsets[i]);
                    pack_data(field_ptr, field_schema, buf)?;
                }
            }
            SerialType::Optional => {
                // Leading tag byte: 0 => None (nil), nonzero => Some(payload).
                let tag = *ptr;
                if tag == 0 {
                    rmp::encode::write_nil(buf)
                        .map_err(|e| MorlocError::Serialization(format!("msgpack nil: {}", e)))?;
                } else {
                    let inner_schema = &schema.parameters[0];
                    // Payload offset: prefer the schema's recorded offset,
                    // otherwise align past the 1-byte tag.
                    let inner_offset = schema.offsets.first().copied()
                        .unwrap_or_else(|| shm::align_up(1, inner_schema.alignment().max(1)));
                    let inner_ptr = ptr.add(inner_offset);
                    pack_data(inner_ptr, inner_schema, buf)?;
                }
            }
            SerialType::Tensor => {
                return Err(MorlocError::Serialization(
                    "MessagePack serialization of tensors not yet supported".into(),
                ));
            }
        }
    }
    Ok(())
}

// ── MessagePack -> Voidstar ────────────────────────────────────────────────

/// Deserialize MessagePack bytes into voidstar format in SHM.
pub fn unpack_with_schema(
    data: &[u8],
    schema: &Schema,
) -> Result<AbsPtr, MorlocError> {
    // Two-pass: first calculate size, then deserialize
    let size = calc_unpack_size(data, schema)?;
    let base = shm::shmalloc(size)?;
    // SAFETY: base is freshly allocated with `size` bytes.
    unsafe { std::ptr::write_bytes(base, 0, size) };
    // SAFETY: cursor starts at base + schema.width, within the allocated region.
    // The fixed-width root value lives at `base`; all variable-length payloads
    // (string bytes, array element storage) are appended at `cursor`.
    let mut cursor = unsafe { base.add(schema.width) };
    let mut reader = &data[..];
    unpack_obj(base, schema, &mut cursor, &mut reader)?;
    Ok(base)
}

// Decode one value at `ptr` according to `schema`. `cursor` is the bump
// allocator for variable-length payloads and advances as data is written;
// `reader` is the remaining MessagePack input and is consumed in step.
fn unpack_obj(
    ptr: AbsPtr,
    schema: &Schema,
    cursor: &mut AbsPtr,
    reader: &mut &[u8],
) -> Result<(), MorlocError> {
    use rmp::decode;
    // SAFETY: ptr and cursor point into a single contiguous SHM allocation
    // sized by calc_unpack_size. Each write respects schema.width bounds.
    unsafe {
        match schema.serial_type {
            SerialType::Nil => {
                decode::read_nil(reader)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack nil: {}", e)))?;
                *ptr = 0;
            }
            SerialType::Bool => {
                let v = decode::read_bool(reader)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack bool: {}", e)))?;
                *ptr = v as u8;
            }
            // All integer widths share one decoder that writes exactly the
            // width implied by the schema's serial type.
            SerialType::Uint8
            | SerialType::Uint16
            | SerialType::Uint32
            | SerialType::Uint64
            | SerialType::Sint8
            | SerialType::Sint16
            | SerialType::Sint32
            | SerialType::Sint64 => {
                unpack_int(ptr, schema.serial_type, reader)?;
            }
            SerialType::Float32 => {
                // read_float also accepts integer markers for integer-valued floats.
                let f = read_float(reader)?;
                *(ptr as *mut f32) = f as f32;
            }
            SerialType::Float64 => {
                let f = read_float(reader)?;
                *(ptr as *mut f64) = f;
            }
            SerialType::String => {
                let len = decode::read_str_len(reader)
                    .map_err(|e| MorlocError::Serialization(format!("msgpack str len: {}", e)))?
as usize; let arr = &mut *(ptr as *mut Array); arr.size = len; arr.data = shm::abs2rel(*cursor)?; // Read string bytes directly if len > 0 { if reader.len() < len { return Err(MorlocError::Serialization("msgpack str truncated".into())); } std::ptr::copy_nonoverlapping(reader.as_ptr(), *cursor, len); *reader = &reader[len..]; } *cursor = cursor.add(len); } SerialType::Array => { let n = decode::read_array_len(reader) .map_err(|e| MorlocError::Serialization(format!("msgpack array len: {}", e)))? as usize; let elem_schema = &schema.parameters[0]; let elem_width = elem_schema.width; let arr = &mut *(ptr as *mut Array); arr.size = n; // Align cursor for element data let align = elem_schema.alignment(); let aligned = shm::align_up(*cursor as usize, align); *cursor = aligned as AbsPtr; arr.data = shm::abs2rel(*cursor)?; let data_start = *cursor; *cursor = cursor.add(n * elem_width); for i in 0..n { let elem_ptr = data_start.add(i * elem_width); unpack_obj(elem_ptr, elem_schema, cursor, reader)?; } } SerialType::Tuple | SerialType::Map => { let _n = decode::read_array_len(reader) .map_err(|e| MorlocError::Serialization(format!("msgpack tuple len: {}", e)))?; for (i, field_schema) in schema.parameters.iter().enumerate() { let field_ptr = ptr.add(schema.offsets[i]); unpack_obj(field_ptr, field_schema, cursor, reader)?; } } SerialType::Optional => { let inner_schema = &schema.parameters[0]; let inner_offset = schema.offsets.first().copied() .unwrap_or_else(|| shm::align_up(1, inner_schema.alignment().max(1))); // Peek at the next byte to detect nil if !reader.is_empty() && reader[0] == 0xc0 { // Null: consume nil byte, set tag = 0 decode::read_nil(reader) .map_err(|e| MorlocError::Serialization(format!("msgpack nil: {}", e)))?; *ptr = 0; } else { // Present: set tag = 1, parse inner *ptr = 1; let inner_ptr = ptr.add(inner_offset); unpack_obj(inner_ptr, inner_schema, cursor, reader)?; } } SerialType::Tensor => { return Err(MorlocError::Serialization( "MessagePack tensor 
deserialization not yet supported".into(), )); } } } Ok(()) } fn unpack_int(ptr: AbsPtr, st: SerialType, reader: &mut &[u8]) -> Result<(), MorlocError> { // Use rmp's generic read_int which handles all integer markers let val: i64 = rmp::decode::read_int(reader) .map_err(|e| MorlocError::Serialization(format!("msgpack int: {}", e)))?; // SAFETY: ptr points to schema.width bytes in SHM; each cast writes exactly that width. unsafe { match st { SerialType::Sint8 => *(ptr as *mut i8) = val as i8, SerialType::Sint16 => *(ptr as *mut i16) = val as i16, SerialType::Sint32 => *(ptr as *mut i32) = val as i32, SerialType::Sint64 => *(ptr as *mut i64) = val, SerialType::Uint8 => *ptr = val as u8, SerialType::Uint16 => *(ptr as *mut u16) = val as u16, SerialType::Uint32 => *(ptr as *mut u32) = val as u32, SerialType::Uint64 => *(ptr as *mut u64) = val as u64, _ => {} } } Ok(()) } fn read_float(reader: &mut &[u8]) -> Result { let marker = rmp::decode::read_marker(reader) .map_err(|_| MorlocError::Serialization("msgpack float: unexpected EOF".into()))?; match marker { rmp::Marker::F32 => { let bits = read_be_u32(reader)?; Ok(f32::from_bits(bits) as f64) } rmp::Marker::F64 => { let bits = read_be_u64(reader)?; Ok(f64::from_bits(bits)) } // Integer markers can appear for integer-valued floats - read the data manually rmp::Marker::FixPos(v) => Ok(v as f64), rmp::Marker::FixNeg(v) => Ok(v as f64), _ => { // For other integer encodings, read bytes manually let n = match marker { rmp::Marker::U8 => { read_byte(reader)? as f64 } rmp::Marker::U16 => { read_be_u16(reader)? as f64 } rmp::Marker::U32 => { read_be_u32(reader)? as f64 } rmp::Marker::U64 => { read_be_u64(reader)? as f64 } rmp::Marker::I8 => { read_byte(reader)? as i8 as f64 } rmp::Marker::I16 => { read_be_u16(reader)? as i16 as f64 } rmp::Marker::I32 => { read_be_u32(reader)? as i32 as f64 } rmp::Marker::I64 => { read_be_u64(reader)? 
as i64 as f64 } _ => { return Err(MorlocError::Serialization(format!( "unexpected msgpack marker {:?} for float", marker ))); } }; Ok(n) } } } // Also fix read_f32/read_f64 - rmp's functions include the marker, but we already consumed it // So we need to read the raw data bytes directly. fn read_byte(reader: &mut &[u8]) -> Result { if reader.is_empty() { return Err(MorlocError::Serialization("unexpected EOF".into())); } let v = reader[0]; *reader = &reader[1..]; Ok(v) } fn read_be_u16(reader: &mut &[u8]) -> Result { if reader.len() < 2 { return Err(MorlocError::Serialization("unexpected EOF".into())); } let v = u16::from_be_bytes([reader[0], reader[1]]); *reader = &reader[2..]; Ok(v) } fn read_be_u32(reader: &mut &[u8]) -> Result { if reader.len() < 4 { return Err(MorlocError::Serialization("unexpected EOF".into())); } let v = u32::from_be_bytes([reader[0], reader[1], reader[2], reader[3]]); *reader = &reader[4..]; Ok(v) } fn read_be_u64(reader: &mut &[u8]) -> Result { if reader.len() < 8 { return Err(MorlocError::Serialization("unexpected EOF".into())); } let v = u64::from_be_bytes([reader[0], reader[1], reader[2], reader[3], reader[4], reader[5], reader[6], reader[7]]); *reader = &reader[8..]; Ok(v) } // ── Size calculation for unpack ──────────────────────────────────────────── fn calc_unpack_size(data: &[u8], schema: &Schema) -> Result { let mut reader = data; calc_size_r(schema, &mut reader) } fn calc_size_r(schema: &Schema, reader: &mut &[u8]) -> Result { match schema.serial_type { SerialType::Nil => { rmp::decode::read_nil(reader).ok(); Ok(1) } SerialType::Bool => { rmp::decode::read_bool(reader).ok(); Ok(1) } SerialType::Sint8 | SerialType::Uint8 => { skip_int(reader)?; Ok(1) } SerialType::Sint16 | SerialType::Uint16 => { skip_int(reader)?; Ok(2) } SerialType::Sint32 | SerialType::Uint32 | SerialType::Float32 => { skip_int(reader)?; Ok(4) } SerialType::Sint64 | SerialType::Uint64 | SerialType::Float64 => { skip_int(reader)?; Ok(8) } SerialType::String => { 
let len = rmp::decode::read_str_len(reader) .map_err(|e| MorlocError::Serialization(format!("size calc str: {}", e)))? as usize; if reader.len() >= len { *reader = &reader[len..]; } Ok(std::mem::size_of::() + len) } SerialType::Array => { let n = rmp::decode::read_array_len(reader) .map_err(|e| MorlocError::Serialization(format!("size calc array: {}", e)))? as usize; let elem_schema = &schema.parameters[0]; let mut total = std::mem::size_of::(); // Alignment padding total = shm::align_up(total, elem_schema.alignment()); for _ in 0..n { total += calc_size_r(elem_schema, reader)?; } Ok(total) } SerialType::Tuple | SerialType::Map => { let _n = rmp::decode::read_array_len(reader).ok(); let mut total = schema.width; for field_schema in &schema.parameters { if !field_schema.is_fixed_width() { total += calc_size_r(field_schema, reader)?; } else { calc_size_r(field_schema, reader)?; } } Ok(total) } SerialType::Optional => { let inner_schema = &schema.parameters[0]; if !reader.is_empty() && reader[0] == 0xc0 { rmp::decode::read_nil(reader).ok(); Ok(schema.width.max(1 + inner_schema.width)) } else { let inner_size = calc_size_r(inner_schema, reader)?; let align = inner_schema.alignment().max(1); let offset = shm::align_up(1, align); Ok(offset + inner_size) } } SerialType::Tensor => Ok(0), } } fn skip_int(reader: &mut &[u8]) -> Result<(), MorlocError> { let marker = rmp::decode::read_marker(reader) .map_err(|_| MorlocError::Serialization("skip int: unexpected EOF".into()))?; let skip = match marker { rmp::Marker::FixPos(_) | rmp::Marker::FixNeg(_) => 0, rmp::Marker::U8 | rmp::Marker::I8 => 1, rmp::Marker::U16 | rmp::Marker::I16 => 2, rmp::Marker::U32 | rmp::Marker::I32 | rmp::Marker::F32 => 4, rmp::Marker::U64 | rmp::Marker::I64 | rmp::Marker::F64 => 8, _ => 0, }; if reader.len() >= skip { *reader = &reader[skip..]; } Ok(()) } #[cfg(test)] mod tests { use super::*; use crate::schema::parse_schema; use crate::json; fn setup_shm() { crate::init_test_shm(); } #[test] fn 
test_roundtrip_int_via_msgpack() { setup_shm(); let schema = parse_schema("i4").unwrap(); // JSON -> voidstar -> msgpack -> voidstar -> JSON let ptr1 = json::read_json_with_schema("42", &schema).unwrap(); let mpk = pack_with_schema(ptr1, &schema).unwrap(); let ptr2 = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr2, &schema).unwrap(); assert_eq!(json_out, "42"); } #[test] fn test_roundtrip_string_via_msgpack() { setup_shm(); let schema = parse_schema("s").unwrap(); let ptr1 = json::read_json_with_schema("\"hello world\"", &schema).unwrap(); let mpk = pack_with_schema(ptr1, &schema).unwrap(); let ptr2 = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr2, &schema).unwrap(); assert_eq!(json_out, "\"hello world\""); } #[test] fn test_roundtrip_array_via_msgpack() { setup_shm(); let schema = parse_schema("ai4").unwrap(); let ptr1 = json::read_json_with_schema("[10,20,30]", &schema).unwrap(); let mpk = pack_with_schema(ptr1, &schema).unwrap(); let ptr2 = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr2, &schema).unwrap(); assert_eq!(json_out, "[10,20,30]"); } #[test] fn test_roundtrip_bool_via_msgpack() { setup_shm(); let schema = parse_schema("b").unwrap(); let ptr1 = json::read_json_with_schema("true", &schema).unwrap(); let mpk = pack_with_schema(ptr1, &schema).unwrap(); let ptr2 = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr2, &schema).unwrap(); assert_eq!(json_out, "true"); } #[test] fn test_roundtrip_optional_null_via_msgpack() { setup_shm(); let schema = parse_schema("?i4").unwrap(); let ptr1 = json::read_json_with_schema("null", &schema).unwrap(); let mpk = pack_with_schema(ptr1, &schema).unwrap(); let ptr2 = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr2, &schema).unwrap(); assert_eq!(json_out, "null"); } #[test] fn 
test_pack_only_string() { setup_shm(); let schema = parse_schema("s").unwrap(); let ptr1 = json::read_json_with_schema("\"hi\"", &schema).unwrap(); let mpk = pack_with_schema(ptr1, &schema).unwrap(); assert!(!mpk.is_empty()); assert_eq!(mpk.len(), 3); } #[test] fn test_unpack_only_int() { setup_shm(); let schema = parse_schema("i4").unwrap(); // msgpack for 42 = [42] (fixint) let mpk = vec![42u8]; let ptr = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr, &schema).unwrap(); assert_eq!(json_out, "42"); } #[test] fn test_unpack_only_string() { setup_shm(); let schema = parse_schema("s").unwrap(); // msgpack for "hi" = [0xa2, 0x68, 0x69] let mpk = vec![0xa2, 0x68, 0x69]; let size = calc_unpack_size(&mpk, &schema).unwrap(); eprintln!("unpack size for string: {} (Array={}, total={})", size, std::mem::size_of::(), size); let ptr = unpack_with_schema(&mpk, &schema).unwrap(); let json_out = json::voidstar_to_json_string(ptr, &schema).unwrap(); assert_eq!(json_out, "\"hi\""); } } ================================================ FILE: data/rust/morloc-runtime/src/packet.rs ================================================ use crate::error::MorlocError; // ── Magic & version constants ────────────────────────────────────────────── pub const PACKET_MAGIC: u32 = 0x0707_f86d; pub const THIS_PLAIN: u16 = 0; pub const THIS_VERSION: u16 = 0; pub const DEFAULT_FLAVOR: u16 = 0; pub const DEFAULT_MODE: u16 = 0; // ── Command type discriminants ───────────────────────────────────────────── pub const PACKET_TYPE_DATA: u8 = 0; pub const PACKET_TYPE_CALL: u8 = 1; pub const PACKET_TYPE_PING: u8 = 2; // ── Data source ──────────────────────────────────────────────────────────── pub const PACKET_SOURCE_MESG: u8 = 0x00; pub const PACKET_SOURCE_FILE: u8 = 0x01; pub const PACKET_SOURCE_RPTR: u8 = 0x02; // ── Data format ──────────────────────────────────────────────────────────── pub const PACKET_FORMAT_JSON: u8 = 0x00; pub const 
PACKET_FORMAT_MSGPACK: u8 = 0x01; pub const PACKET_FORMAT_TEXT: u8 = 0x02; pub const PACKET_FORMAT_DATA: u8 = 0x03; pub const PACKET_FORMAT_VOIDSTAR: u8 = 0x04; pub const PACKET_FORMAT_ARROW: u8 = 0x05; // ── Compression / encryption ─────────────────────────────────────────────── pub const PACKET_COMPRESSION_NONE: u8 = 0x00; pub const PACKET_ENCRYPTION_NONE: u8 = 0x00; // ── Status ───────────────────────────────────────────────────────────────── pub const PACKET_STATUS_PASS: u8 = 0x00; pub const PACKET_STATUS_FAIL: u8 = 0x01; // ── Entrypoint ────────────────────���──────────────────────────────────────── pub const PACKET_ENTRYPOINT_LOCAL: u8 = 0x00; pub const PACKET_ENTRYPOINT_REMOTE_SFS: u8 = 0x01; // ── Inline threshold ─────────────────────────────────────��───────────────── pub const MORLOC_INLINE_THRESHOLD: usize = 64 * 1024; // ── Metadata ─────────��───────────────────────────────────────────────────── pub const METADATA_TYPE_SCHEMA_STRING: u8 = 0x01; pub const METADATA_TYPE_XXHASH: u8 = 0x02; pub const METADATA_HEADER_MAGIC: [u8; 3] = *b"mmh"; // ── Packed structs matching the C binary layout ────���─────────────────────── /// 8-byte command union. We represent each variant as its own struct and /// transmute at the boundary. #[derive(Debug, Clone, Copy)] #[repr(C, packed)] pub struct CommandType { pub cmd_type: u8, pub padding: [u8; 7], } #[derive(Debug, Clone, Copy)] #[repr(C, packed)] pub struct CommandCall { pub cmd_type: u8, pub entrypoint: u8, pub padding: [u8; 2], pub midx: u32, } #[derive(Debug, Clone, Copy)] #[repr(C, packed)] pub struct CommandData { pub cmd_type: u8, pub source: u8, pub format: u8, pub compression: u8, pub encryption: u8, pub status: u8, pub padding: [u8; 2], } #[derive(Debug, Clone, Copy)] #[repr(C, packed)] pub struct CommandPing { pub cmd_type: u8, pub padding: [u8; 7], } /// The 8-byte command field stored as raw bytes. Interpreted based on the /// first byte (cmd_type discriminant). 
#[derive(Clone, Copy)] #[repr(C, packed)] pub union PacketCommand { pub cmd_type: CommandType, pub call: CommandCall, pub data: CommandData, pub ping: CommandPing, pub raw: [u8; 8], } impl std::fmt::Debug for PacketCommand { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let tag = unsafe { self.cmd_type.cmd_type }; match tag { PACKET_TYPE_DATA => write!(f, "Command::Data({:?})", unsafe { self.data }), PACKET_TYPE_CALL => write!(f, "Command::Call({:?})", unsafe { self.call }), PACKET_TYPE_PING => write!(f, "Command::Ping"), _ => write!(f, "Command::Unknown({tag})"), } } } /// 32-byte packet header. Must match morloc_packet_header_t exactly. #[derive(Debug, Clone, Copy)] #[repr(C, packed)] pub struct PacketHeader { pub magic: u32, pub plain: u16, pub version: u16, pub flavor: u16, pub mode: u16, pub command: PacketCommand, pub offset: u32, pub length: u64, } const _: () = assert!(std::mem::size_of::() == 32); const _: () = assert!(std::mem::size_of::() == 8); const _: () = assert!(std::mem::size_of::() == 8); const _: () = assert!(std::mem::size_of::() == 8); /// 8-byte metadata header. #[derive(Debug, Clone, Copy)] #[repr(C, packed)] pub struct MetadataHeader { pub magic: [u8; 3], pub metadata_type: u8, pub size: u32, } const _: () = assert!(std::mem::size_of::() == 8); // ── Packet construction helpers ──────────────────────────────────────────── impl PacketHeader { fn new(command: PacketCommand, offset: u32, length: u64) -> Self { PacketHeader { magic: PACKET_MAGIC, plain: THIS_PLAIN, version: THIS_VERSION, flavor: DEFAULT_FLAVOR, mode: DEFAULT_MODE, command, offset, length, } } /// Create a ping packet (no payload). pub fn ping() -> Self { Self::new( PacketCommand { ping: CommandPing { cmd_type: PACKET_TYPE_PING, padding: [0; 7], }, }, 0, // no metadata 0, // no payload ) } /// Create a local call packet header. 
    pub fn local_call(midx: u32, payload_len: u64) -> Self {
        Self::new(
            PacketCommand {
                call: CommandCall {
                    cmd_type: PACKET_TYPE_CALL,
                    entrypoint: PACKET_ENTRYPOINT_LOCAL,
                    padding: [0; 2],
                    midx,
                },
            },
            0, // no metadata between header and arg packets
            payload_len,
        )
    }

    /// Create a remote call packet header.
    pub fn remote_call(midx: u32, payload_len: u64) -> Self {
        Self::new(
            PacketCommand {
                call: CommandCall {
                    cmd_type: PACKET_TYPE_CALL,
                    entrypoint: PACKET_ENTRYPOINT_REMOTE_SFS,
                    padding: [0; 2],
                    midx,
                },
            },
            0,
            payload_len,
        )
    }

    /// Create a data packet header for inline message data.
    ///
    /// `format` is one of the PACKET_FORMAT_* constants.
    pub fn data_mesg(format: u8, payload_len: u64) -> Self {
        Self::new(
            PacketCommand {
                data: CommandData {
                    cmd_type: PACKET_TYPE_DATA,
                    source: PACKET_SOURCE_MESG,
                    format,
                    compression: PACKET_COMPRESSION_NONE,
                    encryption: PACKET_ENCRYPTION_NONE,
                    status: PACKET_STATUS_PASS,
                    padding: [0; 2],
                },
            },
            0, // metadata size set separately when building full packet
            payload_len,
        )
    }

    /// Create a data packet header for relative pointer (shared memory).
    pub fn data_rptr(format: u8, payload_len: u64) -> Self {
        Self::new(
            PacketCommand {
                data: CommandData {
                    cmd_type: PACKET_TYPE_DATA,
                    source: PACKET_SOURCE_RPTR,
                    format,
                    compression: PACKET_COMPRESSION_NONE,
                    encryption: PACKET_ENCRYPTION_NONE,
                    status: PACKET_STATUS_PASS,
                    padding: [0; 2],
                },
            },
            0, // metadata size set separately when building full packet
            payload_len,
        )
    }

    /// Create a fail packet with an error message.
    ///
    /// A failure is a TEXT-format data packet whose status byte is
    /// PACKET_STATUS_FAIL; the message bytes form the payload.
    pub fn fail(error_msg_len: u64) -> Self {
        Self::new(
            PacketCommand {
                data: CommandData {
                    cmd_type: PACKET_TYPE_DATA,
                    source: PACKET_SOURCE_MESG,
                    format: PACKET_FORMAT_TEXT,
                    compression: PACKET_COMPRESSION_NONE,
                    encryption: PACKET_ENCRYPTION_NONE,
                    status: PACKET_STATUS_FAIL,
                    padding: [0; 2],
                },
            },
            0,
            error_msg_len,
        )
    }

    /// Total packet size (header + payload).
    pub fn total_size(&self) -> u64 {
        self.offset as u64 + self.length
    }

    /// Check if this is a valid morloc packet.
    pub fn is_valid(&self) -> bool {
        self.magic == PACKET_MAGIC
    }

    /// Get the command type.
    pub fn command_type(&self) -> u8 {
        // SAFETY: every variant of the 8-byte command union stores the
        // discriminant in its first byte, so reading through `cmd_type` is valid.
        unsafe { self.command.cmd_type.cmd_type }
    }

    pub fn is_ping(&self) -> bool {
        self.command_type() == PACKET_TYPE_PING
    }

    pub fn is_call(&self) -> bool {
        self.command_type() == PACKET_TYPE_CALL
    }

    pub fn is_data(&self) -> bool {
        self.command_type() == PACKET_TYPE_DATA
    }

    pub fn is_local_call(&self) -> bool {
        // SAFETY: only read the call variant after confirming the discriminant.
        self.is_call() && unsafe { self.command.call.entrypoint } == PACKET_ENTRYPOINT_LOCAL
    }

    pub fn is_remote_call(&self) -> bool {
        self.is_call() && unsafe { self.command.call.entrypoint } == PACKET_ENTRYPOINT_REMOTE_SFS
    }

    pub fn is_fail(&self) -> bool {
        self.is_data() && unsafe { self.command.data.status } == PACKET_STATUS_FAIL
    }

    /// Serialize the header to bytes.
    pub fn to_bytes(&self) -> [u8; 32] {
        // SAFETY: PacketHeader is repr(C, packed) and exactly 32 bytes
        // (enforced by a compile-time assert), so a byte transmute is valid.
        unsafe { std::mem::transmute(*self) }
    }

    /// Deserialize a header from bytes.
    ///
    /// Fails when the magic number does not match PACKET_MAGIC.
    pub fn from_bytes(bytes: &[u8; 32]) -> Result<Self, MorlocError> {
        let header: PacketHeader = unsafe { std::mem::transmute(*bytes) };
        if !header.is_valid() {
            // Copy the packed field to a local before formatting
            // (taking a reference into a packed struct is UB).
            let magic = { header.magic };
            return Err(MorlocError::Packet(format!(
                "invalid magic: 0x{magic:08x}"
            )));
        }
        Ok(header)
    }
}

// ── Full packet construction (header + metadata + payload) ─────────────────

/// Build a complete data packet with schema metadata and relptr payload.
pub fn make_standard_data_packet(relptr: crate::shm::RelPtr, schema: &crate::Schema) -> Vec { let schema_str = crate::schema::schema_to_string(schema); let schema_bytes = schema_str.as_bytes(); let schema_len = schema_bytes.len() + 1; // +1 for null terminator // Metadata: header (8 bytes) + schema string (null-terminated), padded to 32-byte boundary let meta_header_size = std::mem::size_of::(); let raw_meta_len = meta_header_size + schema_len; let padded_meta_len = ((raw_meta_len + 31) / 32) * 32; // Payload: relptr let payload_len = std::mem::size_of::(); let total = 32 + padded_meta_len + payload_len; let mut packet = vec![0u8; total]; // Write header let header = PacketHeader::data_rptr(PACKET_FORMAT_VOIDSTAR, payload_len as u64); let mut hdr = header; // Override offset to include metadata unsafe { let hdr_ptr = &mut hdr as *mut PacketHeader as *mut u8; // Set offset field (at byte 20 in packed struct) let offset_ptr = hdr_ptr.add(20) as *mut u32; *offset_ptr = padded_meta_len as u32; } let hdr_bytes = hdr.to_bytes(); packet[..32].copy_from_slice(&hdr_bytes); // Write metadata header let meta_start = 32; packet[meta_start] = b'm'; packet[meta_start + 1] = b'm'; packet[meta_start + 2] = b'h'; packet[meta_start + 3] = METADATA_TYPE_SCHEMA_STRING; let meta_size_bytes = (schema_len as u32).to_le_bytes(); packet[meta_start + 4..meta_start + 8].copy_from_slice(&meta_size_bytes); // Write schema string (null-terminated) let schema_data_start = meta_start + meta_header_size; packet[schema_data_start..schema_data_start + schema_bytes.len()].copy_from_slice(schema_bytes); // Null terminator already there from vec![0u8] // Write relptr payload let payload_start = 32 + padded_meta_len; let relptr_bytes = relptr.to_ne_bytes(); packet[payload_start..payload_start + relptr_bytes.len()].copy_from_slice(&relptr_bytes); packet } /// Build an inline MESG+MSGPACK data packet with schema metadata. 
pub fn make_mesg_data_packet(mpk_data: &[u8], schema: &crate::Schema) -> Vec { let schema_str = crate::schema::schema_to_string(schema); let schema_bytes = schema_str.as_bytes(); let schema_len = schema_bytes.len() + 1; // +1 for null terminator let meta_header_size = std::mem::size_of::(); let raw_meta_len = meta_header_size + schema_len; let padded_meta_len = ((raw_meta_len + 31) / 32) * 32; let total = 32 + padded_meta_len + mpk_data.len(); let mut packet = vec![0u8; total]; // Write header let mut header = PacketHeader::data_mesg(PACKET_FORMAT_MSGPACK, mpk_data.len() as u64); // Set offset to metadata size unsafe { let hdr_ptr = &mut header as *mut PacketHeader as *mut u8; let offset_ptr = hdr_ptr.add(20) as *mut u32; *offset_ptr = padded_meta_len as u32; } let hdr_bytes = header.to_bytes(); packet[..32].copy_from_slice(&hdr_bytes); // Write metadata header let meta_start = 32; packet[meta_start] = b'm'; packet[meta_start + 1] = b'm'; packet[meta_start + 2] = b'h'; packet[meta_start + 3] = METADATA_TYPE_SCHEMA_STRING; let meta_size_bytes = (schema_len as u32).to_le_bytes(); packet[meta_start + 4..meta_start + 8].copy_from_slice(&meta_size_bytes); // Write schema string let schema_data_start = meta_start + meta_header_size; packet[schema_data_start..schema_data_start + schema_bytes.len()].copy_from_slice(schema_bytes); // Write msgpack payload let payload_start = 32 + padded_meta_len; packet[payload_start..payload_start + mpk_data.len()].copy_from_slice(mpk_data); packet } /// Build a call packet from argument data packets. 
/// The payload is the argument packets concatenated back to back; `midx`
/// identifies the manifest entry to invoke.
pub fn make_local_call_packet(midx: u32, arg_packets: &[Vec<u8>]) -> Vec<u8> {
    // Total payload length is the sum of all argument packet sizes.
    let data_length: usize = arg_packets.iter().map(|p| p.len()).sum();
    let total = 32 + data_length;
    let mut packet = vec![0u8; total];
    // Write call header
    let header = PacketHeader::local_call(midx, data_length as u64);
    let hdr_bytes = header.to_bytes();
    packet[..32].copy_from_slice(&hdr_bytes);
    // Concatenate argument packets
    let mut pos = 32;
    for arg in arg_packets {
        packet[pos..pos + arg.len()].copy_from_slice(arg);
        pos += arg.len();
    }
    packet
}

/// Build a fail packet with an error message string.
///
/// The UTF-8 message bytes follow the 32-byte header directly (no metadata).
pub fn make_fail_packet_bytes(error_msg: &str) -> Vec<u8> {
    let msg_bytes = error_msg.as_bytes();
    let total = 32 + msg_bytes.len();
    let mut packet = vec![0u8; total];
    let header = PacketHeader::fail(msg_bytes.len() as u64);
    let hdr_bytes = header.to_bytes();
    packet[..32].copy_from_slice(&hdr_bytes);
    packet[32..].copy_from_slice(msg_bytes);
    packet
}

/// Extract the payload from a data packet (bytes after header + metadata offset).
///
/// Errors if the buffer is shorter than a header, the magic is wrong, or the
/// declared payload extends past the end of the buffer.
pub fn get_data_payload(packet: &[u8]) -> Result<&[u8], MorlocError> {
    if packet.len() < 32 {
        return Err(MorlocError::Packet("packet too small".into()));
    }
    let header = PacketHeader::from_bytes(packet[..32].try_into().unwrap())?;
    // Copy packed fields by value before use (references into packed structs are UB).
    let offset = { header.offset } as usize;
    let length = { header.length } as usize;
    let start = 32 + offset;
    let end = start + length;
    if end > packet.len() {
        return Err(MorlocError::Packet("payload extends past packet end".into()));
    }
    Ok(&packet[start..end])
}

/// Extract error message from a fail packet.
///
/// Returns Ok(None) when the packet is valid but not a failure packet.
pub fn get_error_message(packet: &[u8]) -> Result<Option<String>, MorlocError> {
    if packet.len() < 32 {
        return Err(MorlocError::Packet("packet too small".into()));
    }
    let header = PacketHeader::from_bytes(packet[..32].try_into().unwrap())?;
    if !header.is_fail() {
        return Ok(None);
    }
    let payload = get_data_payload(packet)?;
    // The message may not be valid UTF-8; degrade gracefully with lossy decoding.
    Ok(Some(String::from_utf8_lossy(payload).into_owned()))
}

/// Read the schema string from packet metadata section.
/// Scans the metadata region (between the header and the payload) for an
/// "mmh" entry of type METADATA_TYPE_SCHEMA_STRING and returns its
/// null-terminated string. Returns Ok(None) when no schema entry is present.
pub fn read_schema_from_meta(packet: &[u8]) -> Result<Option<String>, MorlocError> {
    if packet.len() < 32 {
        return Err(MorlocError::Packet("packet too small".into()));
    }
    let header = PacketHeader::from_bytes(packet[..32].try_into().unwrap())?;
    // Copy packed field by value (references into packed structs are UB).
    let offset = { header.offset } as usize;
    if offset == 0 {
        // offset == 0 means there is no metadata section at all.
        return Ok(None);
    }
    // Scan metadata headers
    let meta_start = 32;
    let meta_end = meta_start + offset;
    let mut pos = meta_start;
    while pos + 8 <= meta_end {
        if packet[pos] == b'm' && packet[pos + 1] == b'm' && packet[pos + 2] == b'h' {
            let meta_type = packet[pos + 3];
            let meta_size = u32::from_le_bytes([
                packet[pos + 4],
                packet[pos + 5],
                packet[pos + 6],
                packet[pos + 7],
            ]) as usize;
            if meta_type == METADATA_TYPE_SCHEMA_STRING {
                let str_start = pos + 8;
                let str_end = str_start + meta_size;
                if str_end <= meta_end {
                    let bytes = &packet[str_start..str_end];
                    // Find null terminator
                    let len = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
                    return Ok(Some(String::from_utf8_lossy(&bytes[..len]).into_owned()));
                }
            }
            pos += 8 + meta_size;
        } else {
            // Padding (zero bytes) or unknown content: stop scanning.
            break;
        }
    }
    Ok(None)
}

/// Get the voidstar value from a data packet (resolves relptr to absptr).
pub fn get_data_value( packet: &[u8], schema: &crate::Schema, ) -> Result { let header = PacketHeader::from_bytes(packet[..32].try_into().unwrap())?; let source = unsafe { header.command.data.source }; let format = unsafe { header.command.data.format }; let payload = get_data_payload(packet)?; match source { PACKET_SOURCE_RPTR => { // Payload is a relptr if payload.len() < std::mem::size_of::() { return Err(MorlocError::Packet("relptr payload too small".into())); } let relptr = crate::shm::RelPtr::from_ne_bytes( payload[..std::mem::size_of::()].try_into().unwrap() ); crate::shm::rel2abs(relptr) } PACKET_SOURCE_MESG => { match format { PACKET_FORMAT_MSGPACK => { crate::mpack::unpack_with_schema(payload, schema) } PACKET_FORMAT_JSON => { let json_str = std::str::from_utf8(payload) .map_err(|e| MorlocError::Packet(format!("invalid UTF-8: {}", e)))?; crate::json::read_json_with_schema(json_str, schema) } PACKET_FORMAT_VOIDSTAR => { read_voidstar_binary(payload, schema) } _ => { Err(MorlocError::Packet(format!( "unsupported data format: {}", format ))) } } } _ => Err(MorlocError::Packet(format!("unsupported source: {}", source))), } } // ── Inline voidstar deserialization ───────────────────────────────────────── /// Read a flat voidstar binary blob into shared memory, adjusting relptrs. fn read_voidstar_binary( blob: &[u8], schema: &crate::Schema, ) -> Result { use crate::shm; let base = shm::shmalloc(blob.len())?; unsafe { std::ptr::copy_nonoverlapping(blob.as_ptr(), base, blob.len()) }; let base_rel = shm::abs2rel(base)?; adjust_voidstar_relptrs(base, schema, base_rel)?; Ok(base) } /// Adjust relptrs in a voidstar blob that was copied into SHM. /// The blob's internal relptrs are offsets from position 0 of the blob. /// Adding `base_rel` converts them to valid SHM relptrs. 
fn adjust_voidstar_relptrs(
    data: crate::shm::AbsPtr,
    schema: &crate::Schema,
    base_rel: crate::shm::RelPtr,
) -> Result<(), MorlocError> {
    use crate::schema::SerialType;
    use crate::shm::{self, Array, Tensor};
    // Walk the value according to its schema, shifting every embedded relptr
    // by base_rel so offsets that were relative to blob position 0 become
    // valid SHM-relative pointers.
    unsafe {
        match schema.serial_type {
            SerialType::String | SerialType::Array => {
                // Strings share the Array layout: a relptr to the element buffer.
                let arr = &mut *(data as *mut Array);
                arr.data += base_rel;
                // Recurse into elements if variable-width (strings are always fixed-width bytes)
                if !schema.parameters.is_empty() && !schema.parameters[0].is_fixed_width() {
                    let arr_data = shm::rel2abs(arr.data)?;
                    let elem_width = schema.parameters[0].width;
                    for i in 0..arr.size {
                        let elem = arr_data.add(i * elem_width);
                        adjust_voidstar_relptrs(elem, &schema.parameters[0], base_rel)?;
                    }
                }
            }
            SerialType::Tuple | SerialType::Map => {
                // Fixed layout: visit each field at its schema-recorded offset.
                for i in 0..schema.parameters.len() {
                    let child = data.add(schema.offsets[i]);
                    adjust_voidstar_relptrs(child, &schema.parameters[i], base_rel)?;
                }
            }
            SerialType::Optional => {
                // First byte is the presence tag; only recurse when present.
                let tag = *data;
                if tag != 0 {
                    // Fall back to computing the inner offset from alignment when
                    // the schema did not record one.
                    let inner_offset = schema.offsets.first().copied().unwrap_or(
                        shm::align_up(1, schema.parameters[0].alignment().max(1)),
                    );
                    let child = data.add(inner_offset);
                    adjust_voidstar_relptrs(child, &schema.parameters[0], base_rel)?;
                }
            }
            SerialType::Tensor => {
                let tensor = &mut *(data as *mut Tensor);
                // Empty tensors carry no buffers worth adjusting.
                if tensor.total_elements > 0 {
                    tensor.shape += base_rel;
                    tensor.data += base_rel;
                }
            }
            _ => {} // Fixed-width primitives: no relptrs to adjust
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_header_size() {
        // NOTE(review): the generic argument was lost in extraction; this
        // presumably asserts size_of::<PacketHeader>() == 32 — confirm against
        // the repository copy.
        assert_eq!(std::mem::size_of::(), 32);
    }

    #[test]
    fn test_ping_roundtrip() {
        let ping = PacketHeader::ping();
        assert!(ping.is_valid());
        assert!(ping.is_ping());
        let bytes = ping.to_bytes();
        let recovered = PacketHeader::from_bytes(&bytes).unwrap();
        assert!(recovered.is_ping());
    }

    #[test]
    fn test_call_packet() {
        let call = PacketHeader::local_call(42, 128);
        assert!(call.is_call());
        assert!(call.is_local_call());
        assert!(!call.is_remote_call());
        let bytes = call.to_bytes();
        let recovered = PacketHeader::from_bytes(&bytes).unwrap();
        assert!(recovered.is_local_call());
        // Union/packed fields are copied out before asserting.
        let midx = unsafe { recovered.command.call.midx };
        assert_eq!(midx, 42);
        let len = { recovered.length };
        assert_eq!(len, 128);
    }

    #[test]
    fn test_fail_packet() {
        let fail = PacketHeader::fail(100);
        assert!(fail.is_data());
        assert!(fail.is_fail());
    }

    #[test]
    fn test_data_mesg() {
        let data = PacketHeader::data_mesg(PACKET_FORMAT_MSGPACK, 256);
        assert!(data.is_data());
        assert!(!data.is_fail());
        let fmt = unsafe { data.command.data.format };
        assert_eq!(fmt, PACKET_FORMAT_MSGPACK);
        let len = { data.length };
        assert_eq!(len, 256);
    }
}


================================================
FILE: data/rust/morloc-runtime/src/packet_ffi.rs
================================================
//! C ABI wrappers for packet functions.
//! Replaces packet.c with calls to Rust packet.rs + voidstar.rs.

use std::ffi::{c_char, c_void, CStr};
use std::ptr;

use crate::cschema::CSchema;
use crate::error::{clear_errmsg, set_errmsg, MorlocError};
use crate::packet::*;
use crate::shm::{self, AbsPtr, RelPtr};

// ── morloc_call_t ────────────────────────────────────────────────────────────

/// Matches C `morloc_call_t` layout.
#[repr(C)] pub struct MorlocCall { pub midx: u32, pub args: *mut *mut u8, pub nargs: usize, pub owns_args: i32, } // ── Header reading ─────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn read_morloc_packet_header( msg: *const u8, errmsg: *mut *mut c_char, ) -> *mut PacketHeader { clear_errmsg(errmsg); if msg.is_null() { set_errmsg(errmsg, &MorlocError::Packet("Cannot make packet from NULL pointer".into())); return ptr::null_mut(); } // Validate magic let header = &*(msg as *const PacketHeader); if !header.is_valid() { set_errmsg(errmsg, &MorlocError::Packet("Malformed morloc packet".into())); return ptr::null_mut(); } msg as *mut PacketHeader } #[no_mangle] pub unsafe extern "C" fn packet_is_ping( packet: *const u8, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let header = read_morloc_packet_header(packet, errmsg); if header.is_null() { return false; } (*header).is_ping() } #[no_mangle] pub unsafe extern "C" fn packet_is_local_call( packet: *const u8, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let header = read_morloc_packet_header(packet, errmsg); if header.is_null() { return false; } (*header).is_local_call() } #[no_mangle] pub unsafe extern "C" fn packet_is_remote_call( packet: *const u8, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let header = read_morloc_packet_header(packet, errmsg); if header.is_null() { return false; } (*header).is_remote_call() } // ── Packet size ────────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn morloc_packet_size_from_header( header: *const PacketHeader, ) -> usize { if header.is_null() { return 0; } 32 + (*header).offset as usize + (*header).length as usize } #[no_mangle] pub unsafe extern "C" fn morloc_packet_size( packet: *const u8, errmsg: *mut *mut c_char, ) -> usize { clear_errmsg(errmsg); let header = read_morloc_packet_header(packet, errmsg); if header.is_null() { return 0; } 
morloc_packet_size_from_header(header) } // ── Ping ───────────────────────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn return_ping( packet: *const u8, errmsg: *mut *mut c_char, ) -> *mut u8 { clear_errmsg(errmsg); if !packet_is_ping(packet, errmsg) { if (*errmsg).is_null() { set_errmsg(errmsg, &MorlocError::Packet("Not a ping packet".into())); } return ptr::null_mut(); } let size = morloc_packet_size(packet, errmsg); if size == 0 { return ptr::null_mut(); } let buf = libc::calloc(size, 1) as *mut u8; if buf.is_null() { set_errmsg(errmsg, &MorlocError::Packet("Failed to allocate ping response".into())); return ptr::null_mut(); } ptr::copy_nonoverlapping(packet, buf, size); buf } #[no_mangle] pub extern "C" fn make_ping_packet() -> *mut u8 { // SAFETY: calloc returns null or a valid pointer to 32 zeroed bytes. let buf = unsafe { libc::calloc(32, 1) as *mut u8 }; if buf.is_null() { return ptr::null_mut(); } let header = PacketHeader::ping(); let bytes = header.to_bytes(); // SAFETY: buf points to 32 bytes; bytes is exactly 32 bytes from to_bytes(). unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), buf, 32) }; buf } // ── Data packet construction helpers ───────────────────────────────────────── /// Build metadata section: metadata header + schema string, padded to 32-byte boundary. /// Returns (metadata_buffer, padded_length). Returns (null, 0) if schema is null. /// /// # Safety /// `schema` must be null or a valid CSchema pointer. 
unsafe fn build_schema_metadata(schema: *const CSchema) -> (*mut u8, usize) { if schema.is_null() { return (ptr::null_mut(), 0); } let rs = CSchema::to_rust(schema); let schema_str = crate::schema::schema_to_string(&rs); let schema_bytes = schema_str.as_bytes(); let schema_len = schema_bytes.len() + 1; // +1 for null terminator let meta_header_size = 8; // sizeof(morloc_metadata_header_t) let raw_meta_len = meta_header_size + schema_len; let padded_meta_len = ((raw_meta_len + 31) / 32) * 32; let metadata = libc::calloc(padded_meta_len, 1) as *mut u8; if metadata.is_null() { return (ptr::null_mut(), 0); } // Write metadata header *metadata = b'm'; *metadata.add(1) = b'm'; *metadata.add(2) = b'h'; *metadata.add(3) = METADATA_TYPE_SCHEMA_STRING; *(metadata.add(4) as *mut u32) = schema_len as u32; // Write schema string ptr::copy_nonoverlapping(schema_bytes.as_ptr(), metadata.add(meta_header_size), schema_bytes.len()); // Null terminator already zeroed by calloc (metadata, padded_meta_len) } /// Generic data packet builder matching C's make_morloc_data_packet. /// /// # Safety /// If non-null, `data` must point to `data_length` readable bytes. /// If non-null, `metadata` must point to `metadata_length` readable bytes. 
unsafe fn make_data_packet_raw( data: *const u8, data_length: usize, metadata: *const u8, metadata_length: usize, src: u8, fmt: u8, cmpr: u8, encr: u8, status: u8, ) -> *mut u8 { let total = 32 + metadata_length + data_length; let packet = libc::calloc(total, 1) as *mut u8; if packet.is_null() { return ptr::null_mut(); } // Build command let cmd = CommandData { cmd_type: PACKET_TYPE_DATA, source: src, format: fmt, compression: cmpr, encryption: encr, status, padding: [0; 2], }; let header = PacketHeader { magic: PACKET_MAGIC, plain: THIS_PLAIN, version: THIS_VERSION, flavor: DEFAULT_FLAVOR, mode: DEFAULT_MODE, command: PacketCommand { data: cmd }, offset: metadata_length as u32, length: data_length as u64, }; let hdr_bytes = header.to_bytes(); ptr::copy_nonoverlapping(hdr_bytes.as_ptr(), packet, 32); if !metadata.is_null() && metadata_length > 0 { ptr::copy_nonoverlapping(metadata, packet.add(32), metadata_length); } if !data.is_null() && data_length > 0 { ptr::copy_nonoverlapping(data, packet.add(32 + metadata_length), data_length); } packet } /// Generic data packet with schema metadata. 
unsafe fn make_data_packet_with_schema(
    data: *const u8,
    data_length: usize,
    schema: *const CSchema,
    src: u8,
    fmt: u8,
    cmpr: u8,
    encr: u8,
    status: u8,
) -> *mut u8 {
    // Build the (possibly empty) schema metadata section, emit the packet,
    // then release the temporary metadata buffer.
    let (metadata, metadata_length) = build_schema_metadata(schema);
    let result = make_data_packet_raw(
        data, data_length,
        metadata, metadata_length,
        src, fmt, cmpr, encr, status,
    );
    if !metadata.is_null() {
        libc::free(metadata as *mut c_void);
    }
    result
}

// ── Standard data packet (RPTR + VOIDSTAR) ──────────────────────────────────

#[no_mangle]
pub unsafe extern "C" fn make_standard_data_packet(
    relptr: RelPtr,
    schema: *const CSchema,
) -> *mut u8 {
    // Payload is a single relptr; allocate space for it but fill it afterward.
    // NOTE(review): the generic argument of size_of below was lost in
    // extraction — presumably size_of::<RelPtr>(); confirm against the repo.
    let packet = make_data_packet_with_schema(
        ptr::null(), std::mem::size_of::(), schema,
        PACKET_SOURCE_RPTR, PACKET_FORMAT_VOIDSTAR,
        PACKET_COMPRESSION_NONE, PACKET_ENCRYPTION_NONE, PACKET_STATUS_PASS,
    );
    if packet.is_null() {
        return ptr::null_mut();
    }
    // Write the relptr into the payload area
    let header = &*(packet as *const PacketHeader);
    let payload_offset = 32 + header.offset as usize;
    *(packet.add(payload_offset) as *mut RelPtr) = relptr;
    packet
}

#[no_mangle]
pub unsafe extern "C" fn make_arrow_data_packet(
    relptr: RelPtr,
    schema: *const CSchema,
) -> *mut u8 {
    // Same shape as make_standard_data_packet but tagged as Arrow format.
    let packet = make_data_packet_with_schema(
        ptr::null(), std::mem::size_of::(), schema,
        PACKET_SOURCE_RPTR, PACKET_FORMAT_ARROW,
        PACKET_COMPRESSION_NONE, PACKET_ENCRYPTION_NONE, PACKET_STATUS_PASS,
    );
    if packet.is_null() {
        return ptr::null_mut();
    }
    let header = &*(packet as *const PacketHeader);
    let payload_offset = 32 + header.offset as usize;
    *(packet.add(payload_offset) as *mut RelPtr) = relptr;
    packet
}

// ── Msgpack packets ──────────────────────────────────────────────────────────

#[no_mangle]
pub unsafe extern "C" fn make_mpk_data_packet(
    mpk_filename: *const c_char,
    schema: *const CSchema,
) -> *mut u8 {
    // FILE-sourced packet: the payload is the filename, not the data itself.
    if mpk_filename.is_null() {
        return ptr::null_mut();
    }
    let filename = CStr::from_ptr(mpk_filename);
    let bytes = filename.to_bytes();
    make_data_packet_with_schema(
        bytes.as_ptr(), bytes.len(), schema,
        PACKET_SOURCE_FILE, PACKET_FORMAT_MSGPACK,
        PACKET_COMPRESSION_NONE, PACKET_ENCRYPTION_NONE, PACKET_STATUS_PASS,
    )
}

#[no_mangle]
pub unsafe extern "C" fn make_data_packet_from_mpk(
    mpk: *const c_char,
    mpk_size: usize,
    schema: *const CSchema,
) -> *mut u8 {
    // MESG-sourced packet: the msgpack bytes are embedded inline.
    make_data_packet_with_schema(
        mpk as *const u8, mpk_size, schema,
        PACKET_SOURCE_MESG, PACKET_FORMAT_MSGPACK,
        PACKET_COMPRESSION_NONE, PACKET_ENCRYPTION_NONE, PACKET_STATUS_PASS,
    )
}

// ── get_data_packet_as_mpk ───────────────────────────────────────────────────

// Convert any supported data packet into a malloc'd msgpack buffer.
// Returns 1 on success, 0 on failure (errmsg set).
// NOTE(review): `mpk_out`, `mpk_size_out` are dereferenced without null
// checks — callers must always pass valid out-pointers.
#[no_mangle]
pub unsafe extern "C" fn get_data_packet_as_mpk(
    packet: *const u8,
    schema: *const CSchema,
    mpk_out: *mut *mut c_char,
    mpk_size_out: *mut usize,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    *mpk_out = ptr::null_mut();
    *mpk_size_out = 0;
    let header = read_morloc_packet_header(packet, errmsg);
    if header.is_null() {
        return 0;
    }
    if (*header).command_type() != PACKET_TYPE_DATA {
        set_errmsg(errmsg, &MorlocError::Packet("Expected a data packet".into()));
        return 0;
    }
    // Check for error
    if (*header).is_fail() {
        let payload_start = 32 + (*header).offset as usize;
        let payload_len = (*header).length as usize;
        // NOTE(review): from_utf8_unchecked is UB if the fail payload is not
        // valid UTF-8 — consider from_utf8_lossy here.
        let msg = std::str::from_utf8_unchecked(
            std::slice::from_raw_parts(packet.add(payload_start), payload_len)
        );
        set_errmsg(errmsg, &MorlocError::Packet(format!("\n{}", msg)));
        return 0;
    }
    let rs = CSchema::to_rust(schema);
    let source = (*header).command.data.source;
    let format = (*header).command.data.format;
    let payload_start = 32 + (*header).offset as usize;
    let payload_len = (*header).length as usize;
    let payload = std::slice::from_raw_parts(packet.add(payload_start), payload_len);
    if source == PACKET_SOURCE_MESG && format == PACKET_FORMAT_MSGPACK {
        // Inline msgpack: copy directly
        let buf = libc::malloc(payload_len) as *mut c_char;
        if buf.is_null() {
            set_errmsg(errmsg, &MorlocError::Packet("malloc failed".into()));
            return 0;
        }
        ptr::copy_nonoverlapping(payload.as_ptr(), buf as *mut u8, payload_len);
        *mpk_out = buf;
        *mpk_size_out = payload_len;
    } else if source == PACKET_SOURCE_MESG && format == PACKET_FORMAT_VOIDSTAR {
        // Inline voidstar: load into SHM then convert to msgpack
        match crate::voidstar::read_binary(payload, &rs) {
            Ok(abs) => {
                match crate::mpack::pack_with_schema(abs, &rs) {
                    Ok(data) => {
                        let buf = libc::malloc(data.len()) as *mut u8;
                        if buf.is_null() {
                            set_errmsg(errmsg, &MorlocError::Packet("malloc failed".into()));
                            return 0;
                        }
                        ptr::copy_nonoverlapping(data.as_ptr(), buf, data.len());
                        *mpk_out = buf as *mut c_char;
                        *mpk_size_out = data.len();
                    }
                    Err(e) => {
                        set_errmsg(errmsg, &e);
                        return 0;
                    }
                }
                // Free SHM
                let _ = crate::voidstar::free_by_schema(abs, &rs);
                let _ = shm::shfree(abs);
            }
            Err(e) => {
                set_errmsg(errmsg, &e);
                return 0;
            }
        }
    } else if source == PACKET_SOURCE_FILE && format == PACKET_FORMAT_MSGPACK {
        // File-based msgpack: read the file
        // Filename lives in the payload, capped at 4096 bytes.
        let filename_bytes = &payload[..payload_len.min(4096)];
        let filename = std::str::from_utf8(filename_bytes).unwrap_or("");
        let filename = filename.trim_end_matches('\0');
        match std::fs::read(filename) {
            Ok(data) => {
                let buf = libc::malloc(data.len()) as *mut u8;
                if buf.is_null() {
                    set_errmsg(errmsg, &MorlocError::Packet("malloc failed".into()));
                    return 0;
                }
                ptr::copy_nonoverlapping(data.as_ptr(), buf, data.len());
                *mpk_out = buf as *mut c_char;
                *mpk_size_out = data.len();
            }
            Err(e) => {
                set_errmsg(errmsg, &MorlocError::Io(e));
                return 0;
            }
        }
    } else if source == PACKET_SOURCE_RPTR && format == PACKET_FORMAT_VOIDSTAR {
        // Voidstar via relptr: convert to msgpack
        let relptr = *(payload.as_ptr() as *const RelPtr);
        match shm::rel2abs(relptr) {
            Ok(abs) => {
                match crate::mpack::pack_with_schema(abs, &rs) {
                    Ok(data) => {
                        let buf = libc::malloc(data.len()) as *mut u8;
                        if buf.is_null() {
                            set_errmsg(errmsg, &MorlocError::Packet("malloc failed".into()));
                            return 0;
                        }
                        ptr::copy_nonoverlapping(data.as_ptr(), buf, data.len());
                        *mpk_out = buf as *mut c_char;
                        *mpk_size_out = data.len();
                    }
                    Err(e) => {
                        set_errmsg(errmsg, &e);
                        return 0;
                    }
                }
            }
            Err(e) => {
                set_errmsg(errmsg, &e);
                return 0;
            }
        }
    } else {
        set_errmsg(errmsg, &MorlocError::Packet(
            format!("Unsupported packet source/format: 0x{:02x}/0x{:02x}", source, format)
        ));
        return 0;
    }
    1 // true
}

// ── Schema from metadata ─────────────────────────────────────────────────────

// Returns a pointer INTO the packet buffer (not owned by the caller), or null
// when no schema metadata entry is present.
#[no_mangle]
pub unsafe extern "C" fn read_schema_from_packet_meta(
    packet: *const u8,
    errmsg: *mut *mut c_char,
) -> *mut c_char {
    clear_errmsg(errmsg);
    let header = read_morloc_packet_header(packet, errmsg);
    if header.is_null() {
        return ptr::null_mut();
    }
    let offset = (*header).offset as usize;
    if offset < 8 { return ptr::null_mut(); } // no room for metadata header
    let meta_start = 32usize;
    let meta_end = meta_start + offset;
    let mut pos = meta_start;
    while pos + 8 <= meta_end {
        if *packet.add(pos) == b'm' && *packet.add(pos + 1) == b'm' && *packet.add(pos + 2) == b'h' {
            let meta_type = *packet.add(pos + 3);
            // NOTE(review): this u32 read assumes `packet` is 4-byte aligned;
            // a bytewise read (read_unaligned) would be safer for C callers.
            let meta_size = *(packet.add(pos + 4) as *const u32) as usize;
            if meta_type == METADATA_TYPE_SCHEMA_STRING {
                // Return pointer into the packet buffer (matches C behavior)
                return packet.add(pos + 8) as *mut c_char;
            }
            pos += 8 + meta_size;
        } else {
            break;
        }
    }
    ptr::null_mut()
}

// ── Fail packet ──────────────────────────────────────────────────────────────

// Build a FAIL-status data packet whose payload is the given message.
#[no_mangle]
pub unsafe extern "C" fn make_fail_packet(
    failure_message: *const c_char,
) -> *mut u8 {
    if failure_message.is_null() {
        return ptr::null_mut();
    }
    let msg = CStr::from_ptr(failure_message).to_bytes();
    make_data_packet_raw(
        msg.as_ptr(), msg.len(),
        ptr::null(), 0,
        PACKET_SOURCE_MESG, PACKET_FORMAT_TEXT,
        PACKET_COMPRESSION_NONE, PACKET_ENCRYPTION_NONE, PACKET_STATUS_FAIL,
    )
}

// ── Error message extraction ─────────────────────────────────────────────────

// Returns a calloc'd, NUL-terminated copy of a fail packet's message (caller
// frees), or null when the packet is not a failure.
#[no_mangle]
pub unsafe extern "C" fn get_morloc_data_packet_error_message(
    data: *const u8,
    errmsg: *mut *mut c_char,
) -> *mut c_char {
    clear_errmsg(errmsg);
    let header = read_morloc_packet_header(data, errmsg);
    if header.is_null() {
        return ptr::null_mut();
    }
    if (*header).is_fail() {
        let payload_start = 32 + (*header).offset as usize;
        let payload_len = (*header).length as usize;
        // +1 so the calloc'd zero byte terminates the string.
        let buf = libc::calloc(payload_len + 1, 1) as *mut c_char;
        if buf.is_null() {
            set_errmsg(errmsg, &MorlocError::Packet("Failed to allocate error message".into()));
            return ptr::null_mut();
        }
        ptr::copy_nonoverlapping(data.add(payload_start), buf as *mut u8, payload_len);
        return buf;
    }
    ptr::null_mut()
}

// ── get_morloc_data_packet_value ─────────────────────────────────────────────

// Resolve a data packet to an absolute SHM pointer for its value, loading
// from inline msgpack/voidstar, a file, or an SHM relptr as appropriate.
#[no_mangle]
pub unsafe extern "C" fn get_morloc_data_packet_value(
    data: *const u8,
    schema: *const CSchema,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    clear_errmsg(errmsg);
    let header = read_morloc_packet_header(data, errmsg);
    if header.is_null() {
        return ptr::null_mut();
    }
    if (*header).command_type() != PACKET_TYPE_DATA {
        set_errmsg(errmsg, &MorlocError::Packet("Expected a data packet".into()));
        return ptr::null_mut();
    }
    // Check for error
    let packet_error = get_morloc_data_packet_error_message(data, errmsg);
    if !packet_error.is_null() {
        let err_str = CStr::from_ptr(packet_error).to_string_lossy().into_owned();
        libc::free(packet_error as *mut c_void);
        set_errmsg(errmsg, &MorlocError::Packet(format!("\n{}", err_str)));
        return ptr::null_mut();
    }
    // Clear any errmsg from get_morloc_data_packet_error_message
    clear_errmsg(errmsg);
    let rs = CSchema::to_rust(schema);
    let source = (*header).command.data.source;
    let format = (*header).command.data.format;
    let payload_start = 32 + (*header).offset as usize;
    let payload_len = (*header).length as usize;
    match source {
        PACKET_SOURCE_MESG => {
            if format == PACKET_FORMAT_MSGPACK {
                let payload = std::slice::from_raw_parts(data.add(payload_start), payload_len);
                match crate::mpack::unpack_with_schema(payload, &rs) {
                    Ok(abs) => abs,
                    Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() }
                }
            } else if format == PACKET_FORMAT_VOIDSTAR {
                let payload = std::slice::from_raw_parts(data.add(payload_start), payload_len);
                match crate::voidstar::read_binary(payload, &rs) {
                    Ok(abs) => abs,
                    Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() }
                }
            } else {
                set_errmsg(errmsg, &MorlocError::Packet(
                    format!("Invalid format from mesg: 0x{:02x}", format)
                ));
                ptr::null_mut()
            }
        }
        PACKET_SOURCE_FILE => {
            if format == PACKET_FORMAT_MSGPACK {
                // Filename payload, capped at 4096 bytes.
                let filename_bytes = std::slice::from_raw_parts(data.add(payload_start), payload_len.min(4096));
                let filename = std::str::from_utf8(filename_bytes).unwrap_or("");
                let filename = filename.trim_end_matches('\0');
                match std::fs::read(filename) {
                    Ok(file_data) => {
                        match crate::mpack::unpack_with_schema(&file_data, &rs) {
                            Ok(abs) => abs,
                            Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() }
                        }
                    }
                    Err(e) => { set_errmsg(errmsg, &MorlocError::Io(e)); ptr::null_mut() }
                }
            } else {
                set_errmsg(errmsg, &MorlocError::Packet(
                    format!("Invalid format from file: 0x{:02x}", format)
                ));
                ptr::null_mut()
            }
        }
        PACKET_SOURCE_RPTR => {
            if format == PACKET_FORMAT_VOIDSTAR || format == PACKET_FORMAT_ARROW {
                let relptr = *(data.add(payload_start) as *const RelPtr);
                match shm::rel2abs(relptr) {
                    Ok(abs) => abs,
                    Err(e) => { set_errmsg(errmsg, &e); ptr::null_mut() }
                }
            } else {
                set_errmsg(errmsg, &MorlocError::Packet(
                    format!("For RPTR source, expected voidstar or arrow format, found: 0x{:02x}", format)
                ));
                ptr::null_mut()
            }
        }
        _ => {
            set_errmsg(errmsg, &MorlocError::Packet("Invalid source".into()));
            ptr::null_mut()
        }
    }
}

// ── Call packet construction ─────────────────────────────────────────────────

// Build a CALL packet whose payload is the concatenation of the argument
// packets, validated via their headers.
unsafe fn make_call_packet_gen(
    midx: u32,
    entrypoint: u8,
    arg_packets: *const *const u8,
    nargs: usize,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    clear_errmsg(errmsg);
    // Calculate total data length
    let mut data_length: usize = 0;
    for i in 0..nargs {
        let arg = read_morloc_packet_header(*arg_packets.add(i), errmsg);
        if arg.is_null() {
            return ptr::null_mut();
        }
        data_length += morloc_packet_size_from_header(arg);
    }
    let total = 32 + data_length;
    let packet = libc::calloc(total, 1) as *mut u8;
    if packet.is_null() {
        set_errmsg(errmsg, &MorlocError::Packet("Failed to allocate call packet".into()));
        return ptr::null_mut();
    }
    let cmd = CommandCall {
        cmd_type: PACKET_TYPE_CALL,
        entrypoint,
        padding: [0; 2],
        midx,
    };
    let header = PacketHeader {
        magic: PACKET_MAGIC,
        plain: THIS_PLAIN,
        version: THIS_VERSION,
        flavor: DEFAULT_FLAVOR,
        mode: DEFAULT_MODE,
        command: PacketCommand { call: cmd },
        offset: 0,
        length: data_length as u64,
    };
    let hdr_bytes = header.to_bytes();
    ptr::copy_nonoverlapping(hdr_bytes.as_ptr(), packet, 32);
    // Second pass: copy each argument packet verbatim into the payload.
    let mut pos = 32;
    for i in 0..nargs {
        let arg = read_morloc_packet_header(*arg_packets.add(i), errmsg);
        if arg.is_null() {
            libc::free(packet as *mut c_void);
            return ptr::null_mut();
        }
        let arg_size = morloc_packet_size_from_header(arg);
        ptr::copy_nonoverlapping(*arg_packets.add(i), packet.add(pos), arg_size);
        pos += arg_size;
    }
    packet
}

#[no_mangle]
pub unsafe extern "C" fn make_morloc_local_call_packet(
    midx: u32,
    arg_packets: *const *const u8,
    nargs: usize,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    make_call_packet_gen(midx, PACKET_ENTRYPOINT_LOCAL, arg_packets, nargs, errmsg)
}

#[no_mangle]
pub unsafe extern "C" fn make_morloc_remote_call_packet(
    midx: u32,
    arg_packets: *const *const u8,
    nargs: usize,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    make_call_packet_gen(midx, PACKET_ENTRYPOINT_REMOTE_SFS, arg_packets, nargs, errmsg)
}

// ── Call packet reading ──────────────────────────────────────────────────────

// Parse a CALL packet into a MorlocCall whose arg pointers BORROW into the
// original packet buffer (owns_args = 0). Caller frees with free_morloc_call.
#[no_mangle]
pub unsafe extern "C" fn read_morloc_call_packet(
    packet: *const u8,
    errmsg: *mut *mut c_char,
) -> *mut MorlocCall {
    clear_errmsg(errmsg);
    // NOTE(review): the generic argument of size_of below was lost in
    // extraction — presumably size_of::<MorlocCall>(); confirm against the repo.
    let call = libc::calloc(1, std::mem::size_of::()) as *mut MorlocCall;
    if call.is_null() {
        set_errmsg(errmsg, &MorlocError::Packet("calloc failed".into()));
        return ptr::null_mut();
    }
    let header = read_morloc_packet_header(packet, errmsg);
    if header.is_null() {
        libc::free(call as *mut c_void);
        return ptr::null_mut();
    }
    if (*header).command_type() != PACKET_TYPE_CALL {
        set_errmsg(errmsg, &MorlocError::Packet("Expected packet to be a call".into()));
        libc::free(call as *mut c_void);
        return ptr::null_mut();
    }
    (*call).midx = (*header).command.call.midx;
    (*call).nargs = 0;
    (*call).args = ptr::null_mut();
    (*call).owns_args = 0; // borrowing pointers into packet
    let start_pos = 32 + (*header).offset as usize;
    let end_pos = start_pos + (*header).length as usize;
    // First pass: count args
    let mut pos = start_pos;
    while pos < end_pos {
        let arg_size = morloc_packet_size(packet.add(pos), errmsg);
        if arg_size == 0 {
            free_morloc_call(call);
            return ptr::null_mut();
        }
        pos += arg_size;
        (*call).nargs += 1;
    }
    // Allocate args array
    (*call).args = libc::calloc((*call).nargs, std::mem::size_of::<*mut u8>()) as *mut *mut u8;
    if (*call).args.is_null() {
        set_errmsg(errmsg, &MorlocError::Packet("calloc failed for args".into()));
        libc::free(call as *mut c_void);
        return ptr::null_mut();
    }
    // Second pass: fill pointers (borrowing into original packet)
    pos = start_pos;
    for i in 0..(*call).nargs {
        let arg_header = read_morloc_packet_header(packet.add(pos), errmsg);
        if arg_header.is_null() {
            free_morloc_call(call);
            return ptr::null_mut();
        }
        if (*arg_header).command_type() != PACKET_TYPE_DATA {
            set_errmsg(errmsg, &MorlocError::Packet(
                format!("Argument #{} is not a DATA packet (type={})", i, (*arg_header).command_type())
            ));
            free_morloc_call(call);
            return ptr::null_mut();
        }
        *(*call).args.add(i) = packet.add(pos) as *mut u8;
        pos += morloc_packet_size_from_header(arg_header);
    }
    call
}

// Free a MorlocCall; frees the arg packets themselves only when owns_args != 0.
#[no_mangle]
pub unsafe extern "C" fn free_morloc_call(call: *mut MorlocCall) {
    if call.is_null() {
        return;
    }
    let c = &*call;
    if !c.args.is_null() {
        if c.owns_args != 0 {
            for i in 0..c.nargs {
                let arg = *c.args.add(i);
                if !arg.is_null() {
                    libc::free(arg as *mut c_void);
                }
            }
        }
        libc::free(c.args as *mut c_void);
    }
    libc::free(call as *mut c_void);
}

// adjust_voidstar_relptrs: still provided by cli.c (will move to Rust when cli.c is ported)
// read_voidstar_binary: still provided by cli.c (will move to Rust when cli.c is ported)

// ── write_voidstar_binary (for intrinsics.c) ─────────────────────────────────

// Serialize a voidstar value to a file descriptor; returns the byte count
// written (as a RelPtr-typed integer) or -1 on error.
#[no_mangle]
pub unsafe extern "C" fn write_voidstar_binary(
    fd: i32,
    data: *const c_void,
    schema: *const CSchema,
    errmsg: *mut *mut c_char,
) -> RelPtr {
    clear_errmsg(errmsg);
    let rs = CSchema::to_rust(schema);
    match crate::voidstar::write_binary_to_fd(fd, data as AbsPtr, &rs) {
        Ok(n) => n as RelPtr,
        Err(e) => {
            set_errmsg(errmsg, &e);
            -1isize as RelPtr
        }
    }
}

// ── flatten_voidstar_to_buffer ───────────────────────────────────────────────

// Flatten a voidstar value to a malloc'd buffer (caller frees).
// Returns 0 on success, 1 on failure (errmsg set).
#[no_mangle]
pub unsafe extern "C" fn flatten_voidstar_to_buffer(
    data: *const c_void,
    schema: *const CSchema,
    out_buf: *mut *mut u8,
    out_size: *mut usize,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    *out_buf = ptr::null_mut();
    *out_size = 0;
    let rs = CSchema::to_rust(schema);
    match crate::voidstar::flatten_to_buffer(data as AbsPtr, &rs) {
        Ok(buf) => {
            let len = buf.len();
            let c_buf = libc::malloc(len) as *mut u8;
            if c_buf.is_null() {
                set_errmsg(errmsg, &MorlocError::Packet("malloc failed".into()));
                return 1;
            }
            ptr::copy_nonoverlapping(buf.as_ptr(), c_buf, len);
            *out_buf = c_buf;
            *out_size = len;
            0
        }
        Err(e) => {
            set_errmsg(errmsg, &e);
            1
        }
    }
}

// read_voidstar_binary: still provided by cli.c (will move to Rust when cli.c is ported)

// ── make_data_packet_auto ────────────────────────────────────────────────────

// Choose between an inline (MESG/voidstar) packet for small values and a
// relptr (RPTR) packet for large ones, based on MORLOC_INLINE_THRESHOLD.
#[no_mangle]
pub unsafe extern "C" fn make_data_packet_auto(
    voidstar: *mut c_void,
    relptr: RelPtr,
    schema: *const CSchema,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    clear_errmsg(errmsg);
    let rs = CSchema::to_rust(schema);
    let flat_size = match crate::ffi::calc_voidstar_size_inner(voidstar as *const u8, &rs) {
        Ok(s) => s,
        Err(e) => {
            set_errmsg(errmsg, &e);
            return ptr::null_mut();
        }
    };
    if flat_size <= MORLOC_INLINE_THRESHOLD {
        match crate::voidstar::flatten_to_buffer(voidstar as AbsPtr, &rs) {
            Ok(blob) => {
                let packet = make_data_packet_with_schema(
                    blob.as_ptr(), blob.len(), schema,
                    PACKET_SOURCE_MESG, PACKET_FORMAT_VOIDSTAR,
                    PACKET_COMPRESSION_NONE, PACKET_ENCRYPTION_NONE, PACKET_STATUS_PASS,
                );
                if packet.is_null() {
                    set_errmsg(errmsg, &MorlocError::Packet("Failed to create inline data packet".into()));
                }
                return packet;
            }
            Err(e) => {
                set_errmsg(errmsg, &e);
                return ptr::null_mut();
            }
        }
    }
    make_standard_data_packet(relptr, schema)
}

// ── print_morloc_data_packet ─────────────────────────────────────────────────

// Write a data packet to stdout; RPTR/voidstar packets are flattened into an
// inline form on the fly. Returns 0 on success, 1 on failure.
#[no_mangle]
pub unsafe extern "C" fn print_morloc_data_packet(
    packet: *const u8,
    schema: *const CSchema,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    let header = read_morloc_packet_header(packet, errmsg);
    if header.is_null() {
        return 1;
    }
    if (*header).command_type() != PACKET_TYPE_DATA {
        set_errmsg(errmsg, &MorlocError::Packet("Expected a data packet".into()));
        return 1;
    }
    // Check for error
    let packet_error = get_morloc_data_packet_error_message(packet, errmsg);
    if !packet_error.is_null() {
        let err_str = CStr::from_ptr(packet_error).to_string_lossy().into_owned();
        libc::free(packet_error as *mut c_void);
        set_errmsg(errmsg, &MorlocError::Packet(format!("\n{}", err_str)));
        return 1;
    }
    clear_errmsg(errmsg);
    let rs = CSchema::to_rust(schema);
    let source = (*header).command.data.source;
    let format = (*header).command.data.format;
    let packet_size = morloc_packet_size_from_header(header);
    match source {
        PACKET_SOURCE_MESG | PACKET_SOURCE_FILE => {
            // Print the raw packet bytes
            if print_binary(packet, packet_size, errmsg) != 0 {
                return 1;
            }
        }
        PACKET_SOURCE_RPTR => {
            match format {
                PACKET_FORMAT_VOIDSTAR => {
                    let payload_start = 32 + (*header).offset as usize;
                    let relptr = *(packet.add(payload_start) as *const RelPtr);
                    let voidstar_ptr = match shm::rel2abs(relptr) {
                        Ok(p) => p,
                        Err(e) => { set_errmsg(errmsg, &e); return 1; }
                    };
                    // Build modified header with flat size
                    let flat_size = match crate::ffi::calc_voidstar_size_inner(voidstar_ptr, &rs) {
                        Ok(s) => s,
                        Err(e) => { set_errmsg(errmsg, &e); return 1; }
                    };
                    let mut new_header = *header;
                    new_header.command.data.format = PACKET_FORMAT_VOIDSTAR;
                    // Safely set length (packed struct)
                    // NOTE(review): byte offset 24 assumes `length: u64` sits at
                    // bytes 24..32 of the 32-byte packed header — confirm the
                    // PacketHeader field layout before touching this.
                    let new_hdr_ptr = &mut new_header as *mut PacketHeader as *mut u8;
                    *(new_hdr_ptr.add(24) as *mut u64) = flat_size as u64;
                    // Print header
                    if print_binary(&new_header as *const PacketHeader as *const u8, 32, errmsg) != 0 {
                        return 1;
                    }
                    // Print metadata
                    let offset = (*header).offset as usize;
                    if offset > 0 {
                        if print_binary(packet.add(32), offset, errmsg) != 0 {
                            return 1;
                        }
                    }
                    // Write flattened voidstar data to stdout
                    match crate::voidstar::write_binary_to_fd(libc::STDOUT_FILENO, voidstar_ptr, &rs) {
                        Ok(_) => {}
                        Err(e) => { set_errmsg(errmsg, &e); return 1; }
                    }
                }
                _ => {
                    // Other formats: print raw packet
                    if print_binary(packet, packet_size, errmsg) != 0 {
                        return 1;
                    }
                }
            }
        }
        _ => {
            set_errmsg(errmsg, &MorlocError::Packet("Invalid source".into()));
            return 1;
        }
    }
    0 // EXIT_PASS
}

/// Write binary data to stdout.
///
/// # Safety
/// `buf` must point to at least `count` readable bytes.
unsafe fn print_binary(
    buf: *const u8,
    count: usize,
    errmsg: *mut *mut c_char,
) -> i32 {
    // Loop to handle short writes from write(2).
    let mut written: usize = 0;
    while written < count {
        let n = libc::write(
            libc::STDOUT_FILENO,
            buf.add(written) as *const c_void,
            count - written,
        );
        if n < 0 {
            set_errmsg(errmsg, &MorlocError::Io(std::io::Error::last_os_error()));
            return 1;
        }
        written += n as usize;
    }
    0
}


================================================
FILE: data/rust/morloc-runtime/src/pool_ffi.rs
================================================
//! Pool server lifecycle: accept connections, dispatch packets, manage workers.
//! Replaces pool.c. Uses std::thread instead of raw pthreads for thread mode.
//! Pool server lifecycle: accept connections, dispatch packets, manage workers.
//! Replaces pool.c. Uses std::thread instead of raw pthreads for thread mode.

use std::ffi::{c_char, c_void};
use std::ptr;
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
use std::sync::{Arc, Mutex, Condvar};

// ── C-compatible types matching pool.h ───────────────────────────────────────

/// Dispatch callback: manifest id, argument packet pointers, argument count,
/// and an opaque language-runtime context. Returns a malloc'd result packet.
pub type PoolDispatchFn = unsafe extern "C" fn(
    mid: u32,
    args: *const *const u8,
    nargs: usize,
    ctx: *mut c_void,
) -> *mut u8;

/// Worker concurrency model selected by the embedding language runtime.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PoolConcurrency {
    Threads = 0,
    Fork = 1,
    Single = 2,
}

/// Pool configuration passed across the C ABI.
#[repr(C)]
pub struct PoolConfig {
    pub local_dispatch: PoolDispatchFn,
    pub remote_dispatch: PoolDispatchFn,
    pub dispatch_ctx: *mut c_void,
    pub concurrency: PoolConcurrency,
    pub initial_workers: i32,
    pub dynamic_scaling: bool,
    // NOTE(review): the generic parameter was garbled in extraction; the fork
    // path calls `pfk(config.dispatch_ctx)`, so the signature below is
    // inferred — confirm against pool.h.
    pub post_fork_child: Option<unsafe extern "C" fn(*mut c_void)>,
}

// SAFETY: PoolConfig contains function pointers and a *mut c_void dispatch_ctx.
// The function pointers are set once at startup and never mutated.
// dispatch_ctx points to language-runtime state that is either thread-local
// (fork mode) or protected by the runtime's own synchronization (thread mode).
// The pool architecture guarantees dispatch_ctx is not concurrently mutated.
unsafe impl Send for PoolConfig {}
unsafe impl Sync for PoolConfig {}

// ── Global state ─────────────────────────────────────────────────────────────

/// Set by the SIGTERM handler; polled by the accept and worker loops.
static SHUTTING_DOWN: AtomicBool = AtomicBool::new(false);
/// Number of workers currently processing a request (thread mode).
static BUSY_COUNT: AtomicI32 = AtomicI32::new(0);
/// Total spawned workers (threads or forked children).
static TOTAL_WORKERS: AtomicI32 = AtomicI32::new(0);

// SAFETY: SHARED_BUSY is set once in pool_main_fork (parent process) before
// forking children. After fork, each process accesses the mmap'd AtomicI32
// via atomic operations only. Reset to null during shutdown.
static mut SHARED_BUSY: *mut AtomicI32 = ptr::null_mut();

/// Increment the busy-worker counter (shared mmap'd counter in fork mode,
/// process-local atomic otherwise).
#[no_mangle]
pub extern "C" fn pool_mark_busy() {
    // SAFETY: SHARED_BUSY is either null (thread mode, use local atomic) or a valid
    // mmap'd AtomicI32 pointer set during pool_main_fork initialization.
    unsafe {
        if !SHARED_BUSY.is_null() {
            (*SHARED_BUSY).fetch_add(1, Ordering::Relaxed);
        } else {
            BUSY_COUNT.fetch_add(1, Ordering::Relaxed);
        }
    }
}

/// Decrement the busy-worker counter; mirror of `pool_mark_busy`.
#[no_mangle]
pub extern "C" fn pool_mark_idle() {
    // SAFETY: Same as pool_mark_busy - SHARED_BUSY is null or a valid mmap'd pointer.
    unsafe {
        if !SHARED_BUSY.is_null() {
            (*SHARED_BUSY).fetch_sub(1, Ordering::Relaxed);
        } else {
            BUSY_COUNT.fetch_sub(1, Ordering::Relaxed);
        }
    }
}

/// Async-signal-safe SIGTERM handler: request an orderly shutdown.
extern "C" fn pool_sigterm_handler(_sig: i32) {
    SHUTTING_DOWN.store(true, Ordering::Relaxed);
}
// ── Packet dispatch ──────────────────────────────────────────────────────────

/// Route one incoming packet: answer pings inline, forward local/remote call
/// packets to the matching dispatch callback, and wrap every failure in a
/// fail packet so the client always receives a reply.
///
/// # Safety
/// `packet` must be null or point to a complete morloc packet; the dispatch
/// function pointers must be valid for the duration of the call.
#[no_mangle]
pub unsafe extern "C" fn pool_dispatch_packet(
    packet: *const u8,
    local_dispatch: PoolDispatchFn,
    remote_dispatch: PoolDispatchFn,
    ctx: *mut c_void,
) -> *mut u8 {
    extern "C" {
        fn make_fail_packet(msg: *const c_char) -> *mut u8;
        fn packet_is_ping(packet: *const u8, errmsg: *mut *mut c_char) -> bool;
        fn return_ping(packet: *const u8, errmsg: *mut *mut c_char) -> *mut u8;
        fn packet_is_local_call(packet: *const u8, errmsg: *mut *mut c_char) -> bool;
        fn packet_is_remote_call(packet: *const u8, errmsg: *mut *mut c_char) -> bool;
        fn read_morloc_call_packet(
            packet: *const u8,
            errmsg: *mut *mut c_char,
        ) -> *mut crate::packet_ffi::MorlocCall;
        fn free_morloc_call(call: *mut crate::packet_ffi::MorlocCall);
    }

    if packet.is_null() {
        return make_fail_packet(b"NULL packet in pool dispatch\0".as_ptr() as *const c_char);
    }

    let mut errmsg: *mut c_char = ptr::null_mut();

    // Ping packets are answered without touching the dispatcher.
    if packet_is_ping(packet, &mut errmsg) {
        if !errmsg.is_null() {
            return fail_from_errmsg(errmsg);
        }
        let pong = return_ping(packet, &mut errmsg);
        if !errmsg.is_null() {
            return fail_from_errmsg(errmsg);
        }
        return pong;
    }
    if !errmsg.is_null() {
        return fail_from_errmsg(errmsg);
    }

    let is_local = packet_is_local_call(packet, &mut errmsg);
    if !errmsg.is_null() {
        return fail_from_errmsg(errmsg);
    }
    let is_remote = packet_is_remote_call(packet, &mut errmsg);
    if !errmsg.is_null() {
        return fail_from_errmsg(errmsg);
    }

    if is_local || is_remote {
        let call = read_morloc_call_packet(packet, &mut errmsg);
        if !errmsg.is_null() {
            return fail_from_errmsg(errmsg);
        }
        let mid = (*call).midx;
        let args = (*call).args as *const *const u8;
        let nargs = (*call).nargs;
        let dispatch_fn = if is_local { local_dispatch } else { remote_dispatch };
        let result = dispatch_fn(mid, args, nargs, ctx);
        free_morloc_call(call);
        if result.is_null() {
            return make_fail_packet(
                b"dispatch callback returned NULL\0".as_ptr() as *const c_char,
            );
        }
        return result;
    }

    make_fail_packet(b"Unexpected packet type in pool dispatch\0".as_ptr() as *const c_char)
}

/// Convert a malloc'd error string into a fail packet, freeing the string.
unsafe fn fail_from_errmsg(errmsg: *mut c_char) -> *mut u8 {
    extern "C" {
        fn make_fail_packet(msg: *const c_char) -> *mut u8;
    }
    let pkt = make_fail_packet(errmsg);
    libc::free(errmsg as *mut c_void);
    pkt
}

// ── Helpers ──────────────────────────────────────────────────────────────────

/// Best-effort: send a fail packet to the client. Errors encountered while
/// sending the failure itself are discarded.
unsafe fn try_send_fail(client_fd: i32, msg: *const c_char) {
    extern "C" {
        fn make_fail_packet(msg: *const c_char) -> *mut u8;
        fn send_packet_to_foreign_server(
            fd: i32,
            packet: *mut u8,
            errmsg: *mut *mut c_char,
        ) -> usize;
    }
    let fail = make_fail_packet(if msg.is_null() {
        b"Unknown error\0".as_ptr() as *const c_char
    } else {
        msg
    });
    if !fail.is_null() {
        let mut err: *mut c_char = ptr::null_mut();
        send_packet_to_foreign_server(client_fd, fail, &mut err);
        libc::free(fail as *mut c_void);
        if !err.is_null() {
            libc::free(err as *mut c_void);
        }
    }
}

// ── Thread mode job queue ────────────────────────────────────────────────────

/// LIFO queue of client fds shared between the accept loop and worker
/// threads. `pop` waits with a 100 ms timeout so workers notice shutdown
/// even without a notification.
struct JobQueue {
    // NOTE(review): generic parameters were garbled in extraction; `push`
    // takes an i32 fd and `pop` yields one, so Vec<i32> is inferred.
    jobs: Mutex<Vec<i32>>,
    cond: Condvar,
}

impl JobQueue {
    fn new() -> Self {
        JobQueue { jobs: Mutex::new(Vec::new()), cond: Condvar::new() }
    }

    /// Enqueue a client fd and wake one waiting worker.
    fn push(&self, fd: i32) {
        let mut jobs = self.jobs.lock().unwrap();
        jobs.push(fd);
        self.cond.notify_one();
    }

    /// Dequeue a client fd; returns None once shutdown has been requested.
    fn pop(&self) -> Option<i32> {
        let mut jobs = self.jobs.lock().unwrap();
        loop {
            if SHUTTING_DOWN.load(Ordering::Relaxed) {
                return None;
            }
            if let Some(fd) = jobs.pop() {
                return Some(fd);
            }
            // Timed wait so the shutdown flag is re-checked periodically.
            let result = self
                .cond
                .wait_timeout(jobs, std::time::Duration::from_millis(100))
                .unwrap();
            jobs = result.0;
        }
    }
}

// ── Worker thread ────────────────────────────────────────────────────────────

/// Thread-mode worker: pull a client fd from the queue, read one request,
/// dispatch it, send the reply, close the connection; repeat until shutdown.
unsafe fn worker_loop(queue: &JobQueue, config: &PoolConfig) {
    extern "C" {
        fn stream_from_client(fd: i32, errmsg: *mut *mut c_char) -> *mut u8;
        fn send_packet_to_foreign_server(
            fd: i32,
            packet: *mut u8,
            errmsg: *mut *mut c_char,
        ) -> usize;
        fn close_socket(fd: i32);
    }
    while !SHUTTING_DOWN.load(Ordering::Relaxed) {
        let client_fd = match queue.pop() {
            Some(fd) => fd,
            None => break,
        };
        let mut errmsg: *mut c_char = ptr::null_mut();
        let data = stream_from_client(client_fd, &mut errmsg);
        if data.is_null() || !errmsg.is_null() {
            if !errmsg.is_null() {
                try_send_fail(client_fd, errmsg);
                libc::free(errmsg as *mut c_void);
            }
            libc::free(data as *mut c_void);
            close_socket(client_fd);
            continue;
        }
        // Track busy state so the accept loop can spawn new workers if needed
        pool_mark_busy();
        let result = pool_dispatch_packet(
            data,
            config.local_dispatch,
            config.remote_dispatch,
            config.dispatch_ctx,
        );
        pool_mark_idle();
        libc::free(data as *mut c_void);
        if !result.is_null() {
            send_packet_to_foreign_server(client_fd, result, &mut errmsg);
            libc::free(result as *mut c_void);
            if !errmsg.is_null() {
                libc::free(errmsg as *mut c_void);
            }
        }
        libc::fflush(ptr::null_mut()); // flush stdout
        close_socket(client_fd);
    }
}
// ── Pool main: threads mode ──────────────────────────────────────────────────

/// Run the pool with a fixed (optionally growing) set of worker threads that
/// pull client fds off a shared queue. Returns 0 on clean shutdown, 1 if the
/// daemon could not be started.
unsafe fn pool_main_threads(
    config: &PoolConfig,
    socket_path: *const c_char,
    tmpdir: *const c_char,
    shm_basename: *const c_char,
) -> i32 {
    extern "C" {
        fn start_daemon(
            socket_path: *const c_char,
            tmpdir: *const c_char,
            shm_basename: *const c_char,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> *mut c_void;
        fn close_daemon(daemon: *mut *mut c_void);
        fn wait_for_client_with_timeout(
            daemon: *mut c_void,
            timeout_us: i32,
            errmsg: *mut *mut c_char,
        ) -> i32;
    }
    let mut errmsg: *mut c_char = ptr::null_mut();
    let mut daemon = start_daemon(socket_path, tmpdir, shm_basename, 0xffff, &mut errmsg);
    if !errmsg.is_null() {
        // NOTE(review): fdopen(2, "w") allocates a FILE* that is never
        // fclose'd; harmless on this exit path but worth confirming.
        libc::fprintf(
            libc::fdopen(2, b"w\0".as_ptr() as *const c_char),
            b"Failed to start language server:\n%s\n\0".as_ptr() as *const c_char,
            errmsg,
        );
        libc::free(errmsg as *mut c_void);
        return 1;
    }

    let queue = Arc::new(JobQueue::new());
    let nthreads = config.initial_workers.max(1) as usize;
    TOTAL_WORKERS.store(nthreads as i32, Ordering::Relaxed);

    let mut handles = Vec::with_capacity(nthreads);
    for _ in 0..nthreads {
        let q = Arc::clone(&queue);
        let cfg = ptr::read(config); // Copy config for thread
        handles.push(std::thread::spawn(move || {
            worker_loop(&q, &cfg);
        }));
    }

    // Accept loop: enqueue incoming fds; optionally grow the worker pool.
    while !SHUTTING_DOWN.load(Ordering::Relaxed) {
        let client_fd = wait_for_client_with_timeout(daemon, 10000, &mut errmsg);
        if !errmsg.is_null() {
            libc::free(errmsg as *mut c_void);
            errmsg = ptr::null_mut();
        }
        if client_fd > 0 {
            queue.push(client_fd);
        }
        // Dynamic scaling: spawn a new worker if all are busy
        if config.dynamic_scaling {
            let busy = BUSY_COUNT.load(Ordering::Relaxed);
            let total = TOTAL_WORKERS.load(Ordering::Relaxed);
            if busy >= total {
                let q = Arc::clone(&queue);
                let cfg = ptr::read(config);
                handles.push(std::thread::spawn(move || {
                    worker_loop(&q, &cfg);
                }));
                TOTAL_WORKERS.fetch_add(1, Ordering::Relaxed);
            }
        }
    }

    // Shutdown: wake every waiting worker and join them all.
    SHUTTING_DOWN.store(true, Ordering::Relaxed);
    queue.cond.notify_all();
    for h in handles {
        let _ = h.join();
    }
    close_daemon(&mut daemon);
    0
}

// ── Pool main: single mode ───────────────────────────────────────────────────

/// Run the pool single-threaded: accept, read, dispatch, reply, close — one
/// client at a time. Returns 0 on clean shutdown, 1 on startup failure.
unsafe fn pool_main_single(
    config: &PoolConfig,
    socket_path: *const c_char,
    tmpdir: *const c_char,
    shm_basename: *const c_char,
) -> i32 {
    extern "C" {
        fn start_daemon(
            socket_path: *const c_char,
            tmpdir: *const c_char,
            shm_basename: *const c_char,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> *mut c_void;
        fn close_daemon(daemon: *mut *mut c_void);
        fn wait_for_client_with_timeout(
            daemon: *mut c_void,
            timeout_us: i32,
            errmsg: *mut *mut c_char,
        ) -> i32;
        fn stream_from_client(fd: i32, errmsg: *mut *mut c_char) -> *mut u8;
        fn send_packet_to_foreign_server(
            fd: i32,
            packet: *mut u8,
            errmsg: *mut *mut c_char,
        ) -> usize;
        fn close_socket(fd: i32);
    }
    let mut errmsg: *mut c_char = ptr::null_mut();
    let mut daemon = start_daemon(socket_path, tmpdir, shm_basename, 0xffff, &mut errmsg);
    if !errmsg.is_null() {
        libc::fprintf(
            libc::fdopen(2, b"w\0".as_ptr() as *const c_char),
            b"Failed to start language server:\n%s\n\0".as_ptr() as *const c_char,
            errmsg,
        );
        libc::free(errmsg as *mut c_void);
        return 1;
    }
    while !SHUTTING_DOWN.load(Ordering::Relaxed) {
        let client_fd = wait_for_client_with_timeout(daemon, 10000, &mut errmsg);
        if !errmsg.is_null() {
            libc::free(errmsg as *mut c_void);
            errmsg = ptr::null_mut();
        }
        if client_fd <= 0 {
            continue;
        }
        let data = stream_from_client(client_fd, &mut errmsg);
        if data.is_null() || !errmsg.is_null() {
            if !errmsg.is_null() {
                try_send_fail(client_fd, errmsg);
                libc::free(errmsg as *mut c_void);
                errmsg = ptr::null_mut();
            }
            libc::free(data as *mut c_void);
            close_socket(client_fd);
            continue;
        }
        let result = pool_dispatch_packet(
            data,
            config.local_dispatch,
            config.remote_dispatch,
            config.dispatch_ctx,
        );
        libc::free(data as *mut c_void);
        if !result.is_null() {
            send_packet_to_foreign_server(client_fd, result, &mut errmsg);
            libc::free(result as *mut c_void);
            if !errmsg.is_null() {
                libc::free(errmsg as *mut c_void);
                errmsg = ptr::null_mut();
            }
        }
        libc::fflush(ptr::null_mut());
        close_socket(client_fd);
    }
    close_daemon(&mut daemon);
    0
}
// ── Pool main: fork mode ─────────────────────────────────────────────────────

/// Run the pool with forked worker processes. The parent accepts connections
/// and passes client fds to children over a Unix socketpair (SCM_RIGHTS);
/// each child attaches its own shm volume and serves requests until SIGTERM.
/// Busy tracking uses an mmap'd counter shared across processes.
unsafe fn pool_main_fork(
    config: &PoolConfig,
    socket_path: *const c_char,
    tmpdir: *const c_char,
    shm_basename: *const c_char,
) -> i32 {
    extern "C" {
        fn start_daemon(
            socket_path: *const c_char,
            tmpdir: *const c_char,
            shm_basename: *const c_char,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> *mut c_void;
        fn close_daemon(daemon: *mut *mut c_void);
        fn wait_for_client_with_timeout(
            daemon: *mut c_void,
            timeout_us: i32,
            errmsg: *mut *mut c_char,
        ) -> i32;
        fn stream_from_client(fd: i32, errmsg: *mut *mut c_char) -> *mut u8;
        fn send_packet_to_foreign_server(
            fd: i32,
            packet: *mut u8,
            errmsg: *mut *mut c_char,
        ) -> usize;
        fn close_socket(fd: i32);
        fn shinit(
            basename: *const c_char,
            volume: usize,
            size: usize,
            errmsg: *mut *mut c_char,
        ) -> *mut c_void;
    }
    let mut errmsg: *mut c_char = ptr::null_mut();
    let mut daemon = start_daemon(socket_path, tmpdir, shm_basename, 0xffff, &mut errmsg);
    if !errmsg.is_null() {
        libc::fprintf(
            libc::fdopen(2, b"w\0".as_ptr() as *const c_char),
            b"Failed to start language server:\n%s\n\0".as_ptr() as *const c_char,
            errmsg,
        );
        libc::free(errmsg as *mut c_void);
        return 1;
    }

    // Create socketpair for fd passing
    let mut sv = [0i32; 2];
    if libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sv.as_mut_ptr()) < 0 {
        close_daemon(&mut daemon);
        return 1;
    }

    // Shared busy counter via mmap (anonymous + MAP_SHARED so it is visible
    // to the parent and every forked child).
    let shared_counter = libc::mmap(
        ptr::null_mut(),
        std::mem::size_of::<AtomicI32>(),
        libc::PROT_READ | libc::PROT_WRITE,
        libc::MAP_SHARED | libc::MAP_ANONYMOUS,
        -1,
        0,
    ) as *mut AtomicI32;
    if shared_counter == libc::MAP_FAILED as *mut AtomicI32 {
        libc::close(sv[0]);
        libc::close(sv[1]);
        close_daemon(&mut daemon);
        return 1;
    }
    (*shared_counter).store(0, Ordering::Relaxed);
    SHARED_BUSY = shared_counter;

    let nworkers = config.initial_workers.max(1);
    let mut child_pids: Vec<libc::pid_t> = Vec::new();
    for i in 0..nworkers {
        let pid = libc::fork();
        if pid < 0 {
            break;
        }
        if pid == 0 {
            // Child
            libc::close(sv[1]); // close write end
            // Get daemon server_fd from opaque pointer and close it
            // (we can't access the struct fields directly since daemon is *mut c_void,
            // but the child doesn't need to accept connections)
            if let Some(pfk) = config.post_fork_child {
                pfk(config.dispatch_ctx);
            }
            // Each child attaches its own shm volume (1-based index).
            shinit(shm_basename, (i + 1) as usize, 0xffff, &mut errmsg);
            if !errmsg.is_null() {
                // Print the error to stderr before exiting so the nexus can
                // capture it via the pool's redirected stderr file. Without
                // this, a failed shinit in a forked worker child leaves no
                // diagnostic trace anywhere.
                libc::fprintf(
                    libc::fdopen(2, b"w\0".as_ptr() as *const c_char),
                    b"Worker %d shinit failed: %s\n\0".as_ptr() as *const c_char,
                    i as i32,
                    errmsg,
                );
                libc::free(errmsg as *mut c_void);
                libc::_exit(1);
            }
            // Worker loop: receive fds and process
            loop {
                if SHUTTING_DOWN.load(Ordering::Relaxed) {
                    break;
                }
                // 100 ms poll timeout so the shutdown flag is re-checked.
                let mut pfd = libc::pollfd { fd: sv[0], events: libc::POLLIN, revents: 0 };
                let ready = libc::poll(&mut pfd, 1, 100);
                if ready <= 0 {
                    continue;
                }
                let client_fd = recv_fd(sv[0]);
                if client_fd < 0 {
                    break;
                }
                let data = stream_from_client(client_fd, &mut errmsg);
                if data.is_null() || !errmsg.is_null() {
                    if !errmsg.is_null() {
                        try_send_fail(client_fd, errmsg);
                        libc::free(errmsg as *mut c_void);
                        errmsg = ptr::null_mut();
                    }
                    libc::free(data as *mut c_void);
                    close_socket(client_fd);
                    continue;
                }
                let result = pool_dispatch_packet(
                    data,
                    config.local_dispatch,
                    config.remote_dispatch,
                    config.dispatch_ctx,
                );
                libc::free(data as *mut c_void);
                if !result.is_null() {
                    send_packet_to_foreign_server(client_fd, result, &mut errmsg);
                    libc::free(result as *mut c_void);
                    if !errmsg.is_null() {
                        libc::free(errmsg as *mut c_void);
                        errmsg = ptr::null_mut();
                    }
                }
                libc::fflush(ptr::null_mut());
                close_socket(client_fd);
            }
            libc::close(sv[0]);
            libc::_exit(0);
        }
        child_pids.push(pid);
    }
    TOTAL_WORKERS.store(child_pids.len() as i32, Ordering::Relaxed);

    // Parent: accept loop
    while !SHUTTING_DOWN.load(Ordering::Relaxed) {
        let client_fd = wait_for_client_with_timeout(daemon, 10000, &mut errmsg);
        if !errmsg.is_null() {
            libc::free(errmsg as *mut c_void);
            errmsg = ptr::null_mut();
        }
        if client_fd > 0 {
            // Hand the fd to whichever child reads first, then drop our copy.
            send_fd(sv[1], client_fd);
            close_socket(client_fd);
        }
        // Reap dead children
        for pid in child_pids.iter_mut() {
            if *pid > 0 {
                let mut wstatus: i32 = 0;
                if libc::waitpid(*pid, &mut wstatus, libc::WNOHANG) > 0 {
                    *pid = -1;
                }
            }
        }
    }

    // Shutdown: signal every live child, then wait for each.
    for &pid in &child_pids {
        if pid > 0 {
            libc::kill(pid, libc::SIGTERM);
        }
    }
    for &pid in &child_pids {
        if pid > 0 {
            libc::waitpid(pid, ptr::null_mut(), 0);
        }
    }
    libc::close(sv[0]);
    libc::close(sv[1]);
    libc::munmap(shared_counter as *mut c_void, std::mem::size_of::<AtomicI32>());
    SHARED_BUSY = ptr::null_mut();
    close_daemon(&mut daemon);
    0
}

// fd-passing helpers

/// Send fd `fd` over Unix socket `sock` as an SCM_RIGHTS control message
/// (one dummy payload byte). Returns 0 on success, -1 on sendmsg failure.
// NOTE(review): the CMSG size generics were garbled in extraction; an i32 fd
// is copied, so size_of::<i32>() is inferred — confirm against the original.
unsafe fn send_fd(sock: i32, fd: i32) -> i32 {
    let mut buf = [0u8; 1];
    let mut iov = libc::iovec { iov_base: buf.as_mut_ptr() as *mut c_void, iov_len: 1 };
    let cmsg_space = libc::CMSG_SPACE(std::mem::size_of::<i32>() as u32) as usize;
    let mut cmsg_buf = vec![0u8; cmsg_space];
    let mut msg: libc::msghdr = std::mem::zeroed();
    msg.msg_iov = &mut iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cmsg_buf.as_mut_ptr() as *mut c_void;
    msg.msg_controllen = cmsg_space as _;
    let cmsg = libc::CMSG_FIRSTHDR(&msg);
    (*cmsg).cmsg_level = libc::SOL_SOCKET;
    (*cmsg).cmsg_type = libc::SCM_RIGHTS;
    (*cmsg).cmsg_len = libc::CMSG_LEN(std::mem::size_of::<i32>() as u32) as _;
    ptr::copy_nonoverlapping(
        &fd as *const i32 as *const u8,
        libc::CMSG_DATA(cmsg),
        std::mem::size_of::<i32>(),
    );
    if libc::sendmsg(sock, &msg, 0) >= 0 { 0 } else { -1 }
}

/// Receive an fd passed by `send_fd`; returns -1 on error, EOF, or a
/// malformed control message.
unsafe fn recv_fd(sock: i32) -> i32 {
    let mut buf = [0u8; 1];
    let mut iov = libc::iovec { iov_base: buf.as_mut_ptr() as *mut c_void, iov_len: 1 };
    let cmsg_space = libc::CMSG_SPACE(std::mem::size_of::<i32>() as u32) as usize;
    let mut cmsg_buf = vec![0u8; cmsg_space];
    let mut msg: libc::msghdr = std::mem::zeroed();
    msg.msg_iov = &mut iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cmsg_buf.as_mut_ptr() as *mut c_void;
    msg.msg_controllen = cmsg_space as _;
    let n = libc::recvmsg(sock, &mut msg, 0);
    if n <= 0 {
        return -1;
    }
    let cmsg = libc::CMSG_FIRSTHDR(&msg);
    if cmsg.is_null()
        || (*cmsg).cmsg_level != libc::SOL_SOCKET
        || (*cmsg).cmsg_type != libc::SCM_RIGHTS
    {
        return -1;
    }
    let mut fd: i32 = 0;
    ptr::copy_nonoverlapping(
        libc::CMSG_DATA(cmsg),
        &mut fd as *mut i32 as *mut u8,
        std::mem::size_of::<i32>(),
    );
    fd
}
libc::fprintf(libc::fdopen(2, b"w\0".as_ptr() as *const c_char), b"Usage: %s \n\0".as_ptr() as *const c_char, if argc > 0 { *argv } else { b"pool\0".as_ptr() as *const c_char }); return 1; } let cfg = &mut *config; if cfg.initial_workers <= 0 { cfg.initial_workers = 1; } SHUTTING_DOWN.store(false, Ordering::Relaxed); BUSY_COUNT.store(0, Ordering::Relaxed); // SIGTERM handler let mut sa: libc::sigaction = std::mem::zeroed(); sa.sa_sigaction = pool_sigterm_handler as *const () as usize; libc::sigemptyset(&mut sa.sa_mask); libc::sigaction(libc::SIGTERM, &sa, ptr::null_mut()); let socket_path = *argv.add(1); let tmpdir = *argv.add(2); let shm_basename = *argv.add(3); match cfg.concurrency { PoolConcurrency::Threads => pool_main_threads(cfg, socket_path, tmpdir, shm_basename), PoolConcurrency::Fork => pool_main_fork(cfg, socket_path, tmpdir, shm_basename), PoolConcurrency::Single => pool_main_single(cfg, socket_path, tmpdir, shm_basename), } } ================================================ FILE: data/rust/morloc-runtime/src/router_ffi.rs ================================================ //! C ABI wrappers for router subsystems. //! Replaces router.c. Routes requests to per-program daemons. use std::ffi::{c_char, c_void, CStr, CString}; use std::ptr; use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Instant; use crate::daemon_ffi::{ DaemonConfig, DaemonResponse, MorlocSocket, }; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; use crate::http_ffi::{DaemonMethod, DaemonRequest, HttpMethod, HttpRequest}; // -- Constants ---------------------------------------------------------------- /// Max size of sun_path in sockaddr_un (108 on Linux) const SUN_PATH_LEN: usize = 108; // Daemon startup polling (exponential backoff, ~5s total). // Sum of 100 * 1.25^i for i in 0..16 is ~4650ms. 
// Daemon startup polling (exponential backoff, ~5s total).
// Sum of 100 * 1.25^i for i in 0..16 is ~4650ms.
const DAEMON_POLL_INITIAL_MS: f64 = 100.0;
const DAEMON_POLL_MULTIPLIER: f64 = 1.25;
const DAEMON_POLL_MAX_RETRIES: usize = 16;

// -- Global state -------------------------------------------------------------

/// Set by the signal handler to request a clean router shutdown.
static ROUTER_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);

extern "C" fn router_signal_handler_fn(_sig: i32) {
    ROUTER_SHUTDOWN_REQUESTED.store(true, Ordering::Relaxed);
}

// -- C-compatible types -------------------------------------------------------

/// One routable program: identity, parsed manifest, and (if started) the
/// daemon child pid plus the unix socket path used to reach it.
#[repr(C)]
pub struct RouterProgram {
    pub name: *mut c_char,
    pub manifest_path: *mut c_char,
    pub manifest: *mut c_void, // manifest_t*
    pub daemon_pid: libc::pid_t,
    pub daemon_socket: [c_char; SUN_PATH_LEN],
}

/// Router state: a growable array of programs discovered under `fdb_path`.
#[repr(C)]
pub struct Router {
    pub programs: *mut RouterProgram,
    pub n_programs: usize,
    pub fdb_path: *mut c_char,
}

// -- router_init --------------------------------------------------------------

/// Scan `fdb_path` for `*.manifest` files and build a Router whose programs
/// each carry a parsed manifest and a `/tmp/morloc-router-<name>.sock`
/// daemon socket path. Unparseable manifests are skipped with a warning.
/// Returns null (with `errmsg` set) only if the directory cannot be opened.
///
/// # Safety
/// `fdb_path` must be a valid C string; the result must be released with
/// `router_free`.
#[no_mangle]
pub unsafe extern "C" fn router_init(
    fdb_path: *const c_char,
    errmsg: *mut *mut c_char,
) -> *mut Router {
    clear_errmsg(errmsg);
    extern "C" {
        fn read_manifest(path: *const c_char, errmsg: *mut *mut c_char) -> *mut c_void;
    }
    let dir = libc::opendir(fdb_path);
    if dir.is_null() {
        let errno_msg = CStr::from_ptr(libc::strerror(crate::utility::errno_val()))
            .to_string_lossy();
        let path_str = CStr::from_ptr(fdb_path).to_string_lossy();
        set_errmsg(
            errmsg,
            &MorlocError::Other(format!(
                "Cannot open fdb directory '{}': {}",
                path_str, errno_msg
            )),
        );
        return ptr::null_mut();
    }

    let router = libc::calloc(1, std::mem::size_of::<Router>()) as *mut Router;
    (*router).fdb_path = libc::strdup(fdb_path);
    let mut cap: usize = 8;
    (*router).programs =
        libc::calloc(cap, std::mem::size_of::<RouterProgram>()) as *mut RouterProgram;
    (*router).n_programs = 0;

    loop {
        let entry = libc::readdir(dir);
        if entry.is_null() {
            break;
        }
        let name = CStr::from_ptr((*entry).d_name.as_ptr());
        let name_str = name.to_string_lossy();
        // ".manifest" is 9 chars; len >= 10 guarantees a non-empty stem.
        if name_str.len() < 10 || !name_str.ends_with(".manifest") {
            continue;
        }

        // Grow array if needed
        // NOTE(review): realloc return is not checked for null — on OOM the
        // old array would leak and the write below would fault.
        if (*router).n_programs >= cap {
            cap *= 2;
            (*router).programs = libc::realloc(
                (*router).programs as *mut c_void,
                cap * std::mem::size_of::<RouterProgram>(),
            ) as *mut RouterProgram;
        }
        let prog = &mut *(*router).programs.add((*router).n_programs);
        ptr::write_bytes(prog as *mut RouterProgram, 0, 1);

        // Extract program name (filename without .manifest)
        let prog_name_len = name_str.len() - 9;
        let prog_name = &name_str[..prog_name_len];
        let c_prog_name = CString::new(prog_name).unwrap_or_default();
        prog.name = libc::strdup(c_prog_name.as_ptr());

        // Build full path
        let fdb_str = CStr::from_ptr(fdb_path).to_string_lossy();
        let full_path = format!("{}/{}", fdb_str, name_str);
        let c_path = CString::new(full_path).unwrap_or_default();
        prog.manifest_path = libc::strdup(c_path.as_ptr());

        // Read and parse manifest; on failure warn, release the strings,
        // and leave the slot for the next candidate.
        let mut child_err: *mut c_char = ptr::null_mut();
        prog.manifest = read_manifest(prog.manifest_path, &mut child_err);
        if !child_err.is_null() {
            let err_str = CStr::from_ptr(child_err).to_string_lossy();
            let path_str = CStr::from_ptr(prog.manifest_path).to_string_lossy();
            eprintln!("router: warning: failed to parse {}: {}", path_str, err_str);
            libc::free(child_err as *mut c_void);
            libc::free(prog.name as *mut c_void);
            libc::free(prog.manifest_path as *mut c_void);
            continue;
        }
        prog.daemon_pid = 0;

        // Set socket path (truncated to SUN_PATH_LEN if necessary)
        let socket_path = format!("/tmp/morloc-router-{}.sock", prog_name);
        let c_socket = CString::new(socket_path).unwrap_or_default();
        let socket_bytes = c_socket.as_bytes_with_nul();
        let copy_len = socket_bytes.len().min(SUN_PATH_LEN);
        ptr::copy_nonoverlapping(
            socket_bytes.as_ptr() as *const c_char,
            prog.daemon_socket.as_mut_ptr(),
            copy_len,
        );
        (*router).n_programs += 1;
    }
    libc::closedir(dir);
    // Empty fdb is fine -- programs can be added while the router is running
    router
}
c_void); } for i in 0..(*router).n_programs { let prog = &mut *(*router).programs.add(i); libc::free(prog.name as *mut c_void); libc::free(prog.manifest_path as *mut c_void); if !prog.manifest.is_null() { free_manifest(prog.manifest); } if prog.daemon_pid > 0 { libc::kill(prog.daemon_pid, libc::SIGTERM); } } libc::free((*router).programs as *mut c_void); libc::free((*router).fdb_path as *mut c_void); libc::free(router as *mut c_void); } // -- morloc-nexus path resolution --------------------------------------------- /// Locate the morloc-nexus executable. /// /// Tries, in order: /// 1. `$MORLOC_NEXUS` (explicit override) /// 2. `$MORLOC_HOME/bin/morloc-nexus` (deploy convention) /// 3. `morloc-nexus` on `$PATH` /// 4. `$HOME/.local/bin/morloc-nexus` (bare-metal developer install) /// /// Returns the path on the first candidate whose `access(_, X_OK)` succeeds, /// or the list of attempted paths on failure. unsafe fn find_morloc_nexus() -> Result> { fn is_executable(path: &str) -> bool { if let Ok(c) = CString::new(path) { unsafe { libc::access(c.as_ptr(), libc::X_OK) == 0 } } else { false } } fn getenv_str(name: &str) -> Option { let c_name = CString::new(name).ok()?; let p = unsafe { libc::getenv(c_name.as_ptr()) }; if p.is_null() { None } else { Some(unsafe { CStr::from_ptr(p) }.to_string_lossy().into_owned()) } } let mut tried: Vec = Vec::new(); // 1. $MORLOC_NEXUS if let Some(p) = getenv_str("MORLOC_NEXUS") { if is_executable(&p) { return Ok(p); } tried.push(format!("$MORLOC_NEXUS={}", p)); } // 2. $MORLOC_HOME/bin/morloc-nexus if let Some(h) = getenv_str("MORLOC_HOME") { let p = format!("{}/bin/morloc-nexus", h); if is_executable(&p) { return Ok(p); } tried.push(p); } // 3. Search $PATH if let Some(path) = getenv_str("PATH") { for dir in path.split(':') { if dir.is_empty() { continue; } let p = format!("{}/morloc-nexus", dir); if is_executable(&p) { return Ok(p); } } tried.push(format!("$PATH ({})", path)); } // 4. 
$HOME/.local/bin/morloc-nexus if let Some(h) = getenv_str("HOME") { let p = format!("{}/.local/bin/morloc-nexus", h); if is_executable(&p) { return Ok(p); } tried.push(p); } Err(tried) } // -- router_start_program ----------------------------------------------------- #[no_mangle] pub unsafe extern "C" fn router_start_program( prog: *mut RouterProgram, errmsg: *mut *mut c_char, ) -> bool { clear_errmsg(errmsg); let nexus_path = match find_morloc_nexus() { Ok(p) => p, Err(tried) => { set_errmsg( errmsg, &MorlocError::Other(format!( "morloc-nexus binary not found; tried: {}", tried.join(", ") )), ); return false; } }; let c_nexus = CString::new(nexus_path.as_str()).unwrap_or_default(); let pid = libc::fork(); if pid == 0 { // Child: exec morloc-nexus with --daemon libc::setpgid(0, 0); let arg_nexus = CString::new("morloc-nexus").unwrap(); let arg_daemon = CString::new("--daemon").unwrap(); let arg_socket = CString::new("--socket").unwrap(); let socket_path = CStr::from_ptr((*prog).daemon_socket.as_ptr()); libc::execl( c_nexus.as_ptr(), arg_nexus.as_ptr(), (*prog).manifest_path, arg_daemon.as_ptr(), arg_socket.as_ptr(), socket_path.as_ptr(), ptr::null::(), ); // If exec fails let prog_name = CStr::from_ptr((*prog).name).to_string_lossy(); let errno_msg = CStr::from_ptr(libc::strerror(crate::utility::errno_val())) .to_string_lossy(); eprintln!( "router: failed to exec morloc-nexus for {}: {}", prog_name, errno_msg ); libc::_exit(1); } else if pid > 0 { (*prog).daemon_pid = pid; // Poll until the daemon socket is connectable (exponential backoff) let mut delay_ms = DAEMON_POLL_INITIAL_MS; let mut connected = false; for _attempt in 0..DAEMON_POLL_MAX_RETRIES { let ts = libc::timespec { tv_sec: 0, tv_nsec: (delay_ms * 1_000_000.0) as i64, }; libc::nanosleep(&ts, ptr::null_mut()); // Check if child died during startup let mut status: i32 = 0; let result = libc::waitpid(pid, &mut status, libc::WNOHANG); if result == pid { (*prog).daemon_pid = 0; let prog_name = 
// -- router_start_program -----------------------------------------------------

/// Fork and exec `morloc-nexus --daemon` for one program, then poll its unix
/// socket with exponential backoff (~5 s total) until it is connectable.
/// Returns true once started (even if the socket is not yet connectable —
/// the caller retries on connect failure), false with `errmsg` set when the
/// binary is missing, fork fails, or the child exits during startup.
///
/// # Safety
/// `prog` must point to a valid RouterProgram with initialized name,
/// manifest_path, and daemon_socket fields.
#[no_mangle]
pub unsafe extern "C" fn router_start_program(
    prog: *mut RouterProgram,
    errmsg: *mut *mut c_char,
) -> bool {
    clear_errmsg(errmsg);
    let nexus_path = match find_morloc_nexus() {
        Ok(p) => p,
        Err(tried) => {
            set_errmsg(
                errmsg,
                &MorlocError::Other(format!(
                    "morloc-nexus binary not found; tried: {}",
                    tried.join(", ")
                )),
            );
            return false;
        }
    };
    let c_nexus = CString::new(nexus_path.as_str()).unwrap_or_default();

    let pid = libc::fork();
    if pid == 0 {
        // Child: exec morloc-nexus with --daemon
        // New process group so the daemon is signalled independently.
        libc::setpgid(0, 0);
        let arg_nexus = CString::new("morloc-nexus").unwrap();
        let arg_daemon = CString::new("--daemon").unwrap();
        let arg_socket = CString::new("--socket").unwrap();
        let socket_path = CStr::from_ptr((*prog).daemon_socket.as_ptr());
        // execl's variadic argument list must end with a null char pointer.
        // NOTE(review): the turbofish was garbled in extraction;
        // ptr::null::<c_char>() restored.
        libc::execl(
            c_nexus.as_ptr(),
            arg_nexus.as_ptr(),
            (*prog).manifest_path,
            arg_daemon.as_ptr(),
            arg_socket.as_ptr(),
            socket_path.as_ptr(),
            ptr::null::<c_char>(),
        );
        // If exec fails
        let prog_name = CStr::from_ptr((*prog).name).to_string_lossy();
        let errno_msg = CStr::from_ptr(libc::strerror(crate::utility::errno_val()))
            .to_string_lossy();
        eprintln!(
            "router: failed to exec morloc-nexus for {}: {}",
            prog_name, errno_msg
        );
        libc::_exit(1);
    } else if pid > 0 {
        (*prog).daemon_pid = pid;
        // Poll until the daemon socket is connectable (exponential backoff)
        let mut delay_ms = DAEMON_POLL_INITIAL_MS;
        let mut connected = false;
        for _attempt in 0..DAEMON_POLL_MAX_RETRIES {
            let ts = libc::timespec {
                tv_sec: 0,
                tv_nsec: (delay_ms * 1_000_000.0) as i64,
            };
            libc::nanosleep(&ts, ptr::null_mut());

            // Check if child died during startup
            let mut status: i32 = 0;
            let result = libc::waitpid(pid, &mut status, libc::WNOHANG);
            if result == pid {
                (*prog).daemon_pid = 0;
                let prog_name = CStr::from_ptr((*prog).name).to_string_lossy();
                set_errmsg(
                    errmsg,
                    &MorlocError::Other(format!(
                        "Daemon for '{}' exited during startup (status {})",
                        prog_name, status
                    )),
                );
                return false;
            }

            // Try connecting to the daemon socket
            let test_sock = libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0);
            if test_sock >= 0 {
                let mut addr: libc::sockaddr_un = std::mem::zeroed();
                addr.sun_family = libc::AF_UNIX as libc::sa_family_t;
                let socket_path = (*prog).daemon_socket.as_ptr();
                let path_bytes = CStr::from_ptr(socket_path).to_bytes();
                let copy_len = path_bytes.len().min(addr.sun_path.len() - 1);
                ptr::copy_nonoverlapping(
                    path_bytes.as_ptr() as *const c_char,
                    addr.sun_path.as_mut_ptr(),
                    copy_len,
                );
                let rc = libc::connect(
                    test_sock,
                    &addr as *const libc::sockaddr_un as *const libc::sockaddr,
                    std::mem::size_of::<libc::sockaddr_un>() as libc::socklen_t,
                );
                libc::close(test_sock);
                if rc == 0 {
                    connected = true;
                    break;
                }
            }
            delay_ms *= DAEMON_POLL_MULTIPLIER;
        }
        if !connected {
            // Final check: did the daemon die?
            let mut status: i32 = 0;
            let result = libc::waitpid(pid, &mut status, libc::WNOHANG);
            if result == pid {
                (*prog).daemon_pid = 0;
                let prog_name = CStr::from_ptr((*prog).name).to_string_lossy();
                set_errmsg(
                    errmsg,
                    &MorlocError::Other(format!(
                        "Daemon for '{}' exited during startup (status {})",
                        prog_name, status
                    )),
                );
                return false;
            }
            // Daemon alive but socket not yet connectable -- proceed anyway,
            // router_forward() will retry on connect failure.
        }
        true
    } else {
        let errno_msg = CStr::from_ptr(libc::strerror(crate::utility::errno_val()))
            .to_string_lossy();
        set_errmsg(
            errmsg,
            &MorlocError::Other(format!("fork failed: {}", errno_msg)),
        );
        false
    }
}
sock = connect_to_daemon(prog, errmsg); let sock = if sock < 0 { // Try restarting daemon (*prog).daemon_pid = 0; // Clear previous error if !(*errmsg).is_null() { libc::free(*errmsg as *mut c_void); *errmsg = ptr::null_mut(); } let mut child_err: *mut c_char = ptr::null_mut(); if !router_start_program(prog, &mut child_err) { if !child_err.is_null() { *errmsg = child_err; } return ptr::null_mut(); } let sock2 = connect_to_daemon(prog, errmsg); if sock2 < 0 { return ptr::null_mut(); } sock2 } else { sock }; // Send length-prefixed message let len_buf: [u8; 4] = [ ((req_len >> 24) & 0xFF) as u8, ((req_len >> 16) & 0xFF) as u8, ((req_len >> 8) & 0xFF) as u8, (req_len & 0xFF) as u8, ]; let n = libc::send( sock, len_buf.as_ptr() as *const c_void, 4, crate::utility::SEND_NOSIGNAL, ); if n != 4 { libc::close(sock); set_errmsg( errmsg, &MorlocError::Other("Failed to send request length to daemon".into()), ); return ptr::null_mut(); } let mut total_sent: usize = 0; while total_sent < req_len { let n = libc::send( sock, c_req.as_ptr().add(total_sent) as *const c_void, req_len - total_sent, crate::utility::SEND_NOSIGNAL, ); if n <= 0 { libc::close(sock); set_errmsg( errmsg, &MorlocError::Other("Failed to send request body to daemon".into()), ); return ptr::null_mut(); } total_sent += n as usize; } // Read response length let mut resp_len_buf = [0u8; 4]; let n = libc::recv( sock, resp_len_buf.as_mut_ptr() as *mut c_void, 4, libc::MSG_WAITALL, ); if n != 4 { libc::close(sock); set_errmsg( errmsg, &MorlocError::Other("Failed to read response length from daemon".into()), ); return ptr::null_mut(); } let resp_len = ((resp_len_buf[0] as u32) << 24) | ((resp_len_buf[1] as u32) << 16) | ((resp_len_buf[2] as u32) << 8) | (resp_len_buf[3] as u32); let resp_json = libc::malloc(resp_len as usize + 1) as *mut c_char; if resp_json.is_null() { libc::close(sock); set_errmsg( errmsg, &MorlocError::Other("Failed to allocate response buffer".into()), ); return ptr::null_mut(); } let mut 
total_recv: usize = 0; while total_recv < resp_len as usize { let n = libc::recv( sock, resp_json.add(total_recv) as *mut c_void, resp_len as usize - total_recv, 0, ); if n <= 0 { libc::free(resp_json as *mut c_void); libc::close(sock); set_errmsg( errmsg, &MorlocError::Other("Failed to read response body from daemon".into()), ); return ptr::null_mut(); } total_recv += n as usize; } *resp_json.add(resp_len as usize) = 0; libc::close(sock); let resp = daemon_parse_response(resp_json, resp_len as usize, errmsg); libc::free(resp_json as *mut c_void); resp } /// Helper: connect to a program daemon's unix socket with 60s timeouts. unsafe fn connect_to_daemon( prog: *mut RouterProgram, errmsg: *mut *mut c_char, ) -> i32 { let sock = libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0); if sock < 0 { set_errmsg( errmsg, &MorlocError::Other("Failed to create socket".into()), ); return -1; } crate::utility::set_nosigpipe(sock); let tv = libc::timeval { tv_sec: 60, tv_usec: 0, }; libc::setsockopt( sock, libc::SOL_SOCKET, libc::SO_RCVTIMEO, &tv as *const libc::timeval as *const c_void, std::mem::size_of::() as libc::socklen_t, ); libc::setsockopt( sock, libc::SOL_SOCKET, libc::SO_SNDTIMEO, &tv as *const libc::timeval as *const c_void, std::mem::size_of::() as libc::socklen_t, ); let mut addr: libc::sockaddr_un = std::mem::zeroed(); addr.sun_family = libc::AF_UNIX as libc::sa_family_t; let socket_path = (*prog).daemon_socket.as_ptr(); let path_bytes = CStr::from_ptr(socket_path).to_bytes(); let copy_len = path_bytes.len().min(addr.sun_path.len() - 1); ptr::copy_nonoverlapping( path_bytes.as_ptr() as *const c_char, addr.sun_path.as_mut_ptr(), copy_len, ); if libc::connect( sock, &addr as *const libc::sockaddr_un as *const libc::sockaddr, std::mem::size_of::() as libc::socklen_t, ) < 0 { libc::close(sock); let prog_name = CStr::from_ptr((*prog).name).to_string_lossy(); set_errmsg( errmsg, &MorlocError::Other(format!( "Failed to connect to daemon for '{}'", prog_name )), ); return 
-1;
    }
    sock
}

/// Serialize a DaemonRequest to JSON using serde_json.
///
/// Only non-NULL fields are emitted; `args_json` is embedded as a parsed
/// JSON value (and silently dropped if it fails to parse).
unsafe fn serialize_request_to_json(request: *mut DaemonRequest) -> String {
    let mut map = serde_json::Map::new();
    if !(*request).id.is_null() {
        let id = CStr::from_ptr((*request).id).to_string_lossy();
        map.insert("id".into(), serde_json::Value::String(id.into_owned()));
    }
    let method_str = match (*request).method {
        DaemonMethod::Call => "call",
        DaemonMethod::Discover => "discover",
        DaemonMethod::Health => "health",
        DaemonMethod::Eval => "eval",
        DaemonMethod::Typecheck => "typecheck",
        DaemonMethod::Bind => "bind",
        DaemonMethod::Bindings => "bindings",
        DaemonMethod::Unbind => "unbind",
    };
    map.insert(
        "method".into(),
        serde_json::Value::String(method_str.into()),
    );
    if !(*request).command.is_null() {
        let cmd = CStr::from_ptr((*request).command).to_string_lossy();
        map.insert(
            "command".into(),
            serde_json::Value::String(cmd.into_owned()),
        );
    }
    if !(*request).args_json.is_null() {
        let args_str = CStr::from_ptr((*request).args_json).to_string_lossy();
        // Try to parse as JSON value to embed directly
        if let Ok(v) = serde_json::from_str::<serde_json::Value>(&args_str) {
            map.insert("args".into(), v);
        }
    }
    if !(*request).expr.is_null() {
        let expr = CStr::from_ptr((*request).expr).to_string_lossy();
        map.insert("expr".into(), serde_json::Value::String(expr.into_owned()));
    }
    if !(*request).name.is_null() {
        let name = CStr::from_ptr((*request).name).to_string_lossy();
        map.insert("name".into(), serde_json::Value::String(name.into_owned()));
    }
    serde_json::to_string(&map).unwrap_or_else(|_| "{}".into())
}

// -- router_build_discovery ---------------------------------------------------

/// Build the router-level discovery JSON: one entry per program with its
/// running state and, when a manifest is loaded, its command list.
/// Returns a strdup'd C string owned by the caller.
#[no_mangle]
pub unsafe extern "C" fn router_build_discovery(router: *mut Router) -> *mut c_char {
    // Walk the canonical Manifest C struct from manifest_ffi.rs. No
    // local mirror -- the in-memory layout is shared.
    use crate::manifest_ffi::Manifest as ManifestC;
    #[derive(serde::Serialize)]
    struct CommandInfo {
        name: String,
        r#type: String,
        return_type: String,
    }
    #[derive(serde::Serialize)]
    struct ProgramInfo {
        name: String,
        running: bool,
        #[serde(skip_serializing_if = "Option::is_none")]
        commands: Option<Vec<CommandInfo>>,
    }
    #[derive(serde::Serialize)]
    struct Discovery {
        programs: Vec<ProgramInfo>,
    }
    let mut programs = Vec::with_capacity((*router).n_programs);
    for i in 0..(*router).n_programs {
        let prog = &*(*router).programs.add(i);
        let name = CStr::from_ptr(prog.name).to_string_lossy().into_owned();
        // kill(pid, 0) probes process liveness without delivering a signal.
        let running = prog.daemon_pid > 0 && libc::kill(prog.daemon_pid, 0) == 0;
        let commands = if !prog.manifest.is_null() {
            let mv = prog.manifest as *const ManifestC;
            let mut cmds = Vec::with_capacity((*mv).n_commands);
            for c in 0..(*mv).n_commands {
                let cmd = &*(*mv).commands.add(c);
                let cmd_name = CStr::from_ptr(cmd.name).to_string_lossy().into_owned();
                let cmd_type = if cmd.is_pure { "pure" } else { "remote" };
                let ret_type = if !cmd.ret.type_desc.is_null() {
                    CStr::from_ptr(cmd.ret.type_desc)
                        .to_string_lossy()
                        .into_owned()
                } else {
                    String::new()
                };
                cmds.push(CommandInfo {
                    name: cmd_name,
                    r#type: cmd_type.into(),
                    return_type: ret_type,
                });
            }
            Some(cmds)
        } else {
            None
        };
        programs.push(ProgramInfo {
            name,
            running,
            commands,
        });
    }
    let disco = Discovery { programs };
    let json = serde_json::to_string(&disco).unwrap_or_else(|_| "{}".into());
    let c = CString::new(json).unwrap_or_default();
    libc::strdup(c.as_ptr())
}

// -- Router HTTP request routing ----------------------------------------------

/// Route HTTP requests for the router. Sets *out_program to the target program
/// name (caller-owned) for per-program requests, or NULL for router-level requests.
unsafe fn router_http_to_request( req: *mut HttpRequest, out_program: *mut *mut c_char, errmsg: *mut *mut c_char, ) -> *mut DaemonRequest { clear_errmsg(errmsg); let dreq = libc::calloc(1, std::mem::size_of::()) as *mut DaemonRequest; if dreq.is_null() { set_errmsg( errmsg, &MorlocError::Other("Failed to allocate daemon_request_t".into()), ); return ptr::null_mut(); } *out_program = ptr::null_mut(); let path = CStr::from_ptr((*req).path.as_ptr()) .to_str() .unwrap_or(""); let method = (*req).method; let body_str = if !(*req).body.is_null() && (*req).body_len > 0 { std::str::from_utf8(std::slice::from_raw_parts( (*req).body as *const u8, (*req).body_len, )) .unwrap_or("") } else { "" }; // GET /health or GET /health/ if method == HttpMethod::Get && (path == "/health" || path.starts_with("/health/")) { (*dreq).method = DaemonMethod::Health; if path.starts_with("/health/") { let prog_name = &path[8..]; if !prog_name.is_empty() { let c = CString::new(prog_name).unwrap_or_default(); *out_program = libc::strdup(c.as_ptr()); } } return dreq; } // GET /programs or GET /discover if method == HttpMethod::Get && (path == "/programs" || path == "/discover") { (*dreq).method = DaemonMethod::Discover; return dreq; } // GET /discover/ if method == HttpMethod::Get && path.starts_with("/discover/") { let prog_name = &path[10..]; if !prog_name.is_empty() { let c = CString::new(prog_name).unwrap_or_default(); *out_program = libc::strdup(c.as_ptr()); (*dreq).method = DaemonMethod::Discover; return dreq; } } // POST /eval if method == HttpMethod::Post && path == "/eval" { (*dreq).method = DaemonMethod::Eval; if !body_str.is_empty() { if let Ok(v) = serde_json::from_str::(body_str) { if let Some(expr) = v.get("expr").and_then(|e| e.as_str()) { let c = CString::new(expr).unwrap_or_default(); (*dreq).expr = libc::strdup(c.as_ptr()); } } } if (*dreq).expr.is_null() { libc::free(dreq as *mut c_void); set_errmsg( errmsg, &MorlocError::Other("Missing 'expr' field in /eval request 
body".into()), ); return ptr::null_mut(); } return dreq; } // POST /call// if method == HttpMethod::Post && path.starts_with("/call/") { let rest = &path[6..]; let slash = rest.find('/'); match slash { Some(pos) if pos + 1 < rest.len() => { let prog_name = &rest[..pos]; let cmd_name = &rest[pos + 1..]; let c_prog = CString::new(prog_name).unwrap_or_default(); *out_program = libc::strdup(c_prog.as_ptr()); (*dreq).method = DaemonMethod::Call; let c_cmd = CString::new(cmd_name).unwrap_or_default(); (*dreq).command = libc::strdup(c_cmd.as_ptr()); // Parse body for args let trimmed = body_str.trim(); if trimmed.starts_with('[') { let c = CString::new(trimmed).unwrap_or_default(); (*dreq).args_json = libc::strdup(c.as_ptr()); } else if trimmed.starts_with('{') { if let Ok(v) = serde_json::from_str::(trimmed) { if let Some(args) = v.get("args") { let args_str = serde_json::to_string(args).unwrap_or_default(); let c = CString::new(args_str).unwrap_or_default(); (*dreq).args_json = libc::strdup(c.as_ptr()); } } } return dreq; } _ => { libc::free(dreq as *mut c_void); set_errmsg( errmsg, &MorlocError::Other("Expected /call//".into()), ); return ptr::null_mut(); } } } // OPTIONS (CORS) if method == HttpMethod::Options { (*dreq).method = DaemonMethod::Health; return dreq; } libc::free(dreq as *mut c_void); let method_str = match method { HttpMethod::Get => "GET", HttpMethod::Post => "POST", HttpMethod::Delete => "DELETE", HttpMethod::Options => "OPTIONS", }; set_errmsg( errmsg, &MorlocError::Other(format!("Unknown router endpoint: {} {}", method_str, path)), ); ptr::null_mut() } // -- Router event loop -------------------------------------------------------- const ROUTER_MAX_LISTENERS: usize = 3; #[no_mangle] pub unsafe extern "C" fn router_run(config: *mut DaemonConfig, router: *mut Router) { extern "C" { fn http_parse_request(fd: i32, errmsg: *mut *mut c_char) -> *mut HttpRequest; fn http_free_request(req: *mut HttpRequest); fn http_write_response( fd: i32, status: i32, 
content_type: *const c_char,
            body: *const c_char,
            body_len: usize,
        ) -> bool;
        fn daemon_dispatch(
            manifest: *mut c_void,
            request: *mut DaemonRequest,
            sockets: *mut MorlocSocket,
            shm_basename: *const c_char,
        ) -> *mut DaemonResponse;
        fn daemon_serialize_response(
            response: *mut DaemonResponse,
            out_len: *mut usize,
        ) -> *mut c_char;
        fn daemon_free_request(req: *mut DaemonRequest);
        fn daemon_free_response(resp: *mut DaemonResponse);
        fn daemon_set_eval_timeout(timeout_sec: i32);
        fn manifest_to_discovery_json(manifest: *const c_void) -> *mut c_char;
    }
    daemon_set_eval_timeout((*config).eval_timeout);
    // Install signal handlers
    ROUTER_SHUTDOWN_REQUESTED.store(false, Ordering::Relaxed);
    // NOTE(review): the transmute turbofish parameters were lost in
    // extraction; the source type assumes router_signal_handler_fn is an
    // `extern "C" fn(i32)` -- confirm against its definition.
    let handler: libc::sighandler_t =
        std::mem::transmute::<extern "C" fn(i32), libc::sighandler_t>(router_signal_handler_fn);
    libc::signal(libc::SIGTERM, handler);
    libc::signal(libc::SIGINT, handler);
    let mut fds = [libc::pollfd {
        fd: -1,
        events: 0,
        revents: 0,
    }; ROUTER_MAX_LISTENERS];
    let mut nfds: usize = 0;
    // Content-Type shared by every response we write.
    let ct = b"application/json\0";
    // HTTP listener
    if (*config).http_port > 0 {
        let http_fd = libc::socket(libc::AF_INET, libc::SOCK_STREAM, 0);
        if http_fd < 0 {
            eprintln!("router: failed to create http socket");
            return;
        }
        let opt: i32 = 1;
        libc::setsockopt(
            http_fd,
            libc::SOL_SOCKET,
            libc::SO_REUSEADDR,
            &opt as *const i32 as *const c_void,
            std::mem::size_of::<i32>() as libc::socklen_t,
        );
        let mut addr: libc::sockaddr_in = std::mem::zeroed();
        addr.sin_family = libc::AF_INET as libc::sa_family_t;
        addr.sin_addr.s_addr = libc::INADDR_ANY;
        addr.sin_port = ((*config).http_port as u16).to_be();
        if libc::bind(
            http_fd,
            &addr as *const libc::sockaddr_in as *const libc::sockaddr,
            std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t,
        ) < 0
        {
            eprintln!(
                "router: failed to bind http port {}",
                (*config).http_port
            );
            libc::close(http_fd);
            return;
        }
        libc::listen(http_fd, 16);
        eprintln!("router: listening on http port {}", (*config).http_port);
        fds[nfds].fd = http_fd;
        fds[nfds].events = libc::POLLIN as i16;
        nfds += 1;
    }
    // Unix socket
    if !(*config).unix_socket_path.is_null() {
        let sock_fd = libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0);
        if sock_fd < 0 {
            eprintln!("router: failed to create unix socket");
            return;
        }
        let mut addr: libc::sockaddr_un = std::mem::zeroed();
        addr.sun_family = libc::AF_UNIX as libc::sa_family_t;
        let path_bytes = CStr::from_ptr((*config).unix_socket_path).to_bytes();
        let copy_len = path_bytes.len().min(addr.sun_path.len() - 1);
        ptr::copy_nonoverlapping(
            path_bytes.as_ptr() as *const c_char,
            addr.sun_path.as_mut_ptr(),
            copy_len,
        );
        // Remove any stale socket file left by a previous run.
        libc::unlink((*config).unix_socket_path);
        if libc::bind(
            sock_fd,
            &addr as *const libc::sockaddr_un as *const libc::sockaddr,
            std::mem::size_of::<libc::sockaddr_un>() as libc::socklen_t,
        ) < 0
        {
            eprintln!("router: failed to bind unix socket");
            libc::close(sock_fd);
            return;
        }
        libc::listen(sock_fd, 16);
        eprintln!(
            "router: listening on unix socket {}",
            CStr::from_ptr((*config).unix_socket_path).to_string_lossy()
        );
        fds[nfds].fd = sock_fd;
        fds[nfds].events = libc::POLLIN as i16;
        nfds += 1;
    }
    if nfds == 0 {
        eprintln!("router: no listeners configured");
        return;
    }
    // Eagerly start all program daemons so /health reports ok immediately
    for i in 0..(*router).n_programs {
        let prog = &mut *(*router).programs.add(i);
        if (*prog).daemon_pid <= 0 {
            let mut child_err: *mut c_char = ptr::null_mut();
            if router_start_program(prog, &mut child_err) {
                eprintln!(
                    "router: started daemon for '{}'",
                    CStr::from_ptr((*prog).name).to_string_lossy()
                );
            } else {
                let err_msg = if !child_err.is_null() {
                    let s = CStr::from_ptr(child_err).to_string_lossy().to_string();
                    libc::free(child_err as *mut c_void);
                    s
                } else {
                    "unknown error".to_string()
                };
                eprintln!(
                    "router: warning: failed to start daemon for '{}': {}",
                    CStr::from_ptr((*prog).name).to_string_lossy(),
                    err_msg
                );
            }
        }
    }
    while !ROUTER_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
        // 1s poll timeout so the shutdown flag is re-checked regularly.
        let ready = libc::poll(fds.as_mut_ptr(), nfds as libc::nfds_t, 1000);
        if ready < 0 {
            if crate::utility::errno_val() == libc::EINTR {
                continue;
            }
            eprintln!("router: poll error");
            break;
        }
        if ready == 0 {
            continue;
        }
        for i in 0..nfds {
            if fds[i].revents & libc::POLLIN as i16 == 0 {
                continue;
            }
            let client_fd = libc::accept(fds[i].fd, ptr::null_mut(), ptr::null_mut());
            if client_fd < 0 {
                continue;
            }
            let req_start = Instant::now();
            crate::utility::set_nosigpipe(client_fd);
            let tv = libc::timeval {
                tv_sec: 30,
                tv_usec: 0,
            };
            libc::setsockopt(
                client_fd,
                libc::SOL_SOCKET,
                libc::SO_RCVTIMEO,
                &tv as *const libc::timeval as *const c_void,
                std::mem::size_of::<libc::timeval>() as libc::socklen_t,
            );
            libc::setsockopt(
                client_fd,
                libc::SOL_SOCKET,
                libc::SO_SNDTIMEO,
                &tv as *const libc::timeval as *const c_void,
                std::mem::size_of::<libc::timeval>() as libc::socklen_t,
            );
            let mut err: *mut c_char = ptr::null_mut();
            let http_req = http_parse_request(client_fd, &mut err);
            if !err.is_null() {
                let body = b"{\"status\":\"error\",\"error\":\"Bad request\"}\0";
                http_write_response(
                    client_fd,
                    400,
                    ct.as_ptr() as *const c_char,
                    body.as_ptr() as *const c_char,
                    body.len() - 1,
                );
                libc::free(err as *mut c_void);
                let elapsed = req_start.elapsed();
                eprintln!("router: ??? ??? -> 400 ({:.1}ms)", elapsed.as_secs_f64() * 1000.0);
                libc::close(client_fd);
                continue;
            }
            // Extract method and path for access logging before request is consumed
            let log_method = match (*http_req).method {
                HttpMethod::Get => "GET",
                HttpMethod::Post => "POST",
                HttpMethod::Delete => "DELETE",
                HttpMethod::Options => "OPTIONS",
            };
            let log_path_cstr = CStr::from_ptr((*http_req).path.as_ptr());
            let log_path = log_path_cstr.to_str().unwrap_or("???").to_string();
            let mut target_program: *mut c_char = ptr::null_mut();
            let dreq = router_http_to_request(http_req, &mut target_program, &mut err);
            http_free_request(http_req);
            if !err.is_null() {
                let err_json = make_error_json(&CStr::from_ptr(err).to_string_lossy());
                let c = CString::new(err_json.as_str()).unwrap_or_default();
                http_write_response(
                    client_fd,
                    404,
                    ct.as_ptr() as *const c_char,
                    c.as_ptr(),
                    err_json.len(),
                );
                libc::free(err as *mut c_void);
                let elapsed = req_start.elapsed();
                eprintln!("router: {} {} -> 404 ({:.1}ms)", log_method, log_path, elapsed.as_secs_f64() * 1000.0);
                libc::close(client_fd);
                continue;
            }
            // Track response status for access log
            let mut resp_status: i32 = 200;
            // Router-level requests
            if target_program.is_null() {
                if (*dreq).method == DaemonMethod::Health {
                    // Aggregate per-program health
                    let mut all_ok = true;
                    let mut prog_entries = Vec::new();
                    for i in 0..(*router).n_programs {
                        let prog = &*(*router).programs.add(i);
                        let name = CStr::from_ptr(prog.name).to_string_lossy();
                        let alive = prog.daemon_pid > 0 && libc::kill(prog.daemon_pid, 0) == 0;
                        if !alive {
                            all_ok = false;
                        }
                        let status_str = if alive { "ok" } else { "error" };
                        prog_entries.push(serde_json::json!({
                            "program": name.as_ref(),
                            "status": status_str,
                        }));
                    }
                    let overall = if all_ok { "ok" } else { "degraded" };
                    let body = serde_json::json!({
                        "status": overall,
                        "programs": prog_entries,
                    }).to_string();
                    let status_code = if all_ok { 200 } else { 503 };
                    resp_status = status_code;
                    let c = CString::new(body.as_str()).unwrap_or_default();
http_write_response(
                        client_fd,
                        status_code,
                        ct.as_ptr() as *const c_char,
                        c.as_ptr(),
                        body.len(),
                    );
                } else if (*dreq).method == DaemonMethod::Discover {
                    // Router-level discovery: aggregate JSON over all programs.
                    let disco = router_build_discovery(router);
                    let disco_len = libc::strlen(disco);
                    http_write_response(
                        client_fd,
                        200,
                        ct.as_ptr() as *const c_char,
                        disco,
                        disco_len,
                    );
                    libc::free(disco as *mut c_void);
                } else if (*dreq).method == DaemonMethod::Eval {
                    // daemon_dispatch takes manifest as first arg, NULL is fine for eval
                    let resp = daemon_dispatch(ptr::null_mut(), dreq, ptr::null_mut(), ptr::null());
                    let mut resp_len: usize = 0;
                    let resp_json = daemon_serialize_response(resp, &mut resp_len);
                    let status = if (*resp).success { 200 } else { 500 };
                    resp_status = status;
                    http_write_response(
                        client_fd,
                        status,
                        ct.as_ptr() as *const c_char,
                        resp_json,
                        resp_len,
                    );
                    libc::free(resp_json as *mut c_void);
                    daemon_free_response(resp);
                }
                daemon_free_request(dreq);
                // Access log: "METHOD PATH -> STATUS (elapsed)".
                let elapsed = req_start.elapsed();
                eprintln!("router: {} {} -> {} ({:.1}ms)", log_method, log_path, resp_status, elapsed.as_secs_f64() * 1000.0);
                libc::close(client_fd);
                continue;
            }
            // Per-program request
            if (*dreq).method == DaemonMethod::Health {
                let mut found = false;
                for p in 0..(*router).n_programs {
                    let rprog = &*(*router).programs.add(p);
                    if CStr::from_ptr(rprog.name) == CStr::from_ptr(target_program) {
                        found = true;
                        // Liveness probe: signal 0 checks the pid without signaling.
                        let alive = rprog.daemon_pid > 0 && libc::kill(rprog.daemon_pid, 0) == 0;
                        let prog_str = CStr::from_ptr(rprog.name).to_string_lossy();
                        let body = if alive {
                            serde_json::json!({
                                "status": "ok",
                                "program": prog_str.as_ref(),
                            }).to_string()
                        } else {
                            resp_status = 503;
                            serde_json::json!({
                                "status": "error",
                                "program": prog_str.as_ref(),
                                "error": "daemon not running",
                            }).to_string()
                        };
                        let c = CString::new(body.as_str()).unwrap_or_default();
                        http_write_response(
                            client_fd,
                            resp_status,
                            ct.as_ptr() as *const c_char,
                            c.as_ptr(),
                            body.len(),
                        );
                        break;
                    }
                }
                if !found {
                    resp_status = 404;
                    // len - 1 below drops the trailing NUL from the byte literal.
                    let body = b"{\"status\":\"error\",\"error\":\"Unknown program\"}\0";
                    http_write_response(
                        client_fd,
                        404,
                        ct.as_ptr() as *const c_char,
                        body.as_ptr() as *const c_char,
                        body.len() - 1,
                    );
                }
            } else if (*dreq).method == DaemonMethod::Discover {
                let mut found = false;
                for p in 0..(*router).n_programs {
                    let rprog = &*(*router).programs.add(p);
                    if CStr::from_ptr(rprog.name) == CStr::from_ptr(target_program) {
                        // Only programs with a loaded manifest can answer; a
                        // matching program without one falls through to 404.
                        if !rprog.manifest.is_null() {
                            let disco = manifest_to_discovery_json(rprog.manifest);
                            let disco_len = libc::strlen(disco);
                            http_write_response(
                                client_fd,
                                200,
                                ct.as_ptr() as *const c_char,
                                disco,
                                disco_len,
                            );
                            libc::free(disco as *mut c_void);
                            found = true;
                        }
                        break;
                    }
                }
                if !found {
                    resp_status = 404;
                    let body = b"{\"status\":\"error\",\"error\":\"Unknown program\"}\0";
                    http_write_response(
                        client_fd,
                        404,
                        ct.as_ptr() as *const c_char,
                        body.as_ptr() as *const c_char,
                        body.len() - 1,
                    );
                }
            } else {
                // Forward to program daemon
                let resp = router_forward(router, target_program, dreq, &mut err);
                if !err.is_null() {
                    resp_status = 500;
                    let err_json = make_error_json(&CStr::from_ptr(err).to_string_lossy());
                    let c = CString::new(err_json.as_str()).unwrap_or_default();
                    http_write_response(
                        client_fd,
                        500,
                        ct.as_ptr() as *const c_char,
                        c.as_ptr(),
                        err_json.len(),
                    );
                    libc::free(err as *mut c_void);
                } else {
                    let mut resp_len: usize = 0;
                    let resp_json = daemon_serialize_response(resp, &mut resp_len);
                    let status = if (*resp).success { 200 } else { 500 };
                    resp_status = status;
                    http_write_response(
                        client_fd,
                        status,
                        ct.as_ptr() as *const c_char,
                        resp_json,
                        resp_len,
                    );
                    libc::free(resp_json as *mut c_void);
                    daemon_free_response(resp);
                }
            }
            libc::free(target_program as *mut c_void);
            daemon_free_request(dreq);
            let elapsed = req_start.elapsed();
            eprintln!("router: {} {} -> {} ({:.1}ms)", log_method, log_path, resp_status, elapsed.as_secs_f64() * 1000.0);
            libc::close(client_fd);
        }
    }
    // Kill all program daemons
    for i in 0..(*router).n_programs {
        let prog = &*(*router).programs.add(i);
        if prog.daemon_pid > 0 {
            libc::kill(prog.daemon_pid, libc::SIGTERM);
libc::unlink(prog.daemon_socket.as_ptr());
        }
    }
    // Wait for children
    for i in 0..(*router).n_programs {
        let prog = &*(*router).programs.add(i);
        if prog.daemon_pid > 0 {
            libc::waitpid(prog.daemon_pid, ptr::null_mut(), 0);
        }
    }
    // Close listeners
    for i in 0..nfds {
        libc::close(fds[i].fd);
    }
    if !(*config).unix_socket_path.is_null() {
        libc::unlink((*config).unix_socket_path);
    }
}

/// Build a JSON error response string.
fn make_error_json(error: &str) -> String {
    // NOTE(review): the Map type parameters were lost in extraction;
    // serde_json::Map is always keyed String -> Value.
    let map: serde_json::Map<String, serde_json::Value> = [
        ("status".into(), serde_json::Value::String("error".into())),
        ("error".into(), serde_json::Value::String(error.into())),
    ]
    .into_iter()
    .collect();
    serde_json::to_string(&map).unwrap_or_else(|_| "{}".into())
}



================================================
FILE: data/rust/morloc-runtime/src/schema.rs
================================================

use crate::error::MorlocError;

/// Morloc serial type identifiers, matching the C enum morloc_serial_type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum SerialType {
    Nil = 0,
    Bool = 1,
    Sint8 = 2,
    Sint16 = 3,
    Sint32 = 4,
    Sint64 = 5,
    Uint8 = 6,
    Uint16 = 7,
    Uint32 = 8,
    Uint64 = 9,
    Float32 = 10,
    Float64 = 11,
    Tensor = 12,
    String = 13,
    Array = 14,
    Tuple = 15,
    Map = 16,
    Optional = 17,
}

/// Schema character codes for parsing schema strings.
const SCHEMA_NIL: u8 = b'z';
const SCHEMA_BOOL: u8 = b'b';
const SCHEMA_SINT: u8 = b'i';
const SCHEMA_UINT: u8 = b'u';
const SCHEMA_FLOAT: u8 = b'f';
const SCHEMA_STRING: u8 = b's';
const SCHEMA_ARRAY: u8 = b'a';
const SCHEMA_TENSOR: u8 = b'T';
const SCHEMA_TUPLE: u8 = b't';
const SCHEMA_MAP: u8 = b'm';
const SCHEMA_OPTIONAL: u8 = b'?';

/// Recursive schema definition, mirroring the C Schema struct.
#[derive(Debug, Clone)]
pub struct Schema {
    pub serial_type: SerialType,
    /// Number of parameters (e.g., array has 1, tuple has N).
    pub size: usize,
    /// Byte width when stored in a fixed-width array.
    pub width: usize,
    /// Field offsets for tuples/records, or ndim storage for tensors.
    pub offsets: Vec<usize>,
    /// Optional type hint string.
    pub hint: Option<String>,
    /// Child schemas (element type for arrays, field types for tuples, etc.).
    pub parameters: Vec<Schema>,
    /// Field names for records (None for non-record types).
    pub keys: Vec<String>,
}

impl Schema {
    pub fn primitive(serial_type: SerialType) -> Self {
        use crate::shm;
        let width = match serial_type {
            SerialType::Nil => 0,
            SerialType::Bool | SerialType::Sint8 | SerialType::Uint8 => 1,
            SerialType::Sint16 | SerialType::Uint16 => 2,
            SerialType::Sint32 | SerialType::Uint32 | SerialType::Float32 => 4,
            SerialType::Sint64 | SerialType::Uint64 | SerialType::Float64 => 8,
            // NOTE(review): the size_of turbofish parameter was lost in
            // extraction; `usize` matches the pointer-sized slot width used by
            // the other variable-length constructors -- confirm against the C
            // Schema (the `use crate::shm;` above hints at a shm type here).
            SerialType::String => std::mem::size_of::<usize>(),
            _ => 0,
        };
        Schema {
            serial_type,
            size: 0,
            width,
            offsets: Vec::new(),
            hint: None,
            parameters: Vec::new(),
            keys: Vec::new(),
        }
    }
    /// Returns true if this type has a fixed byte width (no variable-length data).
    pub fn is_fixed_width(&self) -> bool {
        match self.serial_type {
            SerialType::Nil
            | SerialType::Bool
            | SerialType::Sint8
            | SerialType::Sint16
            | SerialType::Sint32
            | SerialType::Sint64
            | SerialType::Uint8
            | SerialType::Uint16
            | SerialType::Uint32
            | SerialType::Uint64
            | SerialType::Float32
            | SerialType::Float64 => true,
            // A tuple is fixed-width iff every field is.
            SerialType::Tuple => self.parameters.iter().all(|p| p.is_fixed_width()),
            SerialType::Optional => false,
            _ => false,
        }
    }
    /// Alignment requirement for this type.
pub fn alignment(&self) -> usize { match self.serial_type { SerialType::Nil => 1, SerialType::Bool | SerialType::Sint8 | SerialType::Uint8 => 1, SerialType::Sint16 | SerialType::Uint16 => 2, SerialType::Sint32 | SerialType::Uint32 | SerialType::Float32 => 4, SerialType::Sint64 | SerialType::Uint64 | SerialType::Float64 => 8, SerialType::String | SerialType::Array | SerialType::Map | SerialType::Tensor => { std::mem::size_of::() // pointer-sized alignment } SerialType::Tuple => { self.parameters .iter() .map(|p| p.alignment()) .max() .unwrap_or(1) } SerialType::Optional => { if let Some(inner) = self.parameters.first() { std::cmp::max(1, inner.alignment()) } else { 1 } } } } } /// Parse a schema string into a Schema tree. /// /// Positional format (no parentheses/commas): /// - `z` -> Nil, `b` -> Bool, `s` -> String /// - `i4` -> Sint32, `u8` -> Uint64, `f8` -> Float64 /// - `ai4` -> Array of Sint32 /// - `t2i4s` -> Tuple of (Sint32, String) /// - `m24namesi4` -> Map with keys "name"->String, "i4" (base-62 field count, then key-len + key + value for each) /// - `?i4` -> Optional Sint32 /// - `T2f8` -> 2D Tensor of Float64 /// - `i4` -> Sint32 with hint annotation pub fn parse_schema(input: &str) -> Result { let bytes = input.as_bytes(); let (schema, consumed) = parse_schema_r(bytes, 0)?; if consumed != bytes.len() { return Err(MorlocError::Schema(format!( "trailing characters after schema at position {consumed}" ))); } Ok(schema) } /// Recursive schema parser matching the C `parse_schema_r` format exactly. 
fn parse_schema_r(bytes: &[u8], pos: usize) -> Result<(Schema, usize), MorlocError> { if pos >= bytes.len() { return Err(MorlocError::Schema("unexpected end of schema".into())); } let c = bytes[pos]; let cur = pos + 1; match c { b'<' => { // Hint: <...> with nesting support, then parse the actual type let (hint, after_hint) = parse_hint(bytes, cur)?; let (mut schema, end) = parse_schema_r(bytes, after_hint)?; schema.hint = Some(hint); Ok((schema, end)) } SCHEMA_NIL => Ok((Schema::primitive(SerialType::Nil), cur)), SCHEMA_BOOL => Ok((Schema::primitive(SerialType::Bool), cur)), SCHEMA_STRING => { // String schema has one parameter (uint8) for array compatibility, // matching the C string_schema() constructor. Ok((Schema { serial_type: SerialType::String, size: 1, width: std::mem::size_of::(), offsets: Vec::new(), hint: None, parameters: vec![Schema::primitive(SerialType::Uint8)], keys: Vec::new(), }, cur)) } SCHEMA_SINT => parse_sized_int(bytes, cur, true), SCHEMA_UINT => parse_sized_int(bytes, cur, false), SCHEMA_FLOAT => parse_sized_float(bytes, cur), SCHEMA_ARRAY => { // Array: one child schema follows immediately let (child, end) = parse_schema_r(bytes, cur)?; Ok((make_array_schema(child), end)) } SCHEMA_OPTIONAL => { // Optional: one child schema follows immediately let (child, end) = parse_schema_r(bytes, cur)?; Ok((make_optional_schema(child), end)) } SCHEMA_TUPLE => { // Tuple: base-62 size char, then N child schemas if cur >= bytes.len() { return Err(MorlocError::Schema("expected tuple size".into())); } let n = decode_base62(bytes[cur])?; let mut params = Vec::with_capacity(n); let mut p = cur + 1; for _ in 0..n { let (child, end) = parse_schema_r(bytes, p)?; params.push(child); p = end; } Ok((make_tuple_schema(params), p)) } SCHEMA_MAP => { // Map/record: base-62 size char, then N (key_len_char + key_bytes + value_schema) if cur >= bytes.len() { return Err(MorlocError::Schema("expected map size".into())); } let n = decode_base62(bytes[cur])?; let mut params 
= Vec::with_capacity(n); let mut keys = Vec::with_capacity(n); let mut p = cur + 1; for _ in 0..n { // Read key: base-62 length char + that many bytes if p >= bytes.len() { return Err(MorlocError::Schema("expected map key length".into())); } let key_len = decode_base62(bytes[p])?; p += 1; if p + key_len > bytes.len() { return Err(MorlocError::Schema("map key extends past end".into())); } let key = std::str::from_utf8(&bytes[p..p + key_len]) .map_err(|_| MorlocError::Schema("invalid UTF-8 in map key".into()))? .to_string(); p += key_len; keys.push(key); // Read value schema let (child, end) = parse_schema_r(bytes, p)?; params.push(child); p = end; } Ok((make_map_schema(params, keys), p)) } SCHEMA_TENSOR => { // Tensor: base-62 ndim char, then element schema if cur >= bytes.len() { return Err(MorlocError::Schema("expected tensor ndim".into())); } let ndim = decode_base62(bytes[cur])?; let (child, end) = parse_schema_r(bytes, cur + 1)?; Ok((make_tensor_schema(ndim, child), end)) } _ => Err(MorlocError::Schema(format!( "unknown schema character '{}' at position {pos}", c as char ))), } } /// Parse hint with nested angle bracket support: `>` etc. 
fn parse_hint(bytes: &[u8], pos: usize) -> Result<(String, usize), MorlocError> { let mut depth: usize = 1; let start = pos; let mut cur = pos; while cur < bytes.len() { match bytes[cur] { b'<' => depth += 1, b'>' => { depth -= 1; if depth == 0 { let hint = std::str::from_utf8(&bytes[start..cur]) .unwrap_or("") .to_string(); return Ok((hint, cur + 1)); // skip closing '>' } } _ => {} } cur += 1; } Err(MorlocError::Schema("unclosed '<' in hint".into())) } fn parse_sized_int( bytes: &[u8], pos: usize, signed: bool, ) -> Result<(Schema, usize), MorlocError> { if pos >= bytes.len() { return Err(MorlocError::Schema("expected size after 'i'/'u'".into())); } // Size is a SINGLE base-62 character, not a multi-digit number let size = decode_base62(bytes[pos])?; let next = pos + 1; let st = match (signed, size) { (true, 1) => SerialType::Sint8, (true, 2) => SerialType::Sint16, (true, 4) => SerialType::Sint32, (true, 8) => SerialType::Sint64, (false, 1) => SerialType::Uint8, (false, 2) => SerialType::Uint16, (false, 4) => SerialType::Uint32, (false, 8) => SerialType::Uint64, _ => return Err(MorlocError::Schema(format!("invalid integer size {size}"))), }; Ok((Schema::primitive(st), next)) } fn parse_sized_float(bytes: &[u8], pos: usize) -> Result<(Schema, usize), MorlocError> { if pos >= bytes.len() { return Err(MorlocError::Schema("expected size after 'f'".into())); } // Size is a SINGLE base-62 character, not a multi-digit number let size = decode_base62(bytes[pos])?; let next = pos + 1; let st = match size { 4 => SerialType::Float32, 8 => SerialType::Float64, _ => return Err(MorlocError::Schema(format!("invalid float size {size}"))), }; Ok((Schema::primitive(st), next)) } /// Decode a single base-62 character to a number (0-63). 
/// 0-9 -> 0-9, a-z -> 10-35, A-Z -> 36-61, + -> 62, / -> 63
fn decode_base62(c: u8) -> Result<usize, MorlocError> {
    match c {
        b'0'..=b'9' => Ok((c - b'0') as usize),
        b'a'..=b'z' => Ok((c - b'a') as usize + 10),
        b'A'..=b'Z' => Ok((c - b'A') as usize + 36),
        b'+' => Ok(62),
        b'/' => Ok(63),
        _ => Err(MorlocError::Schema(format!(
            "invalid base-62 size character '{}'",
            c as char
        ))),
    }
}

/// Inverse of `decode_base62`; out-of-range values map to '\x07'.
fn encode_base62(n: usize) -> char {
    match n {
        0..=9 => (b'0' + n as u8) as char,
        10..=35 => (b'a' + (n - 10) as u8) as char,
        36..=61 => (b'A' + (n - 36) as u8) as char,
        62 => '+',
        63 => '/',
        _ => '\x07', // bell - error
    }
}

// ── Schema constructors ────────────────────────────────────────────────────

fn make_array_schema(child: Schema) -> Schema {
    Schema {
        serial_type: SerialType::Array,
        size: 1,
        // NOTE(review): the size_of turbofish parameter was lost in
        // extraction; `usize` gives the pointer-sized slot width used for
        // variable-length values -- confirm against the C Schema.
        width: std::mem::size_of::<usize>(),
        offsets: Vec::new(),
        hint: None,
        parameters: vec![child],
        keys: Vec::new(),
    }
}

fn make_optional_schema(child: Schema) -> Schema {
    // Layout: 1-byte presence flag, then the payload aligned to its natural
    // alignment; offsets[0] records where the payload starts.
    let align = child.alignment().max(1);
    let inner_offset = crate::shm::align_up(1, align);
    Schema {
        serial_type: SerialType::Optional,
        size: 1,
        width: inner_offset + child.width,
        offsets: vec![inner_offset],
        hint: None,
        parameters: vec![child],
        keys: Vec::new(),
    }
}

fn make_tuple_schema(params: Vec<Schema>) -> Schema {
    let (width, offsets) = calculate_tuple_layout(&params);
    let size = params.len();
    Schema {
        serial_type: SerialType::Tuple,
        size,
        width,
        offsets,
        hint: None,
        parameters: params,
        keys: Vec::new(),
    }
}

fn make_map_schema(params: Vec<Schema>, keys: Vec<String>) -> Schema {
    // Records share the tuple layout; keys carry the field names.
    let (width, offsets) = calculate_tuple_layout(&params);
    let size = params.len();
    Schema {
        serial_type: SerialType::Map,
        size,
        width,
        offsets,
        hint: None,
        parameters: params,
        keys,
    }
}

fn make_tensor_schema(ndim: usize, child: Schema) -> Schema {
    Schema {
        serial_type: SerialType::Tensor,
        size: 1,
        width: std::mem::size_of::<usize>(),
        // ndim is stashed in offsets[0] (read back by schema_to_string_inner).
        offsets: vec![ndim],
        hint: None,
        parameters: vec![child],
        keys: Vec::new(),
    }
}

/// Calculate byte offsets for tuple fields (C struct layout with natural alignment).
fn calculate_tuple_layout(params: &[Schema]) -> (usize, Vec) { let mut offsets = Vec::with_capacity(params.len()); let mut offset: usize = 0; let mut max_align: usize = 1; for param in params { let align = param.alignment(); max_align = std::cmp::max(max_align, align); // Align the offset offset = (offset + align - 1) & !(align - 1); offsets.push(offset); offset += param.width; } // Total width padded to max alignment let width = (offset + max_align - 1) & !(max_align - 1); (width, offsets) } /// Render a schema back to its string representation. pub fn schema_to_string(schema: &Schema) -> String { let mut buf = String::new(); schema_to_string_inner(schema, &mut buf); buf } fn schema_to_string_inner(schema: &Schema, buf: &mut String) { // Write hint if present if let Some(ref hint) = schema.hint { buf.push('<'); buf.push_str(hint); buf.push('>'); } match schema.serial_type { SerialType::Nil => buf.push('z'), SerialType::Bool => buf.push('b'), SerialType::Sint8 => buf.push_str("i1"), SerialType::Sint16 => buf.push_str("i2"), SerialType::Sint32 => buf.push_str("i4"), SerialType::Sint64 => buf.push_str("i8"), SerialType::Uint8 => buf.push_str("u1"), SerialType::Uint16 => buf.push_str("u2"), SerialType::Uint32 => buf.push_str("u4"), SerialType::Uint64 => buf.push_str("u8"), SerialType::Float32 => buf.push_str("f4"), SerialType::Float64 => buf.push_str("f8"), SerialType::String => buf.push('s'), SerialType::Array => { buf.push('a'); schema_to_string_inner(&schema.parameters[0], buf); } SerialType::Tuple => { buf.push('t'); buf.push(encode_base62(schema.size)); for p in &schema.parameters { schema_to_string_inner(p, buf); } } SerialType::Map => { buf.push('m'); buf.push(encode_base62(schema.size)); for (i, p) in schema.parameters.iter().enumerate() { if i < schema.keys.len() { let key = &schema.keys[i]; buf.push(encode_base62(key.len())); buf.push_str(key); } schema_to_string_inner(p, buf); } } SerialType::Optional => { buf.push('?'); 
schema_to_string_inner(&schema.parameters[0], buf); } SerialType::Tensor => { let ndim = schema.offsets.first().copied().unwrap_or(0); buf.push('T'); buf.push(encode_base62(ndim)); schema_to_string_inner(&schema.parameters[0], buf); } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_primitives() { assert_eq!(parse_schema("z").unwrap().serial_type, SerialType::Nil); assert_eq!(parse_schema("b").unwrap().serial_type, SerialType::Bool); assert_eq!(parse_schema("i4").unwrap().serial_type, SerialType::Sint32); assert_eq!(parse_schema("u8").unwrap().serial_type, SerialType::Uint64); assert_eq!(parse_schema("f8").unwrap().serial_type, SerialType::Float64); assert_eq!(parse_schema("s").unwrap().serial_type, SerialType::String); } #[test] fn test_parse_array() { let s = parse_schema("ai4").unwrap(); assert_eq!(s.serial_type, SerialType::Array); assert_eq!(s.parameters.len(), 1); assert_eq!(s.parameters[0].serial_type, SerialType::Sint32); } #[test] fn test_parse_tuple() { let s = parse_schema("t3i4sf8").unwrap(); assert_eq!(s.serial_type, SerialType::Tuple); assert_eq!(s.parameters.len(), 3); } #[test] fn test_parse_nested() { let s = parse_schema("at2i4s").unwrap(); assert_eq!(s.serial_type, SerialType::Array); assert_eq!(s.parameters[0].serial_type, SerialType::Tuple); assert_eq!(s.parameters[0].parameters.len(), 2); } #[test] fn test_parse_map() { let s = parse_schema("m21as1bi4").unwrap(); assert_eq!(s.serial_type, SerialType::Map); assert_eq!(s.parameters.len(), 2); assert_eq!(s.keys[0], "a"); assert_eq!(s.keys[1], "b"); } #[test] fn test_parse_optional() { let s = parse_schema("?f8").unwrap(); assert_eq!(s.serial_type, SerialType::Optional); assert_eq!(s.parameters[0].serial_type, SerialType::Float64); } #[test] fn test_parse_tensor() { let s = parse_schema("T2f8").unwrap(); assert_eq!(s.serial_type, SerialType::Tensor); assert_eq!(s.offsets[0], 2); // ndim assert_eq!(s.parameters[0].serial_type, SerialType::Float64); } #[test] fn 
test_parse_with_hints() { let s = parse_schema("f8").unwrap(); assert_eq!(s.serial_type, SerialType::Float64); assert_eq!(s.hint.as_deref(), Some("float")); // Nested hints: > let s = parse_schema(">ai4").unwrap(); assert_eq!(s.serial_type, SerialType::Array); assert_eq!(s.hint.as_deref(), Some("std::vector<$1>")); } #[test] fn test_roundtrip() { let cases = ["z", "b", "i4", "u8", "f8", "s", "ai4", "t2i4s", "?i4", "T2f8"]; for case in cases { let schema = parse_schema(case).unwrap(); let rendered = schema_to_string(&schema); assert_eq!(rendered, case, "roundtrip failed for '{case}'"); } } #[test] fn test_roundtrip_map() { let input = "m24names4infoi4"; let schema = parse_schema(input).unwrap(); assert_eq!(schema.serial_type, SerialType::Map); assert_eq!(schema.keys, vec!["name", "info"]); let rendered = schema_to_string(&schema); assert_eq!(rendered, input); } } #[cfg(test)] mod compat_tests { use super::*; fn dump(label: &str, s: &Schema, depth: usize) { let indent = " ".repeat(depth); print!("{}{}: type={} size={} width={}", indent, label, s.serial_type as u32, s.size, s.width); if !s.offsets.is_empty() { print!(" offsets={:?}", s.offsets); } if let Some(ref h) = s.hint { print!(" hint=\"{}\"", h); } if !s.keys.is_empty() { print!(" keys={:?}", s.keys); } println!(); for (i, p) in s.parameters.iter().enumerate() { dump(&format!("param[{}]", i), p, depth + 1); } } #[test] fn test_schema_compat_with_c() { // These must match the C output exactly let cases = vec![ ("s", "type=13 size=1 width=16"), ("ai4", "type=14 size=1 width=16"), ("t2i4s", "type=15 size=2 width=24"), ("?i4", "type=17 size=1 width=8"), ("?s", "type=17 size=1 width=24"), ("T2f8", "type=12 size=1 width=32"), ]; for (input, expected_root) in &cases { let s = parse_schema(input).unwrap(); let got = format!("type={} size={} width={}", s.serial_type as u32, s.size, s.width); assert_eq!(&got, *expected_root, "Schema '{}' mismatch", input); } // Verify tuple offsets let t = parse_schema("t2i4s").unwrap(); 
assert_eq!(t.offsets, vec![0, 8], "t2i4s offsets"); // Verify optional offsets let o = parse_schema("?i4").unwrap(); assert_eq!(o.offsets, vec![4], "?i4 offsets"); let os = parse_schema("?s").unwrap(); assert_eq!(os.offsets, vec![8], "?s offsets"); // Verify string has uint8 parameter let s = parse_schema("s").unwrap(); assert_eq!(s.parameters.len(), 1); assert_eq!(s.parameters[0].serial_type, SerialType::Uint8); assert_eq!(s.parameters[0].width, 1); // Verify tensor let t = parse_schema("T2f8").unwrap(); assert_eq!(t.offsets, vec![2]); // ndim assert_eq!(t.width, 32); // sizeof(Tensor) } } ================================================ FILE: data/rust/morloc-runtime/src/shm.rs ================================================ //! Shared memory management with multi-volume support. //! //! Replaces shm.c / memory.h. Uses AtomicU32 + futex for cross-process locking //! instead of pthread_rwlock_t, providing crash-safety and portability. use crate::error::MorlocError; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Mutex; /// Cross-platform file pre-allocation. /// Linux: posix_fallocate (allocates disk blocks). /// macOS: ftruncate (extends file, may be sparse). 
#[cfg(target_os = "linux")]
unsafe fn preallocate_fd(fd: i32, size: i64) -> i32 {
    libc::posix_fallocate(fd, 0, size)
}

#[cfg(target_os = "macos")]
unsafe fn preallocate_fd(fd: i32, size: i64) -> i32 {
    // ftruncate only extends the file; blocks are allocated lazily.
    if libc::ftruncate(fd, size) == -1 {
        -1
    } else {
        0
    }
}

// ── Constants ──────────────────────────────────────────────────────────────

pub const SHM_MAGIC: u32 = 0xFECA_0DF0;
pub const BLK_MAGIC: u32 = 0x0CB1_0DF0;
pub const MAX_VOLUME_NUMBER: usize = 32;
pub const MAX_FILENAME_SIZE: usize = 128;
pub const MAX_PATH_SIZE: usize = 512;

const LOCK_UNLOCKED: u32 = 0;
const LOCK_LOCKED: u32 = 1;
const SPIN_LIMIT: u32 = 40;
#[cfg(target_os = "linux")]
const LOCK_TIMEOUT_SECS: u64 = 5;

// ── Pointer types ──────────────────────────────────────────────────────────

/// Relative pointer: index into the multi-volume pool (cross-process safe).
pub type RelPtr = isize;
/// Volume-local pointer: offset within a single volume.
pub type VolPtr = isize;
/// Absolute pointer: virtual address in this process.
pub type AbsPtr = *mut u8;

pub const RELNULL: RelPtr = -1;
pub const VOLNULL: VolPtr = -1;

// ── Block alignment ────────────────────────────────────────────────────────

// NOTE(review): the alignment target type was lost in a bad merge; usize
// matches BlockHeader's alignment (8 on 64-bit) and the align_up(.., 8)
// tests below — confirm against the original shm.c.
pub const BLOCK_ALIGN: usize = std::mem::align_of::<usize>();

#[inline]
pub const fn align_up(x: usize, align: usize) -> usize {
    (x + align - 1) & !(align - 1)
}

// ── Shared memory header (lives in mmap'd region) ──────────────────────────

#[repr(C)]
pub struct ShmHeader {
    pub magic: u32,
    pub volume_name: [u8; MAX_FILENAME_SIZE],
    pub volume_index: i32,
    pub volume_size: usize,
    pub relative_offset: usize,
    pub lock: AtomicU32,
    pub cursor: VolPtr,
}

#[repr(C)]
pub struct BlockHeader {
    pub magic: u32,
    pub reference_count: AtomicU32,
    pub size: usize,
}

// Compile-time check: BlockHeader must have no padding so that C and Rust
// agree on the layout (restored type parameters; mirrors
// tests::test_block_header_no_padding).
const _: () = assert!(
    std::mem::size_of::<BlockHeader>()
        == std::mem::size_of::<u32>()
            + std::mem::size_of::<AtomicU32>()
            + std::mem::size_of::<usize>()
);

// ── Voidstar data structures (used by serialization) ───────────────────────

/// Variable-length array/string representation in SHM.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct Array {
    pub size: usize,
    pub data: RelPtr,
}

/// N-dimensional dense tensor in SHM.
#[repr(C)]
pub struct Tensor {
    pub total_elements: usize,
    pub device_type: u32,
    pub device_id: u32,
    pub data: RelPtr,
    pub shape: RelPtr,
}

// ── Send wrapper for raw pointers ──────────────────────────────────────────

#[derive(Clone, Copy)]
struct SendPtr(*mut ShmHeader);

// SAFETY: ShmHeader lives in mmap'd shared memory that outlives all threads.
// Access is serialized via VOLUMES Mutex and per-volume AtomicU32 futex locks.
unsafe impl Send for SendPtr {}

impl SendPtr {
    const fn null() -> Self {
        SendPtr(std::ptr::null_mut())
    }
    fn is_null(&self) -> bool {
        self.0.is_null()
    }
    fn ptr(&self) -> *mut ShmHeader {
        self.0
    }
    fn set(&mut self, p: *mut ShmHeader) {
        self.0 = p;
    }
}

/// Read a NUL-terminated string from a fixed-size filename buffer.
fn get_cstr_buf(buf: &[u8; MAX_FILENAME_SIZE]) -> &str {
    get_cstr(buf.as_slice())
}

// ── Global state ───────────────────────────────────────────────────────────

static CURRENT_VOLUME: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
static VOLUMES: Mutex<[SendPtr; MAX_VOLUME_NUMBER]> =
    Mutex::new([SendPtr::null(); MAX_VOLUME_NUMBER]);
static ALLOC_MUTEX: Mutex<()> = Mutex::new(());
static COMMON_BASENAME: Mutex<[u8; MAX_FILENAME_SIZE]> = Mutex::new([0u8; MAX_FILENAME_SIZE]);
static FALLBACK_DIR: Mutex<[u8; MAX_FILENAME_SIZE]> = Mutex::new([0u8; MAX_FILENAME_SIZE]);

/// Copy `s` into `buf` as a NUL-terminated C string, truncating if needed.
fn set_cstr(buf: &mut [u8], s: &str) {
    let bytes = s.as_bytes();
    let len = bytes.len().min(buf.len() - 1);
    buf[..len].copy_from_slice(&bytes[..len]);
    buf[len] = 0;
}

/// Read a NUL-terminated string from `buf`; invalid UTF-8 yields "".
fn get_cstr(buf: &[u8]) -> &str {
    let end = buf.iter().position(|&b| b == 0).unwrap_or(buf.len());
    std::str::from_utf8(&buf[..end]).unwrap_or("")
}

// ── Public API ─────────────────────────────────────────────────────────────

/// Set fallback directory for file-backed SHM when /dev/shm is too small.
pub fn shm_set_fallback_dir(dir: &str) { let mut fb = FALLBACK_DIR.lock().unwrap(); set_cstr(&mut *fb, dir); } /// Initialize a new SHM volume. pub fn shinit( shm_basename: &str, volume_index: usize, shm_size: usize, ) -> Result<*mut ShmHeader, MorlocError> { let full_size = shm_size + std::mem::size_of::(); let shm_name = format!("{}_{}", shm_basename, volume_index); // Store common basename { let mut cb = COMMON_BASENAME.lock().unwrap(); set_cstr(&mut *cb, shm_basename); } // Try POSIX shared memory first, fall back to file-backed let (fd, created, volume_label, actual_full_size) = try_open_shm(&shm_name, full_size)?; // SAFETY: mmap with MAP_SHARED on a valid fd obtained from shm_open/open above. // The returned pointer is checked against MAP_FAILED before use. let ptr = unsafe { libc::mmap( std::ptr::null_mut(), actual_full_size, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED, fd, 0, ) }; // SAFETY: fd is a valid file descriptor opened above. unsafe { libc::close(fd) }; if ptr == libc::MAP_FAILED { return Err(MorlocError::Shm(format!( "Failed to mmap volume '{}' ({} bytes)", volume_label, actual_full_size ))); } let shm = ptr as *mut ShmHeader; // Store in volumes array { let mut vols = VOLUMES.lock().unwrap(); vols[volume_index].set(shm); } let actual_data_size = actual_full_size - std::mem::size_of::(); if created { // SAFETY: shm points to the start of our mmap'd region of actual_full_size bytes. // We just created it, so we have exclusive access for initialization. 
unsafe { (*shm).magic = SHM_MAGIC; let mut name_buf = [0u8; MAX_FILENAME_SIZE]; set_cstr(&mut name_buf, &volume_label); (*shm).volume_name = name_buf; (*shm).volume_index = volume_index as i32; // Calculate relative offset from prior volumes let vols = VOLUMES.lock().unwrap(); let mut rel_offset = 0usize; for i in 0..volume_index { if !vols[i].is_null() { rel_offset += (*vols[i].ptr()).volume_size; } } (*shm).relative_offset = rel_offset; (*shm).volume_size = actual_data_size; (*shm).lock = AtomicU32::new(LOCK_UNLOCKED); (*shm).cursor = 0; // Initialize first block header let first_block = (shm as *mut u8).add(std::mem::size_of::()) as *mut BlockHeader; (*first_block).magic = BLK_MAGIC; (*first_block).reference_count = AtomicU32::new(0); (*first_block).size = actual_data_size - std::mem::size_of::(); } } Ok(shm) } /// Open an existing SHM volume (or return cached pointer). pub fn shopen(volume_index: usize) -> Result, MorlocError> { { let vols = VOLUMES.lock().unwrap(); if !vols[volume_index].is_null() { return Ok(Some(vols[volume_index].ptr())); } } let basename = { let cb = COMMON_BASENAME.lock().unwrap(); get_cstr_buf(&cb).to_string() }; if basename.is_empty() { return Ok(None); } let shm_name = format!("{}_{}", basename, volume_index); // Try POSIX SHM let name_cstr = std::ffi::CString::new(shm_name.as_str()).unwrap(); // SAFETY: name_cstr is a valid null-terminated CString. let fd = unsafe { libc::shm_open(name_cstr.as_ptr(), libc::O_RDWR, 0o666) }; let fd = if fd == -1 { // Try file-backed fallback let fb = FALLBACK_DIR.lock().unwrap(); let fallback = get_cstr_buf(&fb); if fallback.is_empty() { return Ok(None); } let file_path = format!("{}/{}", fallback, shm_name); let path_cstr = std::ffi::CString::new(file_path.as_str()).unwrap(); let fd2 = unsafe { libc::open(path_cstr.as_ptr(), libc::O_RDWR) }; if fd2 == -1 { return Ok(None); } fd2 } else { fd }; // SAFETY: zeroed memory is valid for libc::stat. fstat/close on valid fd. 
let mut sb: libc::stat = unsafe { std::mem::zeroed() }; if unsafe { libc::fstat(fd, &mut sb) } == -1 { unsafe { libc::close(fd) }; return Err(MorlocError::Shm(format!( "Cannot fstat SHM volume '{}'", shm_name ))); } let volume_size = sb.st_size as usize; // SAFETY: mmap with MAP_SHARED on a valid fd; result checked against MAP_FAILED. let ptr = unsafe { libc::mmap( std::ptr::null_mut(), volume_size, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED, fd, 0, ) }; // SAFETY: fd is a valid file descriptor opened above. unsafe { libc::close(fd) }; if ptr == libc::MAP_FAILED { return Err(MorlocError::Shm(format!( "Cannot mmap SHM volume '{}'", shm_name ))); } let shm = ptr as *mut ShmHeader; { let mut vols = VOLUMES.lock().unwrap(); vols[volume_index].set(shm); } Ok(Some(shm)) } /// Close and unlink all SHM volumes. pub fn shclose() -> Result<(), MorlocError> { let _lock = ALLOC_MUTEX.lock().unwrap(); let mut vols = VOLUMES.lock().unwrap(); for i in 0..MAX_VOLUME_NUMBER { let shm = if !vols[i].is_null() { vols[i].ptr() } else { continue; }; // SAFETY: shm is a valid mmap'd pointer stored in VOLUMES. // munmap/unlink on regions we own. Name read from valid ShmHeader. unsafe { let name = get_cstr(&(*shm).volume_name).to_string(); let full_size = (*shm).volume_size + std::mem::size_of::(); libc::munmap(shm as *mut libc::c_void, full_size); // Unlink: file-backed volumes start with '/', POSIX SHM does not if name.starts_with('/') { let cstr = std::ffi::CString::new(name.as_str()).unwrap(); libc::unlink(cstr.as_ptr()); } else { let cstr = std::ffi::CString::new(name.as_str()).unwrap(); libc::shm_unlink(cstr.as_ptr()); } } vols[i] = SendPtr::null(); } Ok(()) } /// Allocate `size` bytes from shared memory. pub fn shmalloc(size: usize) -> Result { // Allow 0-size: round up to minimum block alignment. // Needed for nil type (width=0) in morloc_eval. 
let size = if size == 0 { BLOCK_ALIGN } else { align_up(size, BLOCK_ALIGN) }; let _lock = ALLOC_MUTEX.lock().unwrap(); shmalloc_unlocked(size) } /// Copy data into a new SHM allocation. pub fn shmemcpy(src: *const u8, size: usize) -> Result { let dest = shmalloc(size)?; // SAFETY: dest is a freshly allocated SHM block of `size` bytes. // Caller guarantees src points to `size` readable bytes. unsafe { std::ptr::copy_nonoverlapping(src, dest, size) }; Ok(dest) } /// Allocate and zero-fill. pub fn shcalloc(nmemb: usize, size: usize) -> Result { let total = nmemb * size; let ptr = shmalloc(total)?; // SAFETY: ptr is a freshly allocated SHM block of `total` bytes. unsafe { std::ptr::write_bytes(ptr, 0, total) }; Ok(ptr) } /// Free a shared memory block (decrement reference count). pub fn shfree(ptr: AbsPtr) -> Result<(), MorlocError> { let _lock = ALLOC_MUTEX.lock().unwrap(); shfree_unlocked(ptr) } /// Increment reference count on a shared memory block. pub fn shincref(ptr: AbsPtr) -> Result<(), MorlocError> { if ptr.is_null() { return Err(MorlocError::Shm("Cannot incref NULL pointer".into())); } // SAFETY: ptr was returned by shmalloc, which places a BlockHeader immediately before // the returned data pointer. Magic check below validates the header. let blk = unsafe { &*(ptr.sub(std::mem::size_of::()) as *const BlockHeader) }; if blk.magic != BLK_MAGIC { return Err(MorlocError::Shm("Corrupted memory - invalid magic".into())); } blk.reference_count.fetch_add(1, Ordering::AcqRel); Ok(()) } /// Convert relative pointer to absolute pointer. pub fn rel2abs(ptr: RelPtr) -> Result { if ptr < 0 { return Err(MorlocError::Shm(format!("Illegal relptr value {}", ptr))); } let mut remaining = ptr as usize; // First try with volumes already mapped { let vols = VOLUMES.lock().unwrap(); for i in 0..MAX_VOLUME_NUMBER { if vols[i].is_null() { break; // No more volumes mapped } let shm = vols[i].ptr(); // SAFETY: shm is a valid mmap'd ShmHeader pointer from VOLUMES. 
let vol_size = unsafe { (*shm).volume_size }; if remaining < vol_size { // SAFETY: data region starts after ShmHeader; remaining < vol_size // guarantees the offset is within the mmap'd region. let base = unsafe { (shm as *const u8).add(std::mem::size_of::()) }; return Ok(unsafe { base.add(remaining) as AbsPtr }); } remaining -= vol_size; } } // If not found, try opening unmapped volumes remaining = ptr as usize; for i in 0..MAX_VOLUME_NUMBER { let shm = match shopen(i)? { Some(s) => s, None => { return Err(MorlocError::Shm(format!( "Failed to find volume for relptr {}", ptr ))); } }; // SAFETY: shm is a valid mmap'd ShmHeader pointer from shopen. let vol_size = unsafe { (*shm).volume_size }; if remaining < vol_size { // SAFETY: same as above - offset within mmap'd region. let base = unsafe { (shm as *const u8).add(std::mem::size_of::()) }; return Ok(unsafe { base.add(remaining) as AbsPtr }); } remaining -= vol_size; } Err(MorlocError::Shm(format!( "Shared memory pool does not contain index {}", ptr ))) } /// Convert absolute pointer to relative pointer. pub fn abs2rel(ptr: AbsPtr) -> Result { let vols = VOLUMES.lock().unwrap(); for i in 0..MAX_VOLUME_NUMBER { let shm = vols[i].ptr(); if shm.is_null() { continue; } // SAFETY: shm is a valid mmap'd ShmHeader from VOLUMES. We compute // data region bounds and check ptr falls within before computing offset. unsafe { let data_start = (shm as *const u8).add(std::mem::size_of::()); let data_end = data_start.add((*shm).volume_size); let p = ptr as *const u8; if p >= data_start && p < data_end { let offset = p.offset_from(data_start) as usize; return Ok(((*shm).relative_offset + offset) as RelPtr); } } } Err(MorlocError::Shm(format!( "Failed to find absptr {:?} in shared memory", ptr ))) } /// Find the ShmHeader for a given absolute pointer. 
pub fn abs2shm(ptr: AbsPtr) -> Result<*mut ShmHeader, MorlocError> { let vols = VOLUMES.lock().unwrap(); for i in 0..MAX_VOLUME_NUMBER { let shm = vols[i].ptr(); if shm.is_null() { continue; } // SAFETY: shm is a valid mmap'd ShmHeader from VOLUMES. unsafe { let data_start = (shm as *const u8).add(std::mem::size_of::()); let data_end = data_start.add((*shm).volume_size); let p = ptr as *const u8; if p >= data_start && p < data_end { return Ok(shm); } } } Err(MorlocError::Shm("Failed to find absptr in SHM".into())) } /// Total size of all SHM volumes. pub fn total_shm_size() -> usize { let vols = VOLUMES.lock().unwrap(); let mut total = 0; for i in 0..MAX_VOLUME_NUMBER { if !vols[i].is_null() { // SAFETY: non-null VOLUMES entries are valid mmap'd ShmHeader pointers. total += unsafe { (*vols[i].ptr()).volume_size }; } } total } // ── Internal helpers ─────────────────────────────────────────────────────── fn try_open_shm( shm_name: &str, full_size: usize, ) -> Result<(i32, bool, String, usize), MorlocError> { let name_cstr = std::ffi::CString::new(shm_name).unwrap(); // Try POSIX SHM let fd = unsafe { libc::shm_open( name_cstr.as_ptr(), libc::O_RDWR | libc::O_CREAT, 0o666, ) }; if fd >= 0 { let mut sb: libc::stat = unsafe { std::mem::zeroed() }; if unsafe { libc::fstat(fd, &mut sb) } == -1 { unsafe { libc::close(fd) }; return Err(MorlocError::Shm(format!("fstat failed for '{}'", shm_name))); } let created = sb.st_size == 0; if created { let err = unsafe { preallocate_fd(fd, full_size as i64) }; if err == 0 { return Ok((fd, true, shm_name.to_string(), full_size)); } // /dev/shm too small, clean up and try file-backed unsafe { libc::close(fd); libc::shm_unlink(name_cstr.as_ptr()); } } else { return Ok((fd, false, shm_name.to_string(), sb.st_size as usize)); } } // Try file-backed fallback let fb = FALLBACK_DIR.lock().unwrap(); let fallback = get_cstr_buf(&fb); if fallback.is_empty() { return Err(MorlocError::Shm(format!( "Failed to allocate SHM '{}': /dev/shm too small 
and no fallback directory", shm_name ))); } let file_path = format!("{}/{}", fallback, shm_name); drop(fb); let path_cstr = std::ffi::CString::new(file_path.as_str()).unwrap(); let fd = unsafe { libc::open(path_cstr.as_ptr(), libc::O_RDWR | libc::O_CREAT, 0o666) }; if fd == -1 { return Err(MorlocError::Shm(format!( "Failed to create file-backed volume '{}'", file_path ))); } let mut sb: libc::stat = unsafe { std::mem::zeroed() }; if unsafe { libc::fstat(fd, &mut sb) } == -1 { unsafe { libc::close(fd) }; return Err(MorlocError::Shm(format!("fstat failed for '{}'", file_path))); } let created = sb.st_size == 0; let actual_size = if created { let err = unsafe { preallocate_fd(fd, full_size as i64) }; if err != 0 { unsafe { libc::close(fd); libc::unlink(path_cstr.as_ptr()); } return Err(MorlocError::Shm(format!( "Failed to allocate file-backed volume '{}' ({} bytes)", file_path, full_size ))); } full_size } else { sb.st_size as usize }; Ok((fd, created, file_path, actual_size)) } fn shmalloc_unlocked(size: usize) -> Result { let mut shm: *mut ShmHeader = std::ptr::null_mut(); let blk = find_free_block(size, &mut shm)?; // Split and claim let final_blk = split_block(shm, blk, size)?; // SAFETY: final_blk is a valid BlockHeader in mmap'd SHM found by find_free_block. // The data region starts immediately after the header. unsafe { (*final_blk).reference_count.store(1, Ordering::Release); Ok((final_blk as *mut u8).add(std::mem::size_of::())) } } fn shfree_unlocked(ptr: AbsPtr) -> Result<(), MorlocError> { if ptr.is_null() { return Err(MorlocError::Shm("Cannot free NULL pointer".into())); } // SAFETY: ptr was returned by shmalloc, which places a BlockHeader // immediately before the data. Magic check validates correctness. 
let blk = unsafe { &*(ptr.sub(std::mem::size_of::()) as *const BlockHeader) }; if blk.magic != BLK_MAGIC { return Err(MorlocError::Shm("Corrupted memory".into())); } if blk.reference_count.load(Ordering::Acquire) == 0 { return Err(MorlocError::Shm("Reference count already 0".into())); } let prev = blk.reference_count.fetch_sub(1, Ordering::AcqRel); if prev == 1 { // SAFETY: ptr points to blk.size bytes of SHM data we own (refcount just hit 0). unsafe { std::ptr::write_bytes(ptr, 0, blk.size); } } Ok(()) } fn find_free_block( size: usize, shm_out: &mut *mut ShmHeader, ) -> Result<*mut BlockHeader, MorlocError> { let cv = CURRENT_VOLUME.load(Ordering::Relaxed); let vols = VOLUMES.lock().unwrap(); // Try current volume first let shm = vols[cv].ptr(); if !shm.is_null() { if let Some(blk) = find_free_block_in_volume(shm, size)? { *shm_out = shm; return Ok(blk); } } // Search all volumes for i in 0..MAX_VOLUME_NUMBER { let shm = vols[i].ptr(); if shm.is_null() { // Create a new volume drop(vols); let new_size = std::cmp::max(size * 2, 0xffff); let basename = { let cb = COMMON_BASENAME.lock().unwrap(); get_cstr_buf(&cb).to_string() }; let new_shm = shinit(&basename, i, new_size)?; CURRENT_VOLUME.store(i, Ordering::Relaxed); *shm_out = new_shm; let blk = unsafe { (new_shm as *mut u8).add(std::mem::size_of::()) as *mut BlockHeader }; return Ok(blk); } if let Some(blk) = find_free_block_in_volume(shm, size)? 
{ CURRENT_VOLUME.store(i, Ordering::Relaxed); *shm_out = shm; return Ok(blk); } } Err(MorlocError::Shm(format!( "Could not find suitable block for {} bytes", size ))) } fn find_free_block_in_volume( shm: *mut ShmHeader, size: usize, ) -> Result, MorlocError> { unsafe { let shm_end = (shm as *const u8) .add(std::mem::size_of::()) .add((*shm).volume_size); shm_lock(&(*shm).lock)?; // Try cursor position first let cursor = (*shm).cursor; if cursor != VOLNULL { let blk = vol2abs_raw(cursor, shm); let blk = blk as *mut BlockHeader; if (*blk).magic == BLK_MAGIC && (*blk).reference_count.load(Ordering::Relaxed) == 0 && (*blk).size >= size { shm_unlock(&(*shm).lock); return Ok(Some(blk)); } } // Scan from cursor forward let start_blk = if cursor != VOLNULL { vol2abs_raw(cursor, shm) as *mut BlockHeader } else { vol2abs_raw(0, shm) as *mut BlockHeader }; if let Some(blk) = scan_volume(start_blk, size, shm_end as *const u8) { shm_unlock(&(*shm).lock); return Ok(Some(blk)); } // Wrap around: scan from beginning to cursor if cursor > 0 { let first_blk = vol2abs_raw(0, shm) as *mut BlockHeader; let cursor_end = vol2abs_raw(cursor, shm); if let Some(blk) = scan_volume(first_blk, size, cursor_end as *const u8) { shm_unlock(&(*shm).lock); return Ok(Some(blk)); } } shm_unlock(&(*shm).lock); Ok(None) } } /// Scan a volume region for a free block of at least `size` bytes, merging adjacent free blocks. /// /// # Safety /// `blk` must point to a valid BlockHeader within an mmap'd SHM volume. /// `end` must point to the byte past the end of the volume's data region. 
unsafe fn scan_volume( mut blk: *mut BlockHeader, size: usize, end: *const u8, ) -> Option<*mut BlockHeader> { let hdr_size = std::mem::size_of::(); while (blk as *const u8).add(hdr_size + size) <= end { if blk.is_null() || (*blk).magic != BLK_MAGIC { return None; } // Merge adjacent free blocks while (*blk).reference_count.load(Ordering::Relaxed) == 0 { let next = (blk as *mut u8).add(hdr_size + (*blk).size) as *mut BlockHeader; if (next as *const u8) >= end || (*next).magic != BLK_MAGIC || (*next).reference_count.load(Ordering::Relaxed) != 0 { break; } (*blk).size += hdr_size + (*next).size; } if (*blk).reference_count.load(Ordering::Relaxed) == 0 && (*blk).size >= size { return Some(blk); } blk = (blk as *mut u8).add(hdr_size + (*blk).size) as *mut BlockHeader; } None } fn split_block( shm: *mut ShmHeader, blk: *mut BlockHeader, size: usize, ) -> Result<*mut BlockHeader, MorlocError> { unsafe { if (*blk).size == size { return Ok(blk); } shm_lock(&(*shm).lock)?; let remaining = (*blk).size - size; (*blk).size = size; let hdr_size = std::mem::size_of::(); let new_free = (blk as *mut u8).add(hdr_size + size) as *mut BlockHeader; if remaining > hdr_size { (*new_free).magic = BLK_MAGIC; (*new_free).reference_count = AtomicU32::new(0); (*new_free).size = remaining - hdr_size; // Update cursor let data_start = (shm as *const u8).add(std::mem::size_of::()); (*shm).cursor = (new_free as *const u8).offset_from(data_start) as VolPtr; } else { (*blk).size += remaining; (*shm).cursor = VOLNULL; } shm_unlock(&(*shm).lock); Ok(blk) } } /// Convert a volume-local offset to an absolute pointer. /// /// # Safety /// `shm` must be a valid mmap'd ShmHeader. `ptr` must be within the volume's data region. 
#[inline] unsafe fn vol2abs_raw(ptr: VolPtr, shm: *const ShmHeader) -> *mut u8 { (shm as *const u8) .add(std::mem::size_of::()) .add(ptr as usize) as *mut u8 } // ── Futex-based lock ─────────────────────────────────────────────────────── /// Acquire a futex-based cross-process lock on shared memory. /// /// # Safety /// `lock` must point to an AtomicU32 in mmap'd shared memory that /// persists for the duration of the lock. The caller must call shm_unlock /// on the same lock when done. pub unsafe fn shm_lock(lock: &AtomicU32) -> Result<(), MorlocError> { if lock .compare_exchange_weak(LOCK_UNLOCKED, LOCK_LOCKED, Ordering::Acquire, Ordering::Relaxed) .is_ok() { return Ok(()); } for _ in 0..SPIN_LIMIT { std::hint::spin_loop(); if lock .compare_exchange_weak(LOCK_UNLOCKED, LOCK_LOCKED, Ordering::Acquire, Ordering::Relaxed) .is_ok() { return Ok(()); } } shm_lock_slow(lock) } #[cfg(target_os = "linux")] unsafe fn shm_lock_slow(lock: &AtomicU32) -> Result<(), MorlocError> { let timeout = libc::timespec { tv_sec: LOCK_TIMEOUT_SECS as i64, tv_nsec: 0, }; loop { let ptr = lock as *const AtomicU32 as *const u32; libc::syscall( libc::SYS_futex, ptr, libc::FUTEX_WAIT, LOCK_LOCKED, &timeout as *const libc::timespec, std::ptr::null::(), 0u32, ); if lock .compare_exchange_weak(LOCK_UNLOCKED, LOCK_LOCKED, Ordering::Acquire, Ordering::Relaxed) .is_ok() { return Ok(()); } if lock.load(Ordering::Relaxed) == LOCK_LOCKED { if lock .compare_exchange(LOCK_LOCKED, LOCK_UNLOCKED, Ordering::AcqRel, Ordering::Relaxed) .is_ok() { if lock .compare_exchange(LOCK_UNLOCKED, LOCK_LOCKED, Ordering::Acquire, Ordering::Relaxed) .is_ok() { return Ok(()); } } } } } /// macOS fallback: spin-yield loop (no futex available). 
#[cfg(target_os = "macos")] unsafe fn shm_lock_slow(lock: &AtomicU32) -> Result<(), MorlocError> { loop { std::thread::yield_now(); if lock .compare_exchange_weak(LOCK_UNLOCKED, LOCK_LOCKED, Ordering::Acquire, Ordering::Relaxed) .is_ok() { return Ok(()); } } } /// Release a futex-based cross-process lock on shared memory. /// /// # Safety /// `lock` must be the same AtomicU32 previously acquired via shm_lock. pub unsafe fn shm_unlock(lock: &AtomicU32) { lock.store(LOCK_UNLOCKED, Ordering::Release); #[cfg(target_os = "linux")] { let ptr = lock as *const AtomicU32 as *const u32; libc::syscall( libc::SYS_futex, ptr, libc::FUTEX_WAKE, 1, std::ptr::null::(), std::ptr::null::(), 0u32, ); } // macOS: no futex wake needed; spin-yield waiters will see the store. } // ── Pointer conversion helpers ───────────────────────────────────────────── #[inline] pub fn vol2rel(ptr: VolPtr, shm: &ShmHeader) -> RelPtr { shm.relative_offset as RelPtr + ptr } /// # Safety /// `shm` must be a valid mmap'd ShmHeader. `ptr` must be within the volume's data region. 
#[inline] pub unsafe fn vol2abs(ptr: VolPtr, shm: *const ShmHeader) -> AbsPtr { vol2abs_raw(ptr, shm) } // ── Tests ────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; #[test] fn test_block_header_no_padding() { assert_eq!( std::mem::size_of::(), 4 + 4 + std::mem::size_of::() ); } #[test] fn test_align_up() { assert_eq!(align_up(0, 8), 0); assert_eq!(align_up(1, 8), 8); assert_eq!(align_up(7, 8), 8); assert_eq!(align_up(8, 8), 8); assert_eq!(align_up(9, 8), 16); } #[test] fn test_pointer_constants() { assert_eq!(RELNULL, -1); assert_eq!(VOLNULL, -1); } #[test] fn test_lock_unlock() { let lock = AtomicU32::new(LOCK_UNLOCKED); unsafe { shm_lock(&lock).unwrap(); assert_eq!(lock.load(Ordering::Relaxed), LOCK_LOCKED); shm_unlock(&lock); assert_eq!(lock.load(Ordering::Relaxed), LOCK_UNLOCKED); } } #[test] fn test_array_struct_size() { assert_eq!( std::mem::size_of::(), std::mem::size_of::() + std::mem::size_of::() ); } #[test] fn test_shinit_and_shmalloc() { // Use file-backed SHM via tmpdir to avoid /dev/shm permission issues in test let tmpdir = std::env::temp_dir(); let test_dir = tmpdir.join(format!("morloc_test_{}", std::process::id())); std::fs::create_dir_all(&test_dir).unwrap(); shm_set_fallback_dir(test_dir.to_str().unwrap()); let basename = format!("test_shm_{}", std::process::id()); let shm = shinit(&basename, 0, 4096).unwrap(); assert!(!shm.is_null()); assert_eq!(unsafe { (*shm).magic }, SHM_MAGIC); // Allocate some memory let ptr1 = shmalloc(64).unwrap(); assert!(!ptr1.is_null()); // Write and read back unsafe { std::ptr::write_bytes(ptr1, 0xAB, 64); assert_eq!(*ptr1, 0xAB); } // Convert to relptr and back let rel = abs2rel(ptr1).unwrap(); assert!(rel >= 0); let abs = rel2abs(rel).unwrap(); assert_eq!(abs, ptr1); // Free shfree(ptr1).unwrap(); // Cleanup shclose().unwrap(); let _ = std::fs::remove_dir_all(&test_dir); } } ================================================ FILE: 
data/rust/morloc-runtime/src/slurm_ffi.rs ================================================
//! C ABI wrappers for SLURM job submission.
//! Replaces slurm.c.

use std::ffi::{c_char, c_void, CStr, CString};
use std::ptr;

use crate::error::{clear_errmsg, set_errmsg, MorlocError};

const MAX_SLURM_COMMAND_LENGTH: usize = 1024;
const DEFAULT_XXHASH_SEED: u64 = 0;

// ── C-compatible types ───────────────────────────────────────────────────────

// Resource request passed in from C callers; #[repr(C)] keeps the field layout
// identical to the C struct it replaces.
#[repr(C)]
pub struct Resources {
    pub memory: i32, // in Gb
    pub time: i32, // walltime in seconds
    pub cpus: i32,
    pub gpus: i32,
}

// ── parse_slurm_time ─────────────────────────────────────────────────────────

// Parse a SLURM walltime string ("D-HH:MM:SS" or "HH:MM:SS") into total seconds.
// Returns 0 with `errmsg` set on any parse or range failure.
// NOTE(review): 0 is also a legal result for "0:00:00", so callers must test
// errmsg, not the return value, to distinguish errors.
#[no_mangle]
pub unsafe extern "C" fn parse_slurm_time(
    time_str: *const c_char,
    errmsg: *mut *mut c_char,
) -> usize {
    clear_errmsg(errmsg);
    let s = CStr::from_ptr(time_str).to_string_lossy();
    let mut days: i32 = 0;
    let hours: i32;
    let minutes: i32;
    let seconds: i32;
    // Try D-HH:MM:SS format
    if let Some(dash_pos) = s.find('-') {
        days = match s[..dash_pos].parse() {
            Ok(d) => d,
            Err(_) => {
                set_errmsg(errmsg, &MorlocError::Other(format!("Failed to scan slurm walltime string '{}'", s)));
                return 0;
            }
        };
        let rest = &s[dash_pos + 1..];
        let parts: Vec<&str> = rest.split(':').collect();
        if parts.len() != 3 {
            set_errmsg(errmsg, &MorlocError::Other(format!("Failed to scan slurm walltime string '{}'", s)));
            return 0;
        }
        // unwrap_or(-1) funnels non-numeric fields into the range checks below
        hours = parts[0].parse().unwrap_or(-1);
        minutes = parts[1].parse().unwrap_or(-1);
        seconds = parts[2].parse().unwrap_or(-1);
    } else {
        // Try HH:MM:SS format
        let parts: Vec<&str> = s.split(':').collect();
        if parts.len() != 3 {
            set_errmsg(errmsg, &MorlocError::Other(format!("Failed to scan slurm walltime string '{}'", s)));
            return 0;
        }
        hours = parts[0].parse().unwrap_or(-1);
        minutes = parts[1].parse().unwrap_or(-1);
        seconds = parts[2].parse().unwrap_or(-1);
    }
    if days < 0 || hours < 0 || minutes < 0 || seconds < 0 {
        set_errmsg(errmsg, &MorlocError::Other(format!("Negative time component in '{}'", s)));
        return 0;
    }
    if hours > 23 || minutes > 59 || seconds > 59 {
        set_errmsg(errmsg, &MorlocError::Other(format!("Invalid time component in '{}' (HH<=23 MM<=59 SS<=59)", s)));
        return 0;
    }
    if days > 3650 {
        set_errmsg(errmsg, &MorlocError::Other("Do you really want to run this job for more than 10 years?".into()));
        return 0;
    }
    (seconds + 60 * minutes + 60 * 60 * hours + 60 * 60 * 24 * days) as usize
}

// Format a second count as SLURM "D-HH:MM:SS". The returned pointer comes from
// CString::into_raw; NULL if formatting produced an interior NUL (cannot happen
// for this format, kept for safety).
// NOTE(review): callers in this file release the result with libc::free, but a
// CString::into_raw pointer must be released with CString::from_raw — the two
// allocators are not guaranteed to match. Verify before relying on this.
#[no_mangle]
pub unsafe extern "C" fn write_slurm_time(seconds: i32) -> *mut c_char {
    let mut rem = seconds;
    let days = rem / (60 * 60 * 24);
    rem -= days * 60 * 60 * 24;
    let hours = rem / (60 * 60);
    rem -= hours * 60 * 60;
    let minutes = rem / 60;
    rem -= minutes * 60;
    let s = format!("{}-{:02}:{:02}:{:02}", days, hours, minutes, rem);
    match CString::new(s) {
        Ok(cs) => cs.into_raw(),
        Err(_) => ptr::null_mut(),
    }
}

// ── parse_morloc_call_arguments ──────────────────────────────────────────────

// Split a morloc CALL packet into pointers to its embedded argument packets.
// `args` must have room for all arguments; `nargs` receives the count.
// Layout assumed: 32-byte header, then `offset` bytes of metadata, then
// `length` bytes of payload, for the outer packet and each nested argument.
#[no_mangle]
pub unsafe extern "C" fn parse_morloc_call_arguments(
    packet: *mut u8,
    args: *mut *mut u8,
    nargs: *mut usize,
    errmsg: *mut *mut c_char,
) -> bool {
    clear_errmsg(errmsg);
    *nargs = 0;
    let header = &*(packet as *const crate::packet::PacketHeader);
    let packet_size = 32 + header.offset as usize + header.length as usize;
    if header.command_type() != crate::packet::PACKET_TYPE_CALL {
        set_errmsg(errmsg, &MorlocError::Packet("Unexpected packet type (BUG)".into()));
        return false;
    }
    // First pass: count args
    let mut pos = 32 + header.offset as usize;
    while pos < packet_size {
        let arg_header = &*(packet.add(pos) as *const crate::packet::PacketHeader);
        pos += 32 + arg_header.offset as usize + arg_header.length as usize;
        *nargs += 1;
    }
    // Second pass: set pointers
    pos = 32 + header.offset as usize;
    for i in 0..*nargs {
        *args.add(i) = packet.add(pos);
        let arg_header = &*(packet.add(pos) as *const crate::packet::PacketHeader);
        pos += 32 + arg_header.offset as usize + arg_header.length as usize;
    }
    true
}

// ── slurm_job_is_complete ────────────────────────────────────────────────────

#[no_mangle]
pub unsafe extern 
"C" fn slurm_job_is_complete(job_id: u32) -> bool { let cmd = format!("sacct -j {} --format=State --noheader\0", job_id); let sacct = libc::popen(cmd.as_ptr() as *const c_char, b"r\0".as_ptr() as *const c_char); if sacct.is_null() { return false; } let mut state = [0u8; 64]; let mut done = false; while !libc::fgets(state.as_mut_ptr() as *mut c_char, 64, sacct).is_null() { let s = std::str::from_utf8(&state).unwrap_or(""); if s.contains("COMPLETED") || s.contains("FAILED") || s.contains("CANCELLED") { done = true; break; } } libc::pclose(sacct); done } // ── shell_escape ───────────────────────────────────────────────────────────── fn shell_escape(input: &str) -> String { let mut out = String::with_capacity(input.len() + 10); out.push('\''); for ch in input.chars() { if ch == '\'' { out.push_str("'\\''"); } else { out.push(ch); } } out.push('\''); out } // ── submit_morloc_slurm_job ────────────────────────────────────────────────── #[no_mangle] pub unsafe extern "C" fn submit_morloc_slurm_job( nexus_path: *const c_char, socket_basename: *const c_char, call_packet_filename: *const c_char, result_cache_filename: *const c_char, output_filename: *const c_char, error_filename: *const c_char, resources: *const Resources, errmsg: *mut *mut c_char, ) -> u32 { clear_errmsg(errmsg); macro_rules! 
    // Local macro: reject a NULL pointer argument with a named error, return 0.
    check_null {
        ($ptr:expr, $name:expr) => {
            if $ptr.is_null() {
                set_errmsg(errmsg, &MorlocError::Other(format!("{} undefined", $name)));
                return 0;
            }
        };
    }
    check_null!(nexus_path, "nexus path");
    check_null!(socket_basename, "socket basename");
    check_null!(call_packet_filename, "call packet filename");
    check_null!(result_cache_filename, "result cache filename");
    check_null!(output_filename, "slurm output filename");
    check_null!(error_filename, "slurm error filename");
    let res = &*resources;
    let nexus = CStr::from_ptr(nexus_path).to_string_lossy();
    let call = CStr::from_ptr(call_packet_filename).to_string_lossy();
    let socket = CStr::from_ptr(socket_basename).to_string_lossy();
    let result_cache = CStr::from_ptr(result_cache_filename).to_string_lossy();
    let output = CStr::from_ptr(output_filename).to_string_lossy();
    let error = CStr::from_ptr(error_filename).to_string_lossy();
    // NOTE(review): time_str_raw comes from CString::into_raw (see
    // write_slurm_time) but is released here with libc::free — allocators may
    // differ; should be CString::from_raw. Confirm before changing.
    let time_str_raw = write_slurm_time(res.time);
    let time_str = CStr::from_ptr(time_str_raw).to_string_lossy().into_owned();
    libc::free(time_str_raw as *mut c_void);
    let mem_arg = format!("--mem={}G", res.memory);
    let time_arg = format!("--time={}", time_str);
    let cpus_arg = format!("--cpus-per-task={}", res.cpus);
    let gpus_arg = format!("--gres=gpu:{}", res.gpus);
    // Shell-escape every user-controlled path before building the --wrap command.
    let esc_nexus = shell_escape(&nexus);
    let esc_call = shell_escape(&call);
    let esc_socket = shell_escape(&socket);
    let esc_result = shell_escape(&result_cache);
    let wrap_cmd = format!(
        "{} --call-packet {} --socket-base {} --output-file {} --output-form packet",
        esc_nexus, esc_call, esc_socket, esc_result
    );
    if wrap_cmd.len() >= MAX_SLURM_COMMAND_LENGTH {
        set_errmsg(errmsg, &MorlocError::Other("Wrap command too long".into()));
        return 0;
    }
    let wrap_arg = format!("--wrap={}", wrap_cmd);
    // Fork/exec sbatch
    let mut pipefd = [0i32; 2];
    if libc::pipe(pipefd.as_mut_ptr()) == -1 {
        set_errmsg(errmsg, &MorlocError::Other("Failed to create pipe for sbatch".into()));
        return 0;
    }
    let pid = libc::fork();
    if pid == -1 {
        libc::close(pipefd[0]);
        libc::close(pipefd[1]);
        set_errmsg(errmsg, &MorlocError::Other("Failed to fork for sbatch".into()));
        return 0;
    }
    if pid == 0 {
        // Child
        // Route child stdout into the pipe so the parent can read the job id
        // printed by `sbatch --parsable`.
        libc::close(pipefd[0]);
        libc::dup2(pipefd[1], libc::STDOUT_FILENO);
        libc::close(pipefd[1]);
        let sbatch = CString::new("sbatch").unwrap();
        let parsable = CString::new("--parsable").unwrap();
        let o_flag = CString::new("-o").unwrap();
        let e_flag = CString::new("-e").unwrap();
        let c_output = CString::new(output.as_ref()).unwrap();
        let c_error = CString::new(error.as_ref()).unwrap();
        let c_mem = CString::new(mem_arg).unwrap();
        let c_time = CString::new(time_arg).unwrap();
        let c_cpus = CString::new(cpus_arg).unwrap();
        let c_gpus = CString::new(gpus_arg).unwrap();
        let c_wrap = CString::new(wrap_arg).unwrap();
        libc::execlp(
            sbatch.as_ptr(),
            sbatch.as_ptr(),
            parsable.as_ptr(),
            o_flag.as_ptr(),
            c_output.as_ptr(),
            e_flag.as_ptr(),
            c_error.as_ptr(),
            c_mem.as_ptr(),
            c_time.as_ptr(),
            c_cpus.as_ptr(),
            c_gpus.as_ptr(),
            c_wrap.as_ptr(),
            ptr::null::(),
        );
        // Only reached if execlp failed.
        libc::_exit(127);
    }
    // Parent
    libc::close(pipefd[1]);
    let mut buf = [0u8; 64];
    // Read at most 63 bytes so the job-id text always fits the buffer.
    let nread = libc::read(pipefd[0], buf.as_mut_ptr() as *mut c_void, 63);
    libc::close(pipefd[0]);
    let mut status: i32 = 0;
    libc::waitpid(pid, &mut status, 0);
    if !libc::WIFEXITED(status) || libc::WEXITSTATUS(status) != 0 {
        set_errmsg(errmsg, &MorlocError::Other("sbatch exited with error".into()));
        return 0;
    }
    if nread <= 0 {
        set_errmsg(errmsg, &MorlocError::Other("Failed to read sbatch output".into()));
        return 0;
    }
    let output_str = std::str::from_utf8(&buf[..nread as usize]).unwrap_or("");
    match output_str.trim().parse::() {
        Ok(job_id) => job_id,
        Err(_) => {
            set_errmsg(errmsg, &MorlocError::Other("Failed to parse job ID from sbatch output".into()));
            0
        }
    }
}

// ── remote_call ──────────────────────────────────────────────────────────────

// Execute a morloc call remotely via SLURM, with content-addressed caching of
// arguments and result. Returns a malloc'd result packet or NULL with errmsg.
#[no_mangle]
pub unsafe extern "C" fn remote_call(
    midx: i32,
    socket_basename: *const c_char,
    cache_path: *const c_char,
    resources: *const Resources,
    arg_packets: *const *const u8,
    nargs: 
    usize,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    clear_errmsg(errmsg);
    // Use extern C declarations for functions from other modules
    extern "C" {
        fn read_schema_from_packet_meta(packet: *const u8, errmsg: *mut *mut c_char) -> *mut c_char;
        fn parse_schema(schema_str: *const c_char, errmsg: *mut *mut c_char) -> *mut crate::cschema::CSchema;
        fn free_schema(schema: *mut crate::cschema::CSchema);
        fn get_morloc_data_packet_value(data: *const u8, schema: *const crate::cschema::CSchema, errmsg: *mut *mut c_char) -> *mut u8;
        fn hash_voidstar(data: *const u8, schema: *const crate::cschema::CSchema, seed: u64, hash: *mut u64, errmsg: *mut *mut c_char) -> bool;
        fn mix(a: u64, b: u64) -> u64;
        fn mkdir_p(path: *const c_char, errmsg: *mut *mut c_char) -> i32;
        fn check_cache_packet(key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char) -> *mut c_char;
        fn get_cache_packet(key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char) -> *mut u8;
        fn put_cache_packet(data: *const u8, schema: *const crate::cschema::CSchema, key: u64, cache_path: *const c_char, errmsg: *mut *mut c_char) -> *mut c_char;
        fn make_cache_filename(hash: u64, cache_path: *const c_char, errmsg: *mut *mut c_char) -> *mut c_char;
        fn make_cache_filename_ext(hash: u64, cache_path: *const c_char, ext: *const c_char, errmsg: *mut *mut c_char) -> *mut c_char;
        fn make_morloc_remote_call_packet(midx: u32, arg_packets: *const *const u8, nargs: usize, errmsg: *mut *mut c_char) -> *mut u8;
        fn morloc_packet_size(packet: *const u8, errmsg: *mut *mut c_char) -> usize;
        fn read_binary_file(filename: *const c_char, file_size: *mut usize, errmsg: *mut *mut c_char) -> *mut u8;
        fn write_atomic(filename: *const c_char, data: *const u8, size: usize, errmsg: *mut *mut c_char) -> i32;
        fn get_morloc_data_packet_error_message(data: *const u8, errmsg: *mut *mut c_char) -> *mut c_char;
    }
    let seed = midx as u64;
    let mut err: *mut c_char = ptr::null_mut();
    // Cleanup tracking
    // These vectors are released by goto_cleanup! on error and by the trailing
    // cleanup loop on success; goto_cleanup! emulates the old C goto pattern.
    // NOTE(review): `Vec =` below lost its element-type parameter in extraction.
    let mut arg_hashes: Vec = vec![0; nargs];
    let mut arg_voidstars: Vec<*mut u8> = vec![ptr::null_mut(); nargs];
    let mut arg_schemas: Vec<*mut crate::cschema::CSchema> = vec![ptr::null_mut(); nargs];
    let mut cached_arg_filenames: Vec<*mut c_char> = vec![ptr::null_mut(); nargs];
    let mut cached_arg_packets: Vec<*mut u8> = vec![ptr::null_mut(); nargs];
    // The cache key for the call is the module index mixed with every
    // argument's content hash, so identical calls hit the same cache entry.
    let mut function_hash = mix(seed, DEFAULT_XXHASH_SEED);
    // Hash each argument
    for i in 0..nargs {
        let schema_str = read_schema_from_packet_meta(*arg_packets.add(i), &mut err);
        if schema_str.is_null() || !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        arg_schemas[i] = parse_schema(schema_str, &mut err);
        if !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        arg_voidstars[i] = get_morloc_data_packet_value(*arg_packets.add(i), arg_schemas[i], &mut err);
        if !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        let mut h: u64 = 0;
        hash_voidstar(arg_voidstars[i], arg_schemas[i], DEFAULT_XXHASH_SEED, &mut h, &mut err);
        if !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        arg_hashes[i] = h;
        function_hash = mix(function_hash, h);
    }
    mkdir_p(cache_path, &mut err);
    if !err.is_null() {
        goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
    }
    // Check if result is cached
    let mut result_cache_filename = check_cache_packet(function_hash, cache_path, &mut err);
    // A cache miss may surface as an error here; swallow it and proceed.
    if !err.is_null() {
        libc::free(err as *mut c_void);
        err = ptr::null_mut();
    }
    if !result_cache_filename.is_null() {
        // Cache hit: return the stored result packet directly.
        return_packet = get_cache_packet(function_hash, cache_path, &mut err);
        if !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
    } else {
        result_cache_filename = make_cache_filename(function_hash, cache_path, &mut err);
        if !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        // Cache arguments
        for i in 0..nargs {
            cached_arg_filenames[i] = check_cache_packet(arg_hashes[i], cache_path, &mut err);
            if cached_arg_filenames[i].is_null() {
                if !err.is_null() {
                    libc::free(err as *mut c_void);
                    err = ptr::null_mut();
                }
                cached_arg_filenames[i] = put_cache_packet(arg_voidstars[i], arg_schemas[i], arg_hashes[i], cache_path, &mut err);
                if !err.is_null() {
                    goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
                }
            }
        }
        // Read cached arg packets
        for i in 0..nargs {
            let mut file_size: usize = 0;
            cached_arg_packets[i] = read_binary_file(cached_arg_filenames[i], &mut file_size, &mut err);
            if !err.is_null() {
                goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
            }
        }
        // Build call packet
        let cached_ptrs: Vec<*const u8> = cached_arg_packets.iter().map(|p| *p as *const u8).collect();
        let call_packet = make_morloc_remote_call_packet(midx as u32, cached_ptrs.as_ptr(), nargs, &mut err);
        if !err.is_null() {
            libc::free(call_packet as *mut c_void);
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        let call_packet_size = morloc_packet_size(call_packet, &mut err);
        // Hash call packet with xxhash
        let call_packet_hash = crate::hash::xxh64_with_seed(std::slice::from_raw_parts(call_packet, call_packet_size), DEFAULT_XXHASH_SEED);
        let call_ext = CString::new("-call.dat").unwrap();
        let call_packet_filename = make_cache_filename_ext(call_packet_hash, cache_path, call_ext.as_ptr(), &mut err);
        // Write call packet to disk
        write_atomic(call_packet_filename, call_packet, call_packet_size, &mut err);
        libc::free(call_packet as *mut c_void);
        let out_ext = CString::new(".out").unwrap();
        let err_ext = CString::new(".err").unwrap();
        let output_filename = make_cache_filename_ext(function_hash, cache_path, out_ext.as_ptr(), &mut err);
        let error_filename = make_cache_filename_ext(function_hash, cache_path, err_ext.as_ptr(), &mut err);
        // Submit SLURM job
        let nexus_c = CString::new("./nexus").unwrap();
        let pid = submit_morloc_slurm_job(
            nexus_c.as_ptr(),
            socket_basename,
            call_packet_filename,
            result_cache_filename,
            output_filename,
            error_filename,
            resources,
            &mut err,
        );
        libc::free(call_packet_filename as *mut c_void);
        libc::free(output_filename as *mut c_void);
        libc::free(error_filename as *mut c_void);
        if !err.is_null() {
            goto_cleanup!(errmsg, err, arg_schemas, cached_arg_filenames, cached_arg_packets, return_packet);
        }
        // Wait for job completion
        // NOTE(review): this polls sacct once per second with no timeout; a job
        // that never reaches a terminal state blocks forever.
        while !slurm_job_is_complete(pid) {
            libc::sleep(1);
        }
        let mut return_packet_size: usize = 0;
        return_packet = read_binary_file(result_cache_filename, &mut return_packet_size, &mut err);
        // If the result packet encodes a failure, drop it from the cache so a
        // later retry re-runs the job instead of replaying the failure.
        let failure = get_morloc_data_packet_error_message(return_packet, &mut err);
        if !failure.is_null() {
            libc::fprintf(
                libc::fdopen(libc::STDERR_FILENO, b"w\0".as_ptr() as *const c_char),
                b"Failed, deleting result %s\n\0".as_ptr() as *const c_char,
                result_cache_filename,
            );
            libc::unlink(result_cache_filename);
            libc::free(failure as *mut c_void);
        }
    }
    // Cleanup
    for i in 0..nargs {
        if !arg_schemas[i].is_null() {
            free_schema(arg_schemas[i]);
        }
        if !cached_arg_filenames[i].is_null() {
            libc::free(cached_arg_filenames[i] as *mut c_void);
        }
        if !cached_arg_packets[i].is_null() {
            libc::free(cached_arg_packets[i] as *mut c_void);
        }
    }
    if !result_cache_filename.is_null() {
        libc::free(result_cache_filename as *mut c_void);
    }
    return_packet
}

// Cleanup macro for goto-like pattern
macro_rules! 
goto_cleanup { ($errmsg:expr, $err:expr, $schemas:expr, $filenames:expr, $packets:expr, $return_packet:expr) => {{ *$errmsg = $err; for i in 0..$schemas.len() { if !$schemas[i].is_null() { extern "C" { fn free_schema(s: *mut crate::cschema::CSchema); } free_schema($schemas[i]); } if !$filenames[i].is_null() { libc::free($filenames[i] as *mut c_void); } if !$packets[i].is_null() { libc::free($packets[i] as *mut c_void); } } return $return_packet; }}; } use goto_cleanup; ================================================ FILE: data/rust/morloc-runtime/src/utility.rs ================================================ //! File I/O and string utility functions. //! Replaces utility.c. use std::ffi::{c_char, c_void, CStr}; use std::io::Write; use std::ptr; use crate::error::{clear_errmsg, set_errmsg, MorlocError}; // ── Cross-platform helpers ───────────────────────────────────────────────── /// Return the current errno value (cross-platform). #[cfg(target_os = "linux")] #[inline] pub unsafe fn errno_val() -> i32 { *libc::__errno_location() } #[cfg(target_os = "macos")] #[inline] pub unsafe fn errno_val() -> i32 { *libc::__error() } /// Suppress SIGPIPE on send(). Linux: per-call flag. macOS: use set_nosigpipe() on the socket. #[cfg(target_os = "linux")] pub const SEND_NOSIGNAL: i32 = libc::MSG_NOSIGNAL; #[cfg(target_os = "macos")] pub const SEND_NOSIGNAL: i32 = 0; /// Set SO_NOSIGPIPE on a socket (macOS). No-op on Linux (uses MSG_NOSIGNAL per-call). 
#[allow(unused_variables)]
pub unsafe fn set_nosigpipe(fd: i32) {
    #[cfg(target_os = "macos")]
    {
        let val: libc::c_int = 1;
        // Best effort: the setsockopt return value is deliberately ignored.
        libc::setsockopt(
            fd,
            libc::SOL_SOCKET,
            libc::SO_NOSIGPIPE,
            &val as *const _ as *const libc::c_void,
            std::mem::size_of::() as libc::socklen_t,
        );
    }
}

// ── File operations ────────────────────────────────────────────────────────

// True if the path names an existing filesystem entry; false for NULL input.
#[no_mangle]
pub unsafe extern "C" fn file_exists(filename: *const c_char) -> bool {
    if filename.is_null() {
        return false;
    }
    let path = CStr::from_ptr(filename).to_string_lossy();
    std::path::Path::new(path.as_ref()).exists()
}

// mkdir -p equivalent: create the directory and all missing parents.
// Returns 0 on success, -1 with errmsg set on failure.
#[no_mangle]
pub unsafe extern "C" fn mkdir_p(path: *const c_char, errmsg: *mut *mut c_char) -> i32 {
    clear_errmsg(errmsg);
    if path.is_null() {
        set_errmsg(errmsg, &MorlocError::Other("NULL path".into()));
        return -1;
    }
    let p = CStr::from_ptr(path).to_string_lossy();
    match std::fs::create_dir_all(p.as_ref()) {
        Ok(_) => 0,
        Err(e) => {
            set_errmsg(
                errmsg,
                &MorlocError::Io(e),
            );
            -1
        }
    }
}

// Recursively delete a directory; errors are deliberately ignored (best effort).
#[no_mangle]
pub unsafe extern "C" fn delete_directory(path: *const c_char) {
    if path.is_null() {
        return;
    }
    let p = CStr::from_ptr(path).to_string_lossy();
    let _ = std::fs::remove_dir_all(p.as_ref());
}

// True if C string `x` ends with C string `suffix`; false if either is NULL.
#[no_mangle]
pub unsafe extern "C" fn has_suffix(x: *const c_char, suffix: *const c_char) -> bool {
    if x.is_null() || suffix.is_null() {
        return false;
    }
    let xs = CStr::from_ptr(x).to_string_lossy();
    let ss = CStr::from_ptr(suffix).to_string_lossy();
    xs.ends_with(ss.as_ref())
}

// Atomically replace `filename` with `size` bytes from `data`: write to a
// temp file in the same directory, fsync, then rename over the target.
// Returns 0 on success, -1 with errmsg set on failure.
#[no_mangle]
pub unsafe extern "C" fn write_atomic(
    filename: *const c_char,
    data: *const u8,
    size: usize,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    if filename.is_null() || (data.is_null() && size != 0) {
        set_errmsg(errmsg, &MorlocError::Other("invalid arguments".into()));
        return -1;
    }
    let path_str = CStr::from_ptr(filename).to_string_lossy();
    let path = std::path::Path::new(path_str.as_ref());
    // Get parent directory
    let dir = path.parent().unwrap_or(std::path::Path::new("."));
    // Create temp file in same directory
    // NOTE(review): the temp name is keyed only on process id, so two
    // concurrent write_atomic calls in the same process on the same directory
    // would collide — confirm callers never do that.
    let tmp_path = dir.join(format!("morloc-tmp_{}", std::process::id()));
    let result = (|| -> Result<(), std::io::Error> {
        // Write to temp file
        let mut f = std::fs::File::create(&tmp_path)?;
        if size > 0 {
            let bytes = std::slice::from_raw_parts(data, size);
            f.write_all(bytes)?;
        }
        f.sync_all()?;
        drop(f);
        // Atomic rename
        std::fs::rename(&tmp_path, path)?;
        // Sync parent directory
        if let Ok(dir_f) = std::fs::File::open(dir) {
            let _ = dir_f.sync_all();
        }
        Ok(())
    })();
    match result {
        Ok(_) => 0,
        Err(e) => {
            let _ = std::fs::remove_file(&tmp_path);
            set_errmsg(errmsg, &MorlocError::Io(e));
            -1
        }
    }
}

// ── Binary I/O ─────────────────────────────────────────────────────────────

// Read an entire file into a libc::malloc'd buffer (caller frees with free()).
// `file_size` receives the byte count; returns NULL with errmsg on failure.
#[no_mangle]
pub unsafe extern "C" fn read_binary_file(
    filename: *const c_char,
    file_size: *mut usize,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    clear_errmsg(errmsg);
    if filename.is_null() {
        set_errmsg(errmsg, &MorlocError::Other("NULL filename".into()));
        return ptr::null_mut();
    }
    let path = CStr::from_ptr(filename).to_string_lossy();
    match std::fs::read(path.as_ref()) {
        Ok(data) => {
            *file_size = data.len();
            // NOTE(review): malloc(0) may return NULL for an empty file, which
            // would be reported as "malloc failed" — verify intent.
            let buf = libc::malloc(data.len()) as *mut u8;
            if buf.is_null() {
                set_errmsg(errmsg, &MorlocError::Other("malloc failed".into()));
                return ptr::null_mut();
            }
            std::ptr::copy_nonoverlapping(data.as_ptr(), buf, data.len());
            buf
        }
        Err(e) => {
            set_errmsg(errmsg, &MorlocError::Io(e));
            ptr::null_mut()
        }
    }
}

// Read the remainder of an open FILE* into a libc::malloc'd buffer. Seekable
// streams are sized via fseek/ftell; non-seekable streams fall back to a
// chunked streaming read.
#[no_mangle]
pub unsafe extern "C" fn read_binary_fd(
    file: *mut libc::FILE,
    file_size: *mut usize,
    errmsg: *mut *mut c_char,
) -> *mut u8 {
    clear_errmsg(errmsg);
    if file.is_null() {
        set_errmsg(errmsg, &MorlocError::Other("NULL file".into()));
        return ptr::null_mut();
    }
    // Try seek-based size detection
    if libc::fseek(file, 0, libc::SEEK_END) == 0 {
        let size = libc::ftell(file) as usize;
        if size > 0 {
            libc::rewind(file);
            let buf = libc::malloc(size) as *mut u8;
            if buf.is_null() {
                set_errmsg(errmsg, &MorlocError::Other("malloc failed".into()));
                return ptr::null_mut();
            }
            let read = libc::fread(buf as *mut c_void, 1, 
            size, file);
            if read == size {
                *file_size = size;
                return buf;
            }
            // Short read despite the reported size: discard and fall through
            // to the streaming path below.
            libc::free(buf as *mut c_void);
        }
    }
    // Streaming read for non-seekable files
    let chunk_size: usize = 0xffff;
    let mut buf: *mut u8 = ptr::null_mut();
    let mut allocated: usize = 0;
    loop {
        // Grow by one chunk per iteration; realloc(NULL, n) acts as malloc.
        let new_buf = libc::realloc(buf as *mut c_void, allocated + chunk_size) as *mut u8;
        if new_buf.is_null() {
            libc::free(buf as *mut c_void);
            set_errmsg(errmsg, &MorlocError::Other("realloc failed".into()));
            return ptr::null_mut();
        }
        buf = new_buf;
        let read = libc::fread(buf.add(allocated) as *mut c_void, 1, chunk_size, file);
        allocated += read;
        if read < chunk_size {
            if libc::feof(file) != 0 {
                *file_size = allocated;
                return buf;
            }
            if libc::ferror(file) != 0 {
                libc::free(buf as *mut c_void);
                set_errmsg(errmsg, &MorlocError::Other("read error".into()));
                return ptr::null_mut();
            }
        }
    }
}

// Write exactly `count` bytes to `fd`, retrying on short writes.
// Returns 0 on success, -1 with errmsg on a write error.
#[no_mangle]
pub unsafe extern "C" fn write_binary_fd(
    fd: i32,
    buf: *const c_char,
    count: usize,
    errmsg: *mut *mut c_char,
) -> i32 {
    clear_errmsg(errmsg);
    let mut total: usize = 0;
    while total < count {
        let written = libc::write(fd, buf.add(total) as *const c_void, count - total);
        if written < 0 {
            set_errmsg(
                errmsg,
                &MorlocError::Other(format!("write failed: {}", std::io::Error::last_os_error())),
            );
            return -1;
        }
        total += written as usize;
    }
    0
}

// Convenience wrapper: write `count` bytes to stdout.
#[no_mangle]
pub unsafe extern "C" fn print_binary(
    buf: *const c_char,
    count: usize,
    errmsg: *mut *mut c_char,
) -> i32 {
    write_binary_fd(libc::STDOUT_FILENO, buf, count, errmsg)
}

// ── Display ────────────────────────────────────────────────────────────────

// Debug helper: dump `size` bytes as hex to stderr, extra gap every 8 bytes.
#[no_mangle]
pub unsafe extern "C" fn hex(ptr: *const c_void, size: usize) {
    if ptr.is_null() || size == 0 {
        return;
    }
    let bytes = std::slice::from_raw_parts(ptr as *const u8, size);
    for (i, b) in bytes.iter().enumerate() {
        if i > 0 && i % 8 == 0 {
            eprint!(" ");
        }
        eprint!("{:02X}", b);
        if i < size - 1 {
            eprint!(" ");
        }
    }
}

// Hex dump to stdout: 4-byte groups, 24 bytes per line, trailing newline.
#[no_mangle]
pub unsafe extern "C" fn print_hex_dump(
    data: *const u8,
    size: usize,
    errmsg: *mut *mut c_char,
) -> bool {
    clear_errmsg(errmsg);
    if data.is_null() && size > 0 {
        set_errmsg(errmsg, &MorlocError::Other("NULL data".into()));
        return false;
    }
    let bytes = if size > 0 { std::slice::from_raw_parts(data, size) } else { &[] };
    for (i, b) in bytes.iter().enumerate() {
        if i > 0 && i % 4 == 0 {
            if i % 24 == 0 {
                println!();
            } else {
                print!(" ");
            }
        }
        print!("{:02X}", b);
    }
    if !bytes.is_empty() {
        println!();
    }
    true
}

// ── xxHash wrapper and mix ─────────────────────────────────────────────────

/// Mix two 64-bit hash values. Matches the C implementation in cache.c.
// Multiply by the xxHash64 primes with a 31-bit rotate in between; must stay
// bit-identical to the C version so cache keys remain compatible.
#[no_mangle]
pub extern "C" fn mix(a: u64, b: u64) -> u64 {
    const PRIME64_1: u64 = 0x9E3779B185EBCA87;
    const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F;
    let mut a = a ^ b.wrapping_mul(PRIME64_1);
    a = (a << 31) | (a >> 33);
    a.wrapping_mul(PRIME64_2)
}

// C-callable xxHash64; NULL or empty input hashes as the empty byte string.
#[no_mangle]
pub unsafe extern "C" fn morloc_xxh64(
    input: *const c_void,
    length: usize,
    seed: u64,
) -> u64 {
    if input.is_null() || length == 0 {
        return crate::hash::xxh64_with_seed(&[], seed);
    }
    let data = std::slice::from_raw_parts(input as *const u8, length);
    crate::hash::xxh64_with_seed(data, seed)
}

// ── String utilities ───────────────────────────────────────────────────────

/// dirname - returns pointer into the input string (modifies it in-place)
/// Matches the C behavior: returns "." for empty/NULL, strips trailing slashes
#[no_mangle]
pub unsafe extern "C" fn dirname(path: *mut c_char) -> *mut c_char {
    // Return a pointer to the static string "." for empty/null paths and paths with no slash.
static DOT: [u8; 2] = [b'.', 0]; let dot_ptr = DOT.as_ptr() as *mut c_char; if path.is_null() || *path == 0 { return dot_ptr; } let len = libc::strlen(path); let mut end = path.add(len - 1); // Remove trailing slashes while end > path && *end == b'/' as c_char { *end = 0; end = end.sub(1); } // Find last slash let last_slash = libc::strrchr(path, b'/' as i32); if last_slash.is_null() { return dot_ptr; } if last_slash == path { *path.add(1) = 0; // root case "/" } else { *last_slash = 0; } path } ================================================ FILE: data/rust/morloc-runtime/src/voidstar.rs ================================================ //! Shared voidstar operations: relptr adjustment, binary serialization, //! schema-aware free, and flatten-to-buffer. //! //! These functions operate on the morloc voidstar binary format in SHM. //! They are used by packet.rs, cli.rs, and json.rs. use crate::error::MorlocError; use crate::schema::{Schema, SerialType}; use crate::shm::{self, AbsPtr, Array, RelPtr, Tensor}; // ── adjust_voidstar_relptrs ──────────────────────────────────────────────── /// Adjust all relptrs in a voidstar blob by adding base_rel. /// Used after copying a flattened blob into SHM. pub fn adjust_relptrs( data: AbsPtr, schema: &Schema, base_rel: RelPtr, ) -> Result<(), MorlocError> { // SAFETY: data points to a voidstar blob in SHM. We adjust relptrs in-place; // all pointer arithmetic stays within the blob's bounds as defined by schema. 
    unsafe {
        match schema.serial_type {
            SerialType::String | SerialType::Array => {
                // Shift the array's own data pointer, then recurse into each
                // element only if elements themselves contain relptrs.
                let arr = &mut *(data as *mut Array);
                arr.data += base_rel;
                if !schema.parameters.is_empty() && !schema.parameters[0].is_fixed_width() {
                    let arr_data = shm::rel2abs(arr.data)?;
                    let w = schema.parameters[0].width;
                    for i in 0..arr.size {
                        adjust_relptrs(arr_data.add(i * w), &schema.parameters[0], base_rel)?;
                    }
                }
            }
            SerialType::Tuple | SerialType::Map => {
                // Recurse into each field at its schema-defined offset.
                for i in 0..schema.parameters.len() {
                    adjust_relptrs(data.add(schema.offsets[i]), &schema.parameters[i], base_rel)?;
                }
            }
            SerialType::Optional => {
                // First byte is the presence tag; payload follows at the first
                // offset (or the aligned position after the tag as a fallback).
                if *data != 0 && !schema.parameters.is_empty() {
                    let off = schema.offsets.first().copied()
                        .unwrap_or_else(|| shm::align_up(1, schema.parameters[0].alignment().max(1)));
                    adjust_relptrs(data.add(off), &schema.parameters[0], base_rel)?;
                }
            }
            SerialType::Tensor => {
                let t = &mut *(data as *mut Tensor);
                if t.total_elements > 0 {
                    t.shape += base_rel;
                    t.data += base_rel;
                }
            }
            _ => {}
        }
    }
    Ok(())
}

// ── read_voidstar_binary ───────────────────────────────────────────────────

/// Read a flat voidstar binary blob into SHM, adjusting relptrs.
// NOTE(review): the return type below lost its generic parameters in
// extraction (`Result {`); tokens preserved as found.
pub fn read_binary(blob: &[u8], schema: &Schema) -> Result {
    let base = shm::shmalloc(blob.len())?;
    // SAFETY: base is freshly allocated with blob.len() bytes.
    unsafe { std::ptr::copy_nonoverlapping(blob.as_ptr(), base, blob.len()) };
    // Relptrs in a flat blob are relative to position 0; rebase them onto the
    // blob's actual location in the pool.
    let base_rel = shm::abs2rel(base)?;
    adjust_relptrs(base, schema, base_rel)?;
    Ok(base)
}

// ── shfree_by_schema ───────────────────────────────────────────────────────

/// Zero metadata for nested structures so the parent block can be cleanly freed.
/// Does NOT call shfree on sub-pointers (they're cursor-packed in the same block).
pub fn free_by_schema(ptr: AbsPtr, schema: &Schema) -> Result<(), MorlocError> {
    // SAFETY: ptr points to voidstar data in SHM with layout described by schema.
    // We zero metadata at schema.width offsets within the structure.
    unsafe {
        match schema.serial_type {
            SerialType::String | SerialType::Array => {
                let arr = &*(ptr as *const Array);
                if arr.data > 0 && !schema.parameters.is_empty() && !schema.parameters[0].is_fixed_width() {
                    let arr_data = shm::rel2abs(arr.data)?;
                    let w = schema.parameters[0].width;
                    for i in 0..arr.size {
                        free_by_schema(arr_data.add(i * w), &schema.parameters[0])?;
                    }
                }
            }
            SerialType::Tuple | SerialType::Map => {
                for i in 0..schema.parameters.len() {
                    free_by_schema(ptr.add(schema.offsets[i]), &schema.parameters[i])?;
                }
            }
            SerialType::Tensor => {} // inline, freed by parent
            _ => {}
        }
        // Zero this node's own metadata last, after children were visited.
        std::ptr::write_bytes(ptr, 0, schema.width);
    }
    Ok(())
}

// ── flatten_voidstar_to_buffer ─────────────────────────────────────────────

/// Flatten a voidstar structure in SHM into a self-contained byte buffer.
/// Relptrs in the output are offsets from position 0 of the buffer.
// NOTE(review): the return type below lost its generic parameters in
// extraction (`Result, MorlocError>`); tokens preserved as found.
pub fn flatten_to_buffer(data: AbsPtr, schema: &Schema) -> Result, MorlocError> {
    let total = crate::ffi::calc_voidstar_size_inner(data, schema)?;
    let mut buf = vec![0u8; total];
    // Phase 1: copy the fixed-width root record verbatim.
    // SAFETY: data points to at least schema.width bytes in SHM; buf has total >= schema.width bytes.
    unsafe { std::ptr::copy_nonoverlapping(data, buf.as_mut_ptr(), schema.width) };
    // Phase 2: fix up relptrs and copy variable-length data
    let mut cursor = schema.width;
    flatten_fixup(&mut buf, 0, data, schema, &mut cursor)?;
    Ok(buf)
}

// Recursive worker for flatten_to_buffer: rewrites the relptrs inside buf at
// buf_offset and appends the pointed-to variable-length payloads at *cursor.
fn flatten_fixup(
    buf: &mut [u8],
    buf_offset: usize,
    data: AbsPtr,
    schema: &Schema,
    cursor: &mut usize,
) -> Result<(), MorlocError> {
    // SAFETY: buf is sized by calc_voidstar_size_inner to hold the entire flattened structure.
    // data points to corresponding SHM data. cursor tracks write position within buf.
    unsafe {
        match schema.serial_type {
            SerialType::String | SerialType::Array => {
                let orig_arr = &*(data as *const Array);
                let buf_arr = &mut *(buf.as_mut_ptr().add(buf_offset) as *mut Array);
                if orig_arr.size == 0 {
                    buf_arr.data = 0;
                    return Ok(());
                }
                let orig_data = shm::rel2abs(orig_arr.data)?;
                let elem_schema = &schema.parameters[0];
                // Append the element block at the next aligned cursor position
                // and point the flattened array at it (buffer-relative offset).
                let align = elem_schema.alignment();
                *cursor = shm::align_up(*cursor, align);
                buf_arr.data = *cursor as RelPtr;
                let elem_w = elem_schema.width;
                let total_bytes = elem_w * orig_arr.size;
                buf[*cursor..*cursor + total_bytes].copy_from_slice(
                    std::slice::from_raw_parts(orig_data, total_bytes)
                );
                let elem_start = *cursor;
                *cursor += total_bytes;
                // Variable-width elements still hold SHM relptrs; fix each up.
                if !elem_schema.is_fixed_width() {
                    for i in 0..orig_arr.size {
                        flatten_fixup(
                            buf,
                            elem_start + i * elem_w,
                            orig_data.add(i * elem_w),
                            elem_schema,
                            cursor,
                        )?;
                    }
                }
            }
            SerialType::Tuple | SerialType::Map => {
                for i in 0..schema.parameters.len() {
                    flatten_fixup(
                        buf,
                        buf_offset + schema.offsets[i],
                        data.add(schema.offsets[i]),
                        &schema.parameters[i],
                        cursor,
                    )?;
                }
            }
            SerialType::Optional => {
                // Presence tag was already copied by the parent; only recurse
                // into the payload when it is present.
                let tag = *buf.as_ptr().add(buf_offset);
                if tag != 0 && !schema.parameters.is_empty() {
                    let off = schema.offsets.first().copied()
                        .unwrap_or_else(|| shm::align_up(1, schema.parameters[0].alignment().max(1)));
                    flatten_fixup(
                        buf,
                        buf_offset + off,
                        data.add(off),
                        &schema.parameters[0],
                        cursor,
                    )?;
                }
            }
            SerialType::Tensor => {
                let orig = &*(data as *const Tensor);
                let buf_t = &mut *(buf.as_mut_ptr().add(buf_offset) as *mut Tensor);
                if orig.total_elements == 0 {
                    buf_t.shape = 0;
                    buf_t.data = 0;
                    return Ok(());
                }
                // First append the shape vector (ndim entries), then the
                // element data, each at its own alignment.
                let ndim = schema.offsets.first().copied().unwrap_or(0);
                let orig_shape = shm::rel2abs(orig.shape)?;
                *cursor = shm::align_up(*cursor, std::mem::align_of::());
                buf_t.shape = *cursor as RelPtr;
                let shape_bytes = ndim * std::mem::size_of::();
                buf[*cursor..*cursor + shape_bytes].copy_from_slice(
                    std::slice::from_raw_parts(orig_shape, shape_bytes)
                );
                *cursor += shape_bytes;
                let orig_data = shm::rel2abs(orig.data)?;
                let elem_w = schema.parameters[0].width;
                let elem_align = schema.parameters[0].alignment();
                *cursor = shm::align_up(*cursor, elem_align);
                buf_t.data = *cursor as RelPtr;
                let data_bytes = orig.total_elements * elem_w;
                buf[*cursor..*cursor + data_bytes].copy_from_slice(
                    std::slice::from_raw_parts(orig_data, data_bytes)
                );
                *cursor += data_bytes;
            }
            _ => {} // primitives already copied by parent
        }
    }
    Ok(())
}

// ── write_voidstar_binary (to fd) ──────────────────────────────────────────

/// Flatten voidstar and write to a file descriptor. Returns bytes written.
// NOTE(review): the return type below lost its generic parameters in
// extraction (`Result {`). Also note: a single libc::write may write fewer
// than buf.len() bytes; the short-write case is reported via the return
// count, not retried.
pub fn write_binary_to_fd(fd: i32, data: AbsPtr, schema: &Schema) -> Result {
    let buf = flatten_to_buffer(data, schema)?;
    // SAFETY: buf is a valid byte slice; fd is a valid file descriptor from the caller.
    let written = unsafe { libc::write(fd, buf.as_ptr() as *const std::ffi::c_void, buf.len()) };
    if written < 0 {
        return Err(MorlocError::Io(std::io::Error::last_os_error()));
    }
    Ok(written as usize)
}
================================================ FILE: exe/morloc-codegen-generic/Main.hs ================================================
{- | Module : Main Description : Generic pool code generator for morloc Copyright : (c) Zebulun Arendsee, 2016-2026 License : Apache-2.0 Standalone executable that assembles pool files for dynamically-typed interpreted languages. Receives a serialized IProgram via a binary file and a language descriptor via lang.yaml. 
Usage: morloc-codegen-generic <lang.yaml> <iprogram.bin>

Reads:
  - argv[1]: path to lang.yaml (language descriptor)
  - argv[2]: path to binary-encoded IProgram

Writes to stdout:
  - JSON CodegenManifest with pool_code and build_commands
-}
module Main (main) where

import qualified Data.Aeson as Aeson
import qualified Data.Binary as Binary
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import GHC.IO.Encoding (setLocaleEncoding, utf8)
import System.Environment (getArgs)
import System.Exit (exitFailure)
-- NOTE(review): the (</>) operator was lost in extraction (stripped as an
-- HTML-like tag); restored here and in 'run' below — confirm against upstream.
import System.FilePath (takeDirectory, (</>))
import System.IO (hPutStrLn, stderr)

import Morloc.CodeGenerator.Grammars.Translator.Generic
  ( CodegenManifest (..)
  , printProgram
  )
import Morloc.CodeGenerator.Grammars.Translator.Imperative (IProgram)
import Morloc.CodeGenerator.LanguageDescriptor
  ( LangDescriptor (..)
  , loadLangDescriptor
  )
import Morloc.Data.Doc (render)

-- | Entry point: expect exactly two arguments (lang.yaml path, IProgram path)
-- and run the generator; print usage to stderr and fail otherwise.
main :: IO ()
main = do
  setLocaleEncoding utf8
  args <- getArgs
  case args of
    [langYamlPath, iprogramPath] -> run langYamlPath iprogramPath
    _ -> do
      hPutStrLn stderr "Usage: morloc-codegen-generic <lang.yaml> <iprogram.bin>"
      exitFailure

-- | Load the language descriptor and binary IProgram, assemble the pool
-- source with 'printProgram', and emit the 'CodegenManifest' as JSON on
-- stdout.
run :: FilePath -> FilePath -> IO ()
run langYamlPath iprogramPath = do
  -- load language descriptor
  descResult <- loadLangDescriptor langYamlPath
  desc <- case descResult of
    Left err -> do
      hPutStrLn stderr $ "Failed to load " ++ langYamlPath ++ ": " ++ err
      exitFailure
    Right d -> return d
  -- load pool template from disk if not inline
  desc' <-
    if T.null (ldPoolTemplate desc)
      then do
        let langDir = takeDirectory langYamlPath
            ext = ldExtension desc
            -- NOTE(review): path-combine operator reconstructed as (</>);
            -- both (</>) and (++) are infixr 5, so this parses as
            -- langDir </> ("pool." ++ ext) — confirm against upstream.
            poolPath = langDir </> "pool." ++ ext
        poolText <- TIO.readFile poolPath
        return desc {ldPoolTemplate = poolText}
      else return desc
  -- deserialize IProgram
  binaryData <- BL.readFile iprogramPath
  let program = Binary.decode binaryData :: IProgram
  -- assemble pool file
  let poolCode = render (printProgram desc' program)
  -- output manifest as JSON
  let manifest =
        CodegenManifest
          { cgmPoolCode = poolCode
          , cgmBuildCommands = []
          }
  BL.putStr (Aeson.encode manifest)



================================================
FILE: executable/CppPrinter.hs
================================================
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}

{- | Module      : CppPrinter
Description : Pretty-print the imperative IR as C++ source code
Copyright   : (c) Zebulun Arendsee, 2016-2026
License     : Apache-2.0
Maintainer  : z@morloc.io

Converts 'IStmt' and 'IExpr' IR nodes into C++ source text. Handles type
rendering, struct definitions, forward declarations, and C++ idioms
(templates, shared_ptr, std::variant).
-}
module CppPrinter
  ( printExpr
  , printStmt
  , printStmts
    -- * Pool-level rendering
  , printDispatch
  , printProgram
    -- * Struct/serializer rendering
  , printStructTypedef
  , printSerializer
  , printDeserializer
  , printTemplateHeader
  , printRecordTemplate
  ) where

import Morloc.CodeGenerator.Grammars.Common (DispatchEntry (..), manNamer)
import Morloc.CodeGenerator.Grammars.Translator.Imperative
import Morloc.CodeGenerator.Namespace (MDoc)
import Morloc.Data.Doc
import Morloc.DataFiles as DF
import Morloc.Quasi

-- | Render a single 'IExpr' IR node as a C++ expression document.
printExpr :: IExpr -> MDoc
printExpr (IVar v) = pretty v
printExpr (IBoolLit True) = "true"
printExpr (IBoolLit False) = "false"
printExpr (INullLit (Just t)) = "std::optional<" <> renderIType t <> ">()"
printExpr (INullLit Nothing) = "std::nullopt"
printExpr (IIntLit Nothing i) = viaShow i
-- Non-default integer types are rendered through static_cast.
printExpr (IIntLit (Just t) i)
  | t == "int" = viaShow i
  | otherwise = "static_cast<" <> pretty t <> ">(" <> viaShow i <> ")"
printExpr (IRealLit Nothing r) = viaShow r
printExpr (IRealLit (Just t) r)
  | t == "double" = viaShow r
  | otherwise = "static_cast<" <> pretty t <> ">(" <> viaShow r <> ")"
printExpr (IStrLit s) = [idoc|std::string(#{textEsc' s})|]
printExpr (IListLit es) = encloseSep "{" "}" "," (map printExpr es)
printExpr (ITupleLit es) = "std::make_tuple" <> tupled (map printExpr es)
-- Records are constructed positionally with brace initialization.
printExpr (IRecordLit _ _ entries) = encloseSep "{" "}" "," (map (printExpr . snd) entries)
printExpr (IAccess e (IIdx i)) = "std::get<" <> pretty i <> ">(" <> printExpr e <> ")"
printExpr (IAccess e (IKey _)) = printExpr e -- should not be reached for C++
printExpr (IAccess e (IField f)) = printExpr e <> "." <> pretty f
printExpr (ISerCall schema e) = [idoc|_put_value(#{printExpr e}, "#{pretty schema}")|]
printExpr (IDesCall schema (Just rawtype) e) = [idoc|_get_value<#{renderIType rawtype}>(#{printExpr e}, "#{pretty schema}")|]
printExpr (IDesCall schema Nothing e) = [idoc|_get_value(#{printExpr e}, "#{pretty schema}")|]
printExpr (IPack packer e) = pretty packer <> parens (printExpr e)
-- Calls may carry explicit template arguments; argGroups supports curried
-- call chains rendered as consecutive argument tuples.
printExpr (ICall f Nothing argGroups) = pretty f <> hsep (map (tupled . map printExpr) argGroups)
printExpr (ICall f (Just ts) argGroups) =
  pretty f
    <> encloseSep "<" ">" "," (map renderIType ts)
    <> hsep (map (tupled . map printExpr) argGroups)
printExpr (IForeignCall _ _ _) = error "use IRawExpr for C++ foreign calls"
printExpr (IRemoteCall _ _ _ _) = error "use IRawExpr for C++ remote calls"
printExpr (ILambda args body) =
  "[&](" <> hsep (punctuate "," ["auto" <+> pretty a | a <- args])
    <> "){return "
    <> printExpr body
    <> ";}"
printExpr (IRawExpr d) = pretty d
printExpr (IDoBlock e) = "[&](){return " <> printExpr e <> ";}"
printExpr (IEval e) = printExpr e <> "()"
printExpr (IIntrinsicHash schema e) = [idoc|_mlc_hash(#{printExpr e}, "#{pretty schema}")|]
printExpr (IIntrinsicSave fmt schema e path)
  | fmt == "json" = [idoc|_mlc_save_json(#{printExpr e}, "#{pretty schema}", #{printExpr path})|]
  | fmt == "voidstar" = [idoc|_mlc_save_voidstar(#{printExpr e}, "#{pretty schema}", #{printExpr path})|]
  | otherwise = [idoc|_mlc_save(#{printExpr e}, "#{pretty schema}", #{printExpr path})|]
printExpr (IIntrinsicLoad schema (Just t) path) = [idoc|_mlc_load<#{renderIType t}>("#{pretty schema}", #{printExpr path})|]
printExpr (IIntrinsicLoad schema Nothing path) = [idoc|_mlc_load("#{pretty schema}", #{printExpr path})|]
printExpr (IIntrinsicShow schema e) = [idoc|_mlc_show(#{printExpr e}, "#{pretty schema}")|]
printExpr (IIntrinsicRead schema (Just t) e) = [idoc|_mlc_read<#{renderIType t}>("#{pretty schema}", #{printExpr e})|]
printExpr (IIntrinsicRead schema Nothing e) = [idoc|_mlc_read("#{pretty schema}", #{printExpr e})|]

-- | Render a single 'IStmt' IR node as a C++ statement document.
printStmt :: IStmt -> MDoc
printStmt (IAssign v Nothing e) = "auto" <+> pretty v <+> "=" <+> printExpr e <> ";"
printStmt (IAssign v (Just t) e) = renderIType t <+> pretty v <+> "=" <+> printExpr e <> ";"
-- C++ uses an indexed for loop with push_back
printStmt (IMapList resultVar resultType iterVar collection bodyStmts yieldExpr) =
  vsep
    [ resultDecl
    , block 4 [idoc|for(size_t #{pretty iterVar}_idx = 0; #{pretty iterVar}_idx < #{printExpr collection}.size(); #{pretty iterVar}_idx++)|]
        ( vsep
            ( [idoc|auto #{pretty iterVar} = #{printExpr collection}[#{pretty iterVar}_idx];|]
                : map printStmt bodyStmts
                ++ [[idoc|#{pretty resultVar}.push_back(#{printExpr yieldExpr});|]]
            )
        )
    ]
  where
    -- Declare the accumulator with the known type, or fall back to an
    -- auto-typed empty list literal.
    resultDecl = case resultType of
      Just t -> [idoc|#{renderIType t} #{pretty resultVar};|]
      Nothing -> printStmt (IAssign resultVar Nothing (IListLit []))
printStmt (IIf resultVar resultType condExpr thenStmts thenExpr elseStmts elseExpr) =
  vsep
    [ resultDecl
    , block 4 [idoc|if(#{printExpr condExpr})|]
        (vsep (map printStmt thenStmts ++ [[idoc|#{pretty resultVar} = #{printExpr thenExpr};|]]))
    , block 4 "else"
        (vsep (map printStmt elseStmts ++ [[idoc|#{pretty resultVar} = #{printExpr elseExpr};|]]))
    ]
  where
    resultDecl = case resultType of
      Just t -> [idoc|#{renderIType t} #{pretty resultVar};|]
      Nothing -> [idoc|auto #{pretty resultVar};|]
printStmt (IReturn e) = "return(" <> printExpr e <> ");"
printStmt (IExprStmt e) = printExpr e <> ";"
printStmt (IFunDef _ _ _ _) = error "IFunDef not yet implemented for C++ printer"

-- | Render a list of statements, one document per statement.
printStmts :: [IStmt] -> [MDoc]
printStmts = map printStmt

-- | Render C++ dispatch functions from structured dispatch entries.
printDispatch :: [DispatchEntry] -> [DispatchEntry] -> MDoc
printDispatch locals remotes =
  [idoc|uint8_t* local_dispatch(uint32_t mid, const uint8_t** args){
    switch(mid){
        #{align (vsep localCases)}
        default:
            std::ostringstream oss;
            oss << "Invalid local manifold id: " << mid;
            throw std::runtime_error(oss.str());
    }
}

uint8_t* remote_dispatch(uint32_t mid, const uint8_t** args){
    switch(mid){
        #{align (vsep remoteCases)}
        default:
            std::ostringstream oss;
            oss << "Invalid remote manifold id: " << mid;
            throw std::runtime_error(oss.str());
    }
}|]
  where
    localCases = map (makeCase "") locals
    -- remote workers use the "_remote" manifold name suffix
    remoteCases = map (makeCase "_remote") remotes

-- | Render one `case` arm calling the manifold function (optionally suffixed)
-- with the first n entries of the args array.
makeCase :: MDoc -> DispatchEntry -> MDoc
makeCase suffix (DispatchEntry i n) =
  "case" <+> pretty i <> ":" <+> "return" <+> manNamer i <> suffix
    <> tupled ["args[" <> pretty j <> "]" | j <- take n ([0 ..] :: [Int])]
    <> ";"

-- | Assemble a complete C++ pool file from an IProgram and C++-specific extras.
printProgram :: [MDoc] -> [MDoc] -> IProgram -> MDoc
printProgram serialization signatures prog =
  format
    (DF.embededFileText (DF.poolTemplate "cpp"))
    -- NOTE(review): this placeholder marker string may have been garbled by
    -- extraction (angle-bracket content stripped) — confirm against upstream.
    "// <<>>"
    [ vsep (map pretty (ipSources prog))
    , vsep serialization
    , vsep signatures
    , vsep (map pretty (ipManifolds prog))
    , printDispatch (ipLocalDispatch prog) (ipRemoteDispatch prog)
    ]

-- | Render a C++ `template<class T1, ...>` header; empty for no parameters.
printTemplateHeader :: [MDoc] -> MDoc
printTemplateHeader [] = ""
printTemplateHeader ts = "template" <+> encloseSep "<" ">" "," ["class" <+> t | t <- ts]

-- | Render a template argument list `<T1, ...>`; empty for no parameters.
printRecordTemplate :: [MDoc] -> MDoc
printRecordTemplate [] = ""
printRecordTemplate ts = encloseSep "<" ">" "," ts

-- | Render a C++ struct definition.
printStructTypedef ::
  [MDoc] -> -- template parameters (e.g., ["T"])
  MDoc -> -- the name of the structure (e.g., "Person")
  [(MDoc, MDoc)] -> -- key and type for all fields
  MDoc
printStructTypedef params rname fields = vsep [template, struct]
  where
    template = printTemplateHeader params
    struct =
      block 4 ("struct" <+> rname) (vsep [t <+> k <> ";" | (k, t) <- fields]) <> ";"

-- | Render a C++ serializer (toAnything) for a struct.
printSerializer ::
  [MDoc] -> -- template parameters
  MDoc -> -- type of thing being serialized
  [(MDoc, MDoc)] -> -- key and type for all fields
  MDoc
printSerializer params rtype fields =
  [idoc|
#{printTemplateHeader params}
void* toAnything(void* dest, void** cursor, const Schema* schema, const #{rtype}& obj)
{
    return toAnything(dest, cursor, schema, std::make_tuple#{arguments});
}
|]
  where
    -- serialize the struct as a tuple of its fields, in declaration order
    arguments = tupled ["obj." <> key | (key, _) <- fields]

-- | Render a C++ deserializer (fromAnything + get_shm_size) for a struct.
printDeserializer ::
  Bool -> -- build object with constructor
  [MDoc] -> -- template parameters
  MDoc -> -- type of thing being deserialized
  [(MDoc, MDoc)] -> -- key and type for all fields
  MDoc
printDeserializer _ params rtype fields =
  [idoc|
#{printTemplateHeader params}
#{block 4 header body}

#{printTemplateHeader params}
#{block 4 headerGetSize bodyGetSize}
|]
  where
    header = [idoc|#{rtype} fromAnything(const Schema* schema, const void * anything, #{rtype}* dummy = nullptr, const void* base_ptr = nullptr)|]
    body =
      vsep $
        [[idoc|#{rtype} obj;|]]
          <> zipWith assignFields [0 ..] fields
          <> ["return obj;"]

    -- assign field idx from the corresponding schema parameter/offset
    assignFields :: Int -> (MDoc, MDoc) -> MDoc
    assignFields idx (keyName, keyType) =
      vsep
        [ [idoc|#{keyType}* elemental_dumby_#{keyName} = nullptr;|]
        , [idoc|obj.#{keyName} = fromAnything(schema->parameters[#{pretty idx}], (char*)anything + schema->offsets[#{pretty idx}], elemental_dumby_#{keyName}, base_ptr);|]
        ]

    headerGetSize = [idoc|size_t get_shm_size(const Schema* schema, const #{rtype}& data)|]
    bodyGetSize =
      vsep $
        ["size_t size = 0;"]
          <> [getSize idx key | (idx, (key, _)) <- zip [0 ..] fields]
          <> ["return size;"]

    getSize :: Int -> MDoc -> MDoc
    getSize idx key = [idoc|size += get_shm_size(schema->parameters[#{pretty idx}], data.#{key});|]



================================================
FILE: executable/CppTranslator.hs
================================================
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ViewPatterns #-}

{- | Module      : CppTranslator
Description : Translate 'SerialManifold' trees into C++ pool source code
Copyright   : (c) Zebulun Arendsee, 2016-2026
License     : Apache-2.0
Maintainer  : z@morloc.io

Stateful C++ translator using the two-phase IR architecture: lower the
'SerialManifold' tree into 'IStmt'/'IExpr' via 'LowerConfig', then print via
'CppPrinter'. Handles C++-specific concerns like compilation flags, include
paths, struct generation, and template instantiation.
-}
module CppTranslator
  ( translate
  , cppLang
  ) where

import Control.Monad.Identity (Identity, runIdentity)
import qualified Control.Monad.State as CMS
import qualified CppPrinter as CP
import qualified Data.Char as DC
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Morloc.CodeGenerator.Grammars.Common
import Morloc.CodeGenerator.Grammars.Macro (expandMacro)
import Morloc.CodeGenerator.Grammars.Translator.Imperative
  ( IType (..)
  , LowerConfig (..)
  , buildProgramM
  , defaultFoldRules
  , expandDeserialize
  , expandSerialize
  , toIType
  )
import Morloc.CodeGenerator.Namespace
import Morloc.CodeGenerator.Serial
  ( serialAstToType
  , shallowType
  )
import Morloc.Data.Doc
import qualified Morloc.Data.GMap as GMap
import qualified Morloc.Data.Map as Map
import qualified Morloc.Data.Text as MT
import qualified Morloc.Language as ML
import qualified Morloc.Monad as MM
import qualified Morloc.Version as MV
import Morloc.Quasi
import qualified Morloc.System as MS
import qualified Morloc.TypeEval as TE

-- HACK: repeating these here is hacky
-- This same data is repeated in cpp/lang.yaml
cppLang :: ML.Lang
cppLang = ML.Lang "cpp" "cpp"

-- | The C++ type used for all serialized (packet) values.
serialType :: MDoc
serialType = "uint8_t*"

-- | How a C++ parameter is passed: by value, const reference, or const pointer.
data CallSemantics = Copy | Reference | ConstPtr

class HasCppType a where
  cppTypeOf :: a -> CppTranslator MDoc
  cppArgOf :: CallSemantics -> Arg a -> CppTranslator MDoc

-- | Qualify a rendered C++ type according to the chosen call semantics.
setCallSemantics :: CallSemantics -> MDoc -> MDoc
setCallSemantics Copy typestr = typestr
setCallSemantics Reference typestr = "const" <+> typestr <> "&"
setCallSemantics ConstPtr typestr = "const" <+> typestr

chooseCallSemantics :: TypeM -> CallSemantics
chooseCallSemantics Passthrough = ConstPtr -- const uint8_t* packet
chooseCallSemantics (Serial _) = ConstPtr -- const uint8_t* packet
chooseCallSemantics (Native _) = Reference -- for now, primitives should be pass by copy
chooseCallSemantics (Function _ _) = Copy -- currently not used

instance HasCppType TypeM where
  cppTypeOf (Serial _) = return serialType
  cppTypeOf (Native c) = cppTypeOf c
  cppTypeOf Passthrough = return serialType
  cppTypeOf (Function ts t) = do
    t' <- cppTypeOf t
    ts' <- mapM cppTypeOf ts
    return $ "std::function<" <> t' <> tupled ts' <> ">"
  cppArgOf s (Arg i t) = do
    typeStr <- cppTypeOf t
    let typeStrQualified = setCallSemantics s typeStr
    return $ case t of
      (Serial _) -> typeStrQualified <+> svarNamer i
      (Native _) -> typeStrQualified <+> nvarNamer i
      Passthrough -> typeStrQualified <+> svarNamer i
      (Function _ _) -> typeStrQualified <+> nvarNamer i

instance HasCppType NativeManifold where
  cppTypeOf = cppTypeOf . typeMof
  cppArgOf s r = cppArgOf s $ fmap typeMof r

instance {-# OVERLAPPABLE #-} (HasTypeF e) => HasCppType e where
  cppTypeOf = f . typeFof
    where
      f (UnkF (FV _ x)) = return $ pretty x
      f (VarF (FV _ x)) = return $ pretty x
      f (FunF ts t) = do
        t' <- f t
        ts' <- mapM f ts
        return $ "std::function<" <> t' <> tupled ts' <> ">"
      f (NatLitF _) = return mempty
      f (AppF t ts) = do
        t' <- f t
        -- natural-number literal parameters are compile-time only
        let runtimeTs = [x | x <- ts, not (isNatLitF x)]
        ts' <- mapM f runtimeTs
        return . pretty $ expandMacro (render t') (map render ts')
      f t@(NamF _ (FV gc (CV "struct")) _ rs) = do
        recmap <- CMS.gets translatorRecmap
        -- handle autogenerated structs
        case lookup (FV gc (CV "struct"), map fst rs) recmap of
          (Just rec) -> do
            params <- typeParams (zip (map snd (recFields rec)) (map snd rs))
            return $ recName rec <> params
          Nothing ->
            error $ "Record missing from recmap: " <> show t <> " from map: " <> show recmap
      f (NamF _ (FV _ (CV "arrow")) _ _) = return "mlc::ArrowTable"
      f (NamF _ (FV _ s) ps _) = do
        ps' <- mapM f ps
        return $ pretty s <> CP.printRecordTemplate ps'
      f (EffectF _ t) = do
        t' <- f t
        return $ "std::function<" <> t' <> "()" <> ">"
      f (OptionalF t) = do
        t' <- f t
        return $ "std::optional<" <> t' <> ">"
      isNatLitF (NatLitF _) = True
      isNatLitF _ = False
  cppArgOf s (Arg i t) = do
    t' <- cppTypeOf (typeFof t)
    return $ setCallSemantics s t' <+> nvarNamer i

-- | Mutable state threaded through the whole C++ translation pass.
data CppTranslatorState = CppTranslatorState
  { translatorCounter :: Int
  , translatorRecmap :: RecMap
  , translatorSignatureSet :: Set.Set Int
  , translatorLocalManifoldSet :: Set.Set Int
  , translatorRemoteManifoldSet :: Set.Set Int
  , translatorCurrentManifold :: Int
  , translatorEffectLabels :: Map.Map Int (Set.Set Text)
  }

instance Defaultable CppTranslatorState where
  defaultValue =
    CppTranslatorState
      { translatorCounter = 0
      , translatorRecmap = []
      , translatorSignatureSet = Set.empty
      , translatorLocalManifoldSet = Set.empty
      , translatorRemoteManifoldSet = Set.empty
      , translatorCurrentManifold = -1 -- -1 indicates we are not inside a manifold
      , translatorEffectLabels = Map.empty
      }

type CppTranslator a = CMS.StateT CppTranslatorState Identity a

type CppTranslatorM = CMS.StateT CppTranslatorState Identity

-- | Return the current counter value and post-increment it.
getCounter :: CppTranslator Int
getCounter = do
  s <- CMS.get
  let i = translatorCounter s
  CMS.put $ s {translatorCounter = translatorCounter s + 1}
  return i

-- | Reset the counter to zero (done at the start of each segment).
resetCounter :: CppTranslator ()
resetCounter = do
  s <- CMS.get
  CMS.put $ s {translatorCounter = 0}

-- | Translate sources and manifold segments into a complete C++ pool 'Script'.
translate :: [Source] -> [SerialManifold] -> MorlocMonad Script
translate srcs es = do
  -- scopeMap :: GMap Int MVar (Map.Map Lang Scope)
  scopeMap <- MM.gets stateConcreteTypedefs
  -- universalScopeMap :: GMap Int MVar Scope
  universalScopeMap <- MM.gets stateUniversalConcreteTypedefs
  effectMap <- MM.gets stateManifoldEffects
  -- Canonicalize C++ source paths once up front so that the #include
  -- directives emitted by makeCppCode and the -I flags emitted by
  -- makeTheMaker see exactly the same absolute paths. Before this,
  -- `#include "./src/foo.hpp"` could not be resolved against
  -- `-I/abs/src` because the `src/` prefix was duplicated.
  (srcs', _, _) <- handleFlagsAndPaths srcs
  let recmap = unifyRecords . concatMap collectRecords $ es
      translatorState =
        defaultValue {translatorRecmap = recmap, translatorEffectLabels = effectMap}
      code = CMS.evalState (makeCppCode srcs' es universalScopeMap scopeMap) translatorState
  maker <- makeTheMaker srcs'
  poolSubdir <- MM.getModuleName
  return $
    Script
      { scriptBase = "pool"
      , scriptLang = cppLang
      , scriptCode =
          "."
            :/ Dir
              "pools"
              [ Dir
                  poolSubdir
                  [ File
                      "pool.cpp"
                      (Code (T.replace "__MORLOC_VERSION__" (MT.pack MV.versionStr) (render code)))
                  ]
              ]
      , scriptMake = maker
      }

-- | Assemble the full pool source: serializers, signatures, includes, and
-- the translated manifold bodies.
makeCppCode ::
  [Source] ->
  [SerialManifold] ->
  Map.Map Lang Scope ->
  GMap Int MVar (Map.Map Lang Scope) ->
  CppTranslator MDoc
makeCppCode srcs es univeralScopeMap scopeMap = do
  -- ([MDoc], [MDoc])
  (srcDecl, srcSerial) <- generateSourcedSerializers univeralScopeMap scopeMap es
  -- write include statements for sources
  let includeDocs = map translateSource (unique . mapMaybe srcPath $ srcs)
  signatures <- concat <$> mapM makeSignature es
  (autoDecl, autoSerial) <- generateAnonymousStructs
  let serializationCode = autoDecl ++ srcDecl ++ autoSerial ++ srcSerial
  -- build the program (translates each manifold tree)
  program <- buildProgramM includeDocs es translateSegment
  -- create and return complete pool script
  return $ CP.printProgram serializationCode signatures program

-- | Look up the C++ scope for a manifold index, dropping empty entries.
metaTypedefs ::
  GMap Int MVar (Map.Map Lang Scope) ->
  Int -> -- manifold index
  Scope
metaTypedefs tmap i = case GMap.lookup i tmap of
  (GMapJust langmap) -> case Map.lookup cppLang langmap of
    (Just scope) -> Map.filter (not . null) scope
    Nothing -> Map.empty
  _ -> Map.empty

-- | Collect TVar names of all named (non-anonymous) record types used
-- in a SerialManifold tree.
collectNamedRecordTVars :: SerialManifold -> Set.Set TVar
collectNamedRecordTVars e0 = runIdentity $ foldWithSerialManifoldM fm e0
  where
    fm =
      defaultValue
        { opFoldWithNativeExprM = nativeExpr
        , opFoldWithSerialExprM = serialExpr
        }
    nativeExpr _ (DeserializeN_ t s xs) =
      return $ Set.unions [xs, seekNamedRecs t, seekNamedRecs (serialAstToType s)]
    nativeExpr efull e =
      return $ foldlNE Set.union (seekNamedRecs (typeFof efull)) e
    serialExpr _ (SerializeS_ s xs) =
      return $ Set.union (seekNamedRecs (serialAstToType s)) xs
    serialExpr _ e = return $ foldlSE Set.union Set.empty e

    -- recurse through a type, collecting only non-"struct" record names
    seekNamedRecs :: TypeF -> Set.Set TVar
    seekNamedRecs (NamF _ (FV v (CV c)) _ rs)
      | c /= "struct" = Set.insert v (Set.unions (map (seekNamedRecs . snd) rs))
    seekNamedRecs (NamF _ _ _ rs) = Set.unions (map (seekNamedRecs . snd) rs)
    seekNamedRecs (FunF ts t) = Set.unions (map seekNamedRecs (t : ts))
    seekNamedRecs (AppF t ts) = Set.unions (map seekNamedRecs (t : ts))
    seekNamedRecs (EffectF _ t) = seekNamedRecs t
    seekNamedRecs (OptionalF t) = seekNamedRecs t
    seekNamedRecs _ = Set.empty

-- | Build the g++ command that compiles the generated pool source.
makeTheMaker :: [Source] -> MorlocMonad [SysCommand]
makeTheMaker srcs = do
  poolSubdir <- MM.getModuleName
  -- NOTE(review): the path-combine operator on the next two lines was lost in
  -- extraction (stripped as an HTML-like tag); reconstructed as (</>) —
  -- confirm against upstream.
  let outfile = pretty $ "pools" </> poolSubdir </> ML.makeExecutablePoolName cppLang
  let src = pretty $ "pools" </> poolSubdir </> ML.makeSourcePoolName cppLang
  (_, flags, includes) <- handleFlagsAndPaths srcs
  bconf <- MM.gets stateBuildConfig
  let sanitizeFlags = case buildConfigSanitize bconf of
        Just True -> ["-fsanitize=alignment", "-fno-sanitize-recover=alignment"]
        _ -> []
  let incs = "-I." : [pretty ("-I" <> i) | i <- includes]
  let flags' = map pretty (flags ++ sanitizeFlags)
  let cmd =
        SysRun . Code . render $
          [idoc|g++ -O2 -o #{outfile} #{src} #{hsep flags'} #{hsep incs}|]
  return [cmd]

-- | Emit forward-declaration signatures for every manifold in a tree.
makeSignature :: SerialManifold -> CppTranslator [MDoc]
makeSignature = foldWithSerialManifoldM fm
  where
    fm =
      defaultValue
        { opFoldWithSerialManifoldM = serialManifold
        , opFoldWithNativeManifoldM = nativeManifold
        }
    serialManifold (SerialManifold m _ form _ _) _ = manifoldSignature m serialType form
    nativeManifold e@(NativeManifold m _ form _) _ = do
      typestr <- cppTypeOf e
      manifoldSignature m typestr form

-- | Emit one signature per manifold id; deduplicated via the signature set.
manifoldSignature ::
  (HasTypeM t) =>
  Int ->
  MDoc ->
  ManifoldForm (Or TypeS TypeF) t ->
  CppTranslator [MDoc]
manifoldSignature i typestr form = do
  s <- CMS.get
  if Set.member i (translatorSignatureSet s)
    then return []
    else do
      let formArgs = typeMofForm form
      args <- mapM (\r@(Arg _ t) -> cppArgOf (chooseCallSemantics t) r) formArgs
      CMS.put (s {translatorSignatureSet = Set.insert i (translatorSignatureSet s)})
      return [typestr <+> manNamer i <> tupled args <> ";"]

tupleKey :: Int -> MDoc -> MDoc
tupleKey i v = [idoc|std::get<#{pretty i}>(#{v})|]

recordAccess :: MDoc -> MDoc -> MDoc
recordAccess record field = record <> "." <> field

-- | C++ lowering configuration: every language-specific hook used by the
-- generic IR lowering machinery.
cppLowerConfig :: LowerConfig CppTranslatorM
cppLowerConfig =
  LowerConfig
    { lcSrcName = \src -> pretty (srcName src)
    , lcTypeOf = \t -> Just . toIType <$> cppTypeOf t
    , lcSerialAstType = serializeTypeOf
    , lcDeserialAstType = \s -> Just . toIType <$> cppTypeOf (shallowType s)
    , lcRawDeserialAstType = rawTypeOf
    , lcTypeMOf = \_ -> return Nothing
    , lcPackerName = \src -> pretty (srcName src)
    , lcUnpackerName = \src -> pretty (srcName src)
    , lcRecordAccessor = \_ _ -> recordAccess
    , lcDeserialRecordAccessor = \i _ v -> tupleKey i v
    , lcTupleAccessor = tupleKey
    , lcNewIndex = getCounter
    , lcPrintExpr = CP.printExpr
    , lcPrintStmt = CP.printStmt
    , lcEvalPattern = \t p xs -> do
        state <- CMS.get
        return $ evaluatePattern state t p xs
    , lcListConstructor = \_ _ es -> encloseSep "{" "}" "," es
    , lcTupleConstructor = \_ -> ((<>) "std::make_tuple" . tupled)
    , lcRecordConstructor = \recType _ _ _ rs -> do
        t <- cppTypeOf recType
        idx <- getCounter
        let v' = "a" <> pretty idx
            decl = t <+> v' <+> "=" <+> encloseSep "{" "}" "," (map snd rs) <> ";"
        return $ defaultValue {poolExpr = v', poolPriorLines = [decl]}
    , lcForeignCall = \socketFile mid args ->
        let argList = [dquotes socketFile, pretty mid] <> args <> ["NULL"]
         in [idoc|foreign_call#{tupled argList}|]
    , lcRemoteCall = \socketFile mid res args -> do
        let resMem = pretty $ fromMaybe (-1) (remoteResourcesMemory res)
            resTime = pretty $ maybe (-1) unTimeInSeconds (remoteResourcesTime res)
            resCPU = pretty $ fromMaybe (-1) (remoteResourcesThreads res)
            resGPU = pretty $ fromMaybe 0 (remoteResourcesGpus res)
            cacheDir = ".morloc-cache"
            argList = encloseSep "{" "}" "," args
            setup =
              [idoc|resources_t resources = {#{resMem}, #{resTime}, #{resCPU}, #{resGPU}};
const uint8_t* args[] = #{argList};
char* errmsg = NULL;|]
            call =
              [idoc|remote_call(
    #{pretty mid},
    #{dquotes socketFile},
    #{dquotes cacheDir},
    &resources,
    args,
    #{pretty (length args)},
    &errmsg
);
PROPAGATE_ERROR(errmsg)|]
        return $ defaultValue {poolExpr = call, poolPriorLines = [setup]}
    , lcMakeLet = \namer letIndex mt e1 e2 -> do
        typestr <- case mt of
          (Just t) -> cppTypeOf t
          Nothing -> return serialType
        return $ makeLet namer letIndex typestr e1 e2
    , lcReturn = \e -> "return(" <> e <> ");"
    , lcMakeIf = \origExpr condDocs thenDocs elseDocs -> do
        idx <- getCounter
        let v = helperNamer idx
        typeStr <- cppTypeOf origExpr
        let condE = poolExpr condDocs
            thenE = poolExpr thenDocs
            elseE = poolExpr elseDocs
            thenBlock = poolPriorLines thenDocs <> [v <+> "=" <+> thenE <> ";"]
            elseBlock = poolPriorLines elseDocs <> [v <+> "=" <+> elseE <> ";"]
            decl = typeStr <+> v <> ";"
            ifStmt =
              vsep
                [ decl
                , "if" <+> parens condE <+> "{"
                , indent 4 (vsep thenBlock)
                , "} else {"
                , indent 4 (vsep elseBlock)
                , "}"
                ]
        return $
          PoolDocs
            { poolCompleteManifolds =
                poolCompleteManifolds condDocs
                  <> poolCompleteManifolds thenDocs
                  <> poolCompleteManifolds elseDocs
            , poolExpr = v
            , poolPriorLines = poolPriorLines condDocs <> [ifStmt]
            , poolPriorExprs =
                poolPriorExprs condDocs
                  <> poolPriorExprs thenDocs
                  <> poolPriorExprs elseDocs
            }
    , lcMakeDoBlock = \t stmts expr ->
        let isUnit = case t of
              EffectF _ (VarF (FV tv _)) -> tv == TV "Unit"
              VarF (FV tv _) -> tv == TV "Unit"
              _ -> False
         in (,) [] $ case (isUnit, stmts) of
              (True, []) -> "[&](){" <> expr <> "; return mlc::Unit{};}"
              (True, _) ->
                "[&](){"
                  <> nest 4 (line <> vsep (stmts <> [expr <> ";", "return mlc::Unit{};"]))
                  <> line
                  <> "}"
              (False, []) -> "[&](){return " <> expr <> ";}"
              (False, _) ->
                "[&](){"
                  <> nest 4 (line <> vsep (stmts <> ["return " <> expr <> ";"]))
                  <> line
                  <> "}"
    , lcSerialize = \v s -> serialize v s
    , lcDeserialize = \t v s -> do
        typestr <- cppTypeOf t
        deserialize v typestr s
    , lcMakeFunction = \mname args manifoldType priorLines body headForm -> do
        callIndex <- CMS.gets translatorCurrentManifold
        state <- CMS.get
        let effectLabels =
              Map.findWithDefault Set.empty callIndex (translatorEffectLabels state)
        let alreadyDone = case headForm of
              (Just HeadManifoldFormRemoteWorker) ->
                Set.member callIndex (translatorRemoteManifoldSet state)
              _ -> Set.member callIndex (translatorLocalManifoldSet state)
        if alreadyDone
          then return Nothing
          else do
            case headForm of
              (Just HeadManifoldFormRemoteWorker) ->
                CMS.modify
                  ( \s ->
                      s
                        { translatorRemoteManifoldSet =
                            Set.insert callIndex (translatorRemoteManifoldSet s)
                        }
                  )
              _ ->
                CMS.modify
                  ( \s ->
                      s
                        { translatorLocalManifoldSet =
                            Set.insert callIndex (translatorLocalManifoldSet s)
                        }
                  )
            returnTypeStr <- returnType manifoldType
            typedArgs <- mapM (\r@(Arg _ t) -> cppArgOf (chooseCallSemantics t) r) args
            let fullName = mname <> mnameExt headForm
                decl = returnTypeStr <+> fullName <> tupled typedArgs
                enrichError = case headForm of
                  Just HeadManifoldFormRemoteWorker -> True
                  _ -> Set.member "Error" effectLabels
                tryBody = block 4 "try" (vsep $ priorLines <> [body])
                catchBody
                  | enrichError =
                      let throwStatement =
                            vsep
                              [ [idoc|std::string error_message = "Error raised in C++ pool by #{mname}:\n" + std::string(e.what());|]
                              , [idoc|throw std::runtime_error(error_message);|]
                              ]
                       in block 4 "catch (const std::exception& e)" throwStatement
                  | otherwise = block 4 "catch (...)" "throw;"
            return . Just . block 4 decl . vsep $ [tryBody <+> catchBody]
    , lcMakeLambda = \mname contextArgs boundArgs ->
        let vs' =
              take
                (length boundArgs)
                (map (\j -> "std::placeholders::_" <> viaShow j) ([1 ..] :: [Int]))
         in [idoc|std::bind(#{cat (punctuate "," (mname : (contextArgs ++ vs')))})|]
    }
  where
    -- For serialization, records become tuples (that's what _put_value/toAnything expects)
    serializeTypeOf :: SerialAST -> CppTranslator (Maybe IType)
    serializeTypeOf (SerialObject _ _ _ rs) = Just . toIType <$> recordToCppTuple (map snd rs)
    serializeTypeOf s = Just . toIType <$> cppTypeOf (serialAstToType s)

    rawTypeOf :: SerialAST -> CppTranslator (Maybe IType)
    rawTypeOf (SerialObject _ _ _ rs) = Just . toIType <$> recordToCppTuple (map snd rs)
    rawTypeOf s = Just . toIType <$> cppTypeOf (serialAstToType s)

-- | Splice a typed let-assignment between the prior lines of two PoolDocs.
makeLet :: (Int -> MDoc) -> Int -> MDoc -> PoolDocs -> PoolDocs -> PoolDocs
makeLet namer letIndex typestr (PoolDocs ms1 e1 rs1 pes1) (PoolDocs ms2 e2 rs2 pes2) =
  let letAssignment = [idoc|#{typestr} #{namer letIndex} = #{e1};|]
      rs = rs1 <> [letAssignment] <> rs2
   in PoolDocs
        { poolCompleteManifolds = ms1 <> ms2
        , poolExpr = e2
        , poolPriorLines = rs
        , poolPriorExprs = pes1 <> pes2
        }

mnameExt :: Maybe HeadManifoldForm -> MDoc
mnameExt (Just HeadManifoldFormRemoteWorker) = "_remote"
mnameExt _ = ""

returnType :: TypeM -> CppTranslator MDoc
returnType (Function _ t) = cppTypeOf t
returnType t = cppTypeOf t

-- Use `#include "foo.h"` rather than `#include <foo.h>`
translateSource ::
  -- | Path to a header (e.g., `$MORLOC_HOME/src/foo.h`)
  Path ->
  MDoc
translateSource path = "#include" <+> (dquotes . pretty) path

serialize :: MDoc -> SerialAST -> CppTranslator PoolDocs
serialize v s = do
  (expr, stmts) <- expandSerialize cppLowerConfig v s
  return $
    PoolDocs
      { poolCompleteManifolds = []
      , poolExpr = CP.printExpr expr
      , poolPriorLines = map CP.printStmt stmts
      , poolPriorExprs = []
      }

-- reverse of serialize, parameters are the same
deserialize :: MDoc -> MDoc -> SerialAST -> CppTranslator (MDoc, [MDoc])
deserialize varname0 typestr0 s0 = do
  (expr, stmts) <- expandDeserialize cppLowerConfig varname0 s0
  let rendered = CP.printExpr expr
  if null stmts
    then return (rendered, [])
    else do
      schemaVar <- helperNamer <$> getCounter
      let final = [idoc|#{typestr0} #{schemaVar} = #{rendered};|]
      return (schemaVar, map CP.printStmt stmts ++ [final])

recordToCppTuple :: [SerialAST] -> CppTranslator MDoc
recordToCppTuple ts = do
  tsDocs <- mapM (cppTypeOf . serialAstToType) ts
  return $ "std::tuple" <> encloseSep "<" ">" "," tsDocs

-- | Translate one manifold segment, resetting the per-segment counter first.
translateSegment :: SerialManifold -> CppTranslator MDoc
translateSegment m0 = do
  resetCounter
  e <- surroundFoldSerialManifoldM manifoldIndexer (defaultFoldRules cppLowerConfig) m0
  return $ renderPoolDocs e
  where
    manifoldIndexer =
      makeManifoldIndexer
        (CMS.gets translatorCurrentManifold)
        (\i -> CMS.modify (\s -> s {translatorCurrentManifold = i}))

-- handle string interpolation
evaluatePattern :: CppTranslatorState -> TypeF -> Pattern -> [MDoc] -> MDoc
evaluatePattern _ _ (PatternText s ss) xs =
  "interweave_strings" <> tupled [fragments, insertions]
  where
    fragments =
      encloseSep "{" "}" ", " (map (dquotes . pretty . escapeQuotes "\"" "\\\"" . escapeStringLit) (s : ss))
    insertions = encloseSep "{" "}" ", " xs
-- handle getters
evaluatePattern _ _ (PatternStruct (ungroup -> [ss])) [m] = writeSelector m ss
evaluatePattern _ _ (PatternStruct (ungroup -> sss)) [m] =
  encloseSep "{" "}" "," (map (writeSelector m) sss)
evaluatePattern state0 t0 (PatternStruct s0) (m0 : xs0) =
  patternSetter makeTuple makeRecord accessTuple accessRecord m0 t0 s0 xs0
  where
    makeTuple (AppF _ ts) xs =
      let tupleTypes = CMS.evalState (mapM cppTypeOf ts) state0
       in "std::tuple" <> encloseSep "<" ">" "," tupleTypes <> tupled xs
    makeTuple _ _ = error "Unreachable"
    makeRecord _ xs = encloseSep "{" "}" ", " xs
    accessTuple _ m i = "std::get<" <> pretty i <> ">(" <> m <> ")"
    accessRecord _ d k = d <> "." <> pretty k
evaluatePattern _ _ (PatternStruct _) [] = error "Unreachable illegal pattern"

-- | Render a chain of tuple/record selectors applied to a base document.
writeSelector :: MDoc -> [Either Int Text] -> MDoc
writeSelector d [] = d
writeSelector d (Right k : rs) = writeSelector (d <> "." <> pretty k) rs
writeSelector d (Left i : rs) =
  writeSelector ("std::get<" <> pretty i <> ">" <> parens d) rs

typeParams :: [(Maybe TypeF, TypeF)] -> CppTranslator MDoc
typeParams ts = CP.printRecordTemplate <$> mapM cppTypeOf [t | (Nothing, t) <- ts]

-- | Generate struct declarations and (de)serializers for every autogenerated
-- (anonymous) record in the recmap.
generateAnonymousStructs :: CppTranslator ([MDoc], [MDoc])
generateAnonymousStructs = do
  recmap <- CMS.gets translatorRecmap
  xs <- mapM makeSerializers (reverse . map snd $ recmap)
  return (concatMap fst xs, concatMap snd xs)
  where
    makeSerializers :: RecEntry -> CppTranslator ([MDoc], [MDoc])
    makeSerializers rec = do
      let templateTerms = map (("T" <>) . pretty) ([1 ..] :: [Int])
          rs' = zip templateTerms (recFields rec)
      let params = [t | (t, (_, Nothing)) <- rs']
          rname = recName rec
          rtype = rname <> CP.printRecordTemplate [v | (v, (_, Nothing)) <- rs']
      let fieldNames = [k | (_, (k, _)) <- rs']
      fieldTypes <-
        mapM (\(t, v) -> maybeM t cppTypeOf v) [(t', v') | (t', (_, v')) <- rs']
      let fields = [(pretty k, v) | (k, v) <- zip fieldNames fieldTypes]
      let structDecl = CP.printStructTypedef params rname fields
          serializer = CP.printSerializer params rtype fields
          deserializer = CP.printDeserializer False params rtype fields
      return ([structDecl], [serializer, deserializer])

-- monadic form of `maybe` function
maybeM :: (Monad m) => a -> (b -> m a) -> Maybe b -> m a
maybeM _ f (Just x) = f x
maybeM x _ Nothing = return x

generateSourcedSerializers ::
  Map.Map Lang Scope ->
  GMap Int MVar (Map.Map Lang Scope) ->
  [SerialManifold] -> -- all segments that can be called in this pool
  CppTranslator
    ( [MDoc]
    , [MDoc]
    )
generateSourcedSerializers univeralScopeMap scopeMap es0 = do
  perManifold <- Map.unions <$> mapM (foldSerialManifoldM fm) es0
  scope <- case Map.lookup cppLang univeralScopeMap of
    (Just scope) -> return scope
    Nothing -> return Map.empty
  -- Supplement per-manifold typedefs with universal scope entries for named
  -- record types that appear in this pool but are missing from the per-manifold
  -- scope (happens in secondary C++ pools called via foreign_call).
  let usedTypes = Set.unions (map collectNamedRecordTVars es0)
      missingTypes = Set.difference usedTypes (Map.keysSet perManifold)
      supplemental = Map.filterWithKey (\k _ -> Set.member k missingTypes) scope
      typedef = Map.unionWith mergeScopes perManifold supplemental
  foldl groupQuad ([], []) . concat .
Map.elems <$> Map.mapWithKeyM (makeSerials scope) typedef where -- given the universal map of scopes, pull out every one that is used in this subtree fm = defaultValue { opSerialManifoldM = \(SerialManifold_ i _ _ _ e) -> return $ Map.unionWith mergeScopes (metaTypedefs scopeMap i) e , opNativeManifoldM = \(NativeManifold_ i _ _ e) -> return $ Map.unionWith mergeScopes (metaTypedefs scopeMap i) e } -- there are likely to be repeats in the scopes, we only want the unique ones mergeScopes xs ys = unique (xs <> ys) groupQuad :: ([a], [a]) -> (a, a) -> ([a], [a]) groupQuad (xs, ys) (x, y) = (x : xs, y : ys) makeSerials :: Scope -> TVar -> [([Either (TVar, Kind) TypeU], TypeU, ArgDoc, Bool)] -> CppTranslator [(MDoc, MDoc)] makeSerials s v xs = catMaybes <$> mapM (makeSerial s v) xs makeSerial :: Scope -> TVar -> ([Either (TVar, Kind) TypeU], TypeU, ArgDoc, Bool) -> CppTranslator (Maybe (MDoc, MDoc)) makeSerial _ _ (_, NamU _ (TV "struct") _ _, _, _) = return Nothing makeSerial _ _ (_, NamU _ (TV "arrow") _ _, _, _) = return Nothing makeSerial scope _ (ps, NamU r (TV v) _ rs, _, _) = do params <- mapM (either (\(p, _) -> return $ "T" <> pretty p) (\_ -> return "XXX_FIXME")) ps let templateTerms = ["T" <> pretty p | Left (p, _) <- ps] rtype = pretty v <> CP.printRecordTemplate templateTerms rs' = map (second (evaluateTypeU scope)) rs fields = [(pretty k, showDefType ps (typeOf t)) | (k, t) <- rs'] serializer = CP.printSerializer params rtype fields deserializer = CP.printDeserializer (r == NamObject) params rtype fields return $ Just (serializer, deserializer) makeSerial _ _ _ = return Nothing evaluateTypeU :: Scope -> TypeU -> TypeU evaluateTypeU scope t = case TE.evaluateType scope t of (Left e) -> error $ show e (Right t') -> t' showDefType :: [Either (TVar, Kind) TypeU] -> Type -> MDoc showDefType ps (UnkT v) | any (\p -> either (\(tv, _) -> tv == v) (const False) p) ps = "T" <> pretty v | otherwise = pretty v showDefType ps (VarT v) | any (\p -> either (\(tv, _) -> 
tv == v) (const False) p) ps = "T" <> pretty v | otherwise = pretty v showDefType _ (FunT _ _) = error "Cannot serialize functions" showDefType _ (NamT _ v _ _) = pretty v showDefType _ (NatLitT _) = mempty showDefType ps (AppT (VarT (TV v)) ts) = pretty $ expandMacro v (map (render . showDefType ps) runtimeTs) where runtimeTs = [t | t <- ts, not (isNatLitT t)] showDefType _ (AppT _ _) = error "AppT is only OK with VarT, for now" showDefType _ (EffectT _ _) = error "Cannot show EffectT" showDefType _ (NatAddT _ _) = mempty showDefType _ (NatMulT _ _) = mempty showDefType _ (NatSubT _ _) = mempty showDefType _ (NatDivT _ _) = mempty showDefType ps (OptionalT t) = "std::optional<" <> showDefType ps t <> ">" isNatLitT (NatLitT _) = True isNatLitT _ = False -- C++ specific source handling (flags, headers, libraries) handleFlagsAndPaths :: [Source] -> MorlocMonad ([Source], [Text], [Path]) handleFlagsAndPaths srcs = do state <- MM.get let gccversion = gccVersionFlag . foldl max 0 . map packageCppVersion $ statePackageMeta state let explicitLibs = map ("-l" <>) . unique . concatMap packageDependencies $ statePackageMeta state (srcs', libflags, paths) <- fmap unzip3 . mapM flagAndPath . unique $ [s | s <- srcs, srcLang s == cppLang] home <- MM.asks configHome let mlcInclude = ["-I" <> home <> "/include"] mlcPch = ["-include", "morloc_pch.hpp"] mlcLib = ["-L" <> home <> "/lib", "-Wl,-rpath," <> home <> "/lib", "-lmorloc", "-lcppmorloc", "-lpthread"] return ( filter (isJust . srcPath) srcs' , [gccversion] <> explicitLibs ++ (map MT.pack . 
concat) (mlcPch : mlcInclude : mlcLib : libflags) , unique (catMaybes paths) ) gccVersionFlag :: Int -> Text gccVersionFlag i | i <= 17 = "-std=c++17" | otherwise = "-std=c++" <> MT.show' i flagAndPath :: Source -> MorlocMonad (Source, [String], Maybe Path) flagAndPath src@(Source _ srcL (Just p) _ _ _ _ _ _) | srcL == cppLang = case (MS.takeDirectory p, MS.dropExtensions (MS.takeFileName p), MS.takeExtensions p) of (".", base, "") -> do header <- lookupHeader base libFlags <- lookupLib base return (src {srcPath = Just header}, libFlags, Just (MS.takeDirectory header)) (dir, base, _) -> do libFlags <- lookupLib base absDir <- liftIO $ MS.canonicalizePath dir absPath <- liftIO $ MS.canonicalizePath p return (src {srcPath = Just absPath}, libFlags, Just absDir) where lookupHeader :: String -> MorlocMonad Path lookupHeader base = do home <- MM.asks configHome let allPaths = getHeaderPaths home base [".h", ".hpp", ".hxx"] existingPaths <- liftIO . fmap catMaybes . mapM getFile $ allPaths case existingPaths of (x : _) -> liftIO $ MS.canonicalizePath x [] -> MM.throwSystemError $ "Header file " <> pretty base <> ".* not found" lookupLib :: String -> MorlocMonad [String] lookupLib base = do home <- MM.asks configHome let libnamebase = filter DC.isAlphaNum (map DC.toLower base) let libname = "lib" <> libnamebase <> ".so" let allPaths = getLibraryPaths home base libname existingPaths <- liftIO . fmap catMaybes . mapM getFile $ allPaths case existingPaths of (libpath : _) -> do libdir <- liftIO . MS.canonicalizePath . 
MS.takeDirectory $ libpath return [ "-Wl,-rpath=" <> libdir , "-L" <> libdir , "-l" <> libnamebase ] [] -> return [] flagAndPath src@(Source _ srcL Nothing _ _ _ _ _ _) | srcL == cppLang = return (src, [], Nothing) flagAndPath _ = MM.throwSystemError $ "flagAndPath should only be called for C++ functions" getFile :: Path -> IO (Maybe Path) getFile x = do exists <- MS.doesFileExist x return $ if exists then Just x else Nothing getHeaderPaths :: Path -> String -> [String] -> [Path] getHeaderPaths lib base exts = [path <> ext | path <- paths, ext <- exts] where paths = map MS.joinPath [ [base] , ["include", base] , [base, base] , [lib, "include", base] , [lib, "src", base, base] , ["/usr/include", base] , ["/usr/local/include", base] ] getLibraryPaths :: Path -> String -> String -> [Path] getLibraryPaths lib base sofile = map MS.joinPath [ [sofile] , ["lib", sofile] , [base, sofile] , [lib, "lib", sofile] , [lib, "src", base, sofile] , [lib, "src", base, "lib", sofile] ] ================================================ FILE: executable/Main.hs ================================================ {- | Module : Main Description : Executable main module Copyright : (c) Zebulun Arendsee, 2016-2026 License : Apache-2.0 Maintainer : z@morloc.io -} module Main where import GHC.IO.Encoding (setLocaleEncoding, utf8) import Options.Applicative import Subcommands (runMorloc) import UI main :: IO () main = do setLocaleEncoding utf8 runMorloc =<< execParser opts ================================================ FILE: executable/Subcommands.hs ================================================ {-# LANGUAGE OverloadedStrings #-} {- | Module : Subcommands Description : Dispatch CLI subcommands and inject the translator callback Copyright : (c) Zebulun Arendsee, 2016-2026 License : Apache-2.0 Maintainer : z@morloc.io Implements each CLI subcommand (make, typecheck, install, init, dump) and defines the 'TranslateFn' callback that routes C++ to 'CppTranslator' and other languages to the 
generic translator. This is the dependency injection point that keeps translator code out of the library. -} module Subcommands (runMorloc) where import Control.Exception (SomeException, bracket, finally, try) import Data.Time.Clock (getCurrentTime) import Data.Time.Format (formatTime, defaultTimeLocale) import qualified CppTranslator import qualified Data.Aeson as JSON import qualified Data.ByteString.Lazy as BL import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.Text as T import qualified Data.Text.IO as TIO import Morloc (generatePools) import qualified Morloc as M import Morloc.CodeGenerator.Emit (TranslateFn) import qualified Morloc.CodeGenerator.Grammars.Translator.Generic as Generic import Morloc.CodeGenerator.Grammars.Translator.PseudoCode (pseudocodeSerialManifold) import Morloc.CodeGenerator.Namespace (SerialManifold (..)) import qualified Morloc.CodeGenerator.SystemConfig as MSC import qualified Morloc.Completion as Completion import qualified Morloc.Config as Config import Morloc.Data.Doc import qualified Morloc.Data.GMap as GMap import qualified Morloc.Data.Text as MT import qualified Morloc.Frontend.API as F import Morloc.Module (OverwriteProtocol (..), findMainLocFile) import qualified Morloc.Module as Mod import qualified Morloc.Monad as MM import Morloc.Namespace.Expr import Morloc.Namespace.Prim import Morloc.Namespace.State import Morloc.Namespace.Type import qualified Morloc.ProgramBuilder.Install as Install import Morloc.Typecheck.Internal (prettyTypeU) import System.Directory ( createDirectoryIfMissing , doesDirectoryExist , doesFileExist , getCurrentDirectory , listDirectory , removeDirectoryRecursive , removeFile , setCurrentDirectory ) import System.Exit (exitFailure, exitSuccess) import System.FilePath (dropExtension, takeDirectory, takeFileName) import System.IO (hPutStrLn, stderr) import System.IO.Temp (createTempDirectory) import qualified System.Process as SP import UI decodePackageMeta :: 
BL.ByteString -> Maybe PackageMeta decodePackageMeta = JSON.decode -- | Route each language to its translator. translator :: TranslateFn translator lang srcs es | lang == CppTranslator.cppLang = CppTranslator.translate srcs es | otherwise = Generic.translate lang srcs es runMorloc :: CliCommand -> IO () runMorloc args = do config <- getConfig args buildConfig <- Config.loadBuildConfig config let verbose = getVerbosity args runPassed <- case args of (CmdMake g) -> cmdMake g verbose config buildConfig (CmdInstall g) -> cmdInstall g verbose config buildConfig (CmdTypecheck g) -> cmdTypecheck g verbose config buildConfig (CmdDump g) -> cmdDump g verbose config buildConfig (CmdInit g) -> cmdInit g config (CmdList g) -> cmdList g config (CmdUninstall g) -> cmdUninstall g config (CmdNew g) -> cmdNew g (CmdEval g) -> cmdEval g verbose config buildConfig case runPassed of True -> exitSuccess False -> exitFailure -- | read the global morloc config file or return a default one getConfig :: CliCommand -> IO Config.Config getConfig (CmdMake g) = getConfig' (makeConfig g) (makeVanilla g) getConfig (CmdInstall g) = getConfig' (installConfig g) (installVanilla g) getConfig (CmdTypecheck g) = getConfig' (typecheckConfig g) (typecheckVanilla g) getConfig (CmdDump g) = getConfig' (dumpConfig g) (dumpVanilla g) getConfig (CmdInit g) = getConfig' (initConfig g) (initVanilla g) getConfig (CmdList g) = getConfig' (listConfig g) (listVanilla g) getConfig (CmdUninstall g) = getConfig' (uninstallConfig g) (uninstallVanilla g) getConfig (CmdEval g) = getConfig' (evalConfig g) (evalVanilla g) getConfig (CmdNew _) = getConfig' "" False getConfig' :: String -> Bool -> IO Config.Config getConfig' _ True = Config.loadMorlocConfig Nothing getConfig' "" _ = Config.loadMorlocConfig Nothing getConfig' filename _ = Config.loadMorlocConfig (Just filename) getVerbosity :: CliCommand -> Int getVerbosity (CmdMake g) = makeVerbose g getVerbosity (CmdInstall g) = installVerbose g getVerbosity (CmdTypecheck 
g) = typecheckVerbose g getVerbosity (CmdDump g) = dumpVerbose g getVerbosity (CmdInit g) = if initQuiet g then 0 else 1 getVerbosity (CmdList g) = listVerbose g getVerbosity (CmdEval g) = evalVerbose g getVerbosity (CmdUninstall _) = 0 getVerbosity (CmdNew _) = 0 readScript :: Bool -> String -> IO (Maybe Path, Code) readScript True code = return (Nothing, Code (MT.pack code)) readScript _ filename = do code <- MT.readFile filename return (Just filename, Code code) -- | Typecheck callback for module installation typecheckModuleFn :: FilePath -> MorlocMonad [(T.Text, T.Text)] typecheckModuleFn mainFile = do code <- liftIO $ MT.readFile mainFile -- Save current state, run typecheck in a clean sub-state savedState <- MM.get result <- MM.catchError ( do xs <- M.typecheckFrontend (Just mainFile) (Code code) st <- MM.get return [ (render (pretty v), render (pretty t)) | AnnoS (Idx i t) _ _ <- xs , Just v <- [Map.lookup i (stateName st)] ] ) (\_ -> return []) -- Restore state so module typechecking doesn't pollute the parent state MM.put savedState return result -- | Install a module cmdInstall :: InstallCommand -> Int -> Config.Config -> BuildConfig -> IO Bool cmdInstall args verbosity conf buildConfig = do userSources <- Map.fromList <$> mapM (\modstr -> do name <- Mod.extractModuleName modstr return (name, modstr)) moduleTexts let cmdInstall' = mapM ( \modstr -> Mod.installModule (installForce args) (installUseSSH args) libpath (Config.configPlaneCore conf) mayTypecheck userSources Set.empty Mod.ExplicitInstall modstr ) moduleTexts passed <- MM.runMorlocMonad Nothing verbosity conf buildConfig cmdInstall' >>= MM.writeMorlocReturn if passed && installBuild args then buildInstalledModules args verbosity conf buildConfig moduleTexts libpath else return passed where libpath = Config.configLibrary conf Config.configPlane conf moduleTexts = map MT.pack (installModuleStrings args) mayTypecheck = if installNoTypecheck args then Nothing else Just typecheckModuleFn -- | Build 
and install executables for installed modules buildInstalledModules :: InstallCommand -> Int -> Config.Config -> BuildConfig -> [T.Text] -> FilePath -> IO Bool buildInstalledModules args verbosity conf buildConfig moduleTexts libpath = do results <- mapM buildOne moduleTexts return (and results) where force = installForce args == ForceOverwrite buildOne modstr = do name <- T.unpack <$> Mod.extractModuleName modstr let moduleDir = libpath name mainFile <- findMainLocFile moduleDir name case mainFile of Nothing -> do putStrLn $ "Warning: no main.loc found for '" <> name <> "', skipping build" return True Just locFile -> do origDir <- getCurrentDirectory setCurrentDirectory moduleDir buildResult <- buildModuleExecutable locFile name verbosity conf buildConfig force `finally` setCurrentDirectory origDir return buildResult buildModuleExecutable locFile _name verbosity' config buildConfig' forceOverwrite = do code <- MT.readFile locFile makeAndInstall (Just locFile) Nothing (Code code) [] verbosity' config buildConfig' forceOverwrite -- | Compile a morloc program and optionally install it. -- Shared by `morloc make --install` and `morloc install --build`. makeAndInstall :: Maybe Path -> Maybe String -> Code -> [T.Text] -> Int -> Config.Config -> BuildConfig -> Bool -> IO Bool makeAndInstall path outfile code extraIncludes verbosity config buildConfig force = do let action = do MM.modify (\s -> s {stateInstall = True, stateInstallForce = force}) M.writeProgram translator path code result <- MM.runMorlocMonad outfile verbosity config buildConfig action passed <- MM.writeMorlocReturn result if passed then do let (_, finalState) = result -- Merge include fields from all loaded packages. -- Nothing = include everything (default mode). -- Just [...] = strict allowlist mode. 
pkgIncludes = map packageInclude (statePackageMeta finalState) mergedIncludes | not (null extraIncludes) = -- CLI --include flags force strict mode Just (concatMap (fromMaybe []) pkgIncludes ++ extraIncludes) | all (== Nothing) pkgIncludes = Nothing | otherwise = Just (concatMap (fromMaybe []) pkgIncludes) allSources = concat (GMap.elems (stateSources finalState)) directSourcePaths = [ p | Source{srcPath = Just p} <- allSources ] case stateInstallDir finalState of Nothing -> do putStrLn "Error: install directory was not set during compilation" return False Just installDir -> do let installName = takeFileName installDir packageRoot = case fmap takeDirectory path of Just "" -> "." Just d -> d Nothing -> "." -- Atomic install: clean up installDir on any failure so the user -- is not left with partial state requiring --force on retry. installResult <- try (do -- Only validate coverage in strict mode (explicit include patterns) case mergedIncludes of Just pats -> do Install.validateIncludeCoverage packageRoot pats directSourcePaths Nothing -> return () Install.installProgram (Config.configHome config) installDir installName mergedIncludes force ) :: IO (Either SomeException ()) case installResult of Right () -> return True Left e -> do dirExists <- doesDirectoryExist installDir if dirExists then do removeDirectoryRecursive installDir hPutStrLn stderr $ "Cleaned up partial install: " <> installDir else return () hPutStrLn stderr $ show e return False else return False -- | build a Morloc program, generating the nexus and pool files cmdMake :: MakeCommand -> Int -> Config.Config -> BuildConfig -> IO Bool cmdMake args verbosity config buildConfig = do (path, code) <- readScript (makeExpression args) (makeScript args) outfile <- case makeOutfile args of "" -> return Nothing x -> return . 
Just $ x if makeInstall args then makeAndInstall path outfile code (map T.pack (makeInclude args)) verbosity config buildConfig (makeForce args) else do let action = do MM.modify (\s -> s {stateInstall = False}) M.writeProgram translator path code result <- MM.runMorlocMonad outfile verbosity config buildConfig action passed <- MM.writeMorlocReturn result return passed -- | Evaluate a morloc expression cmdEval :: EvalCommand -> Int -> Config.Config -> BuildConfig -> IO Bool cmdEval args verbosity config buildConfig = do let rawExpr = evalExpression args code = MT.pack (preprocessEvalInput rawExpr) tmpBase = Config.configTmpDir config saveName = evalSave args extraArgs = evalArgs args isSave = not (null saveName) exeName = if isSave then saveName else "eval" createDirectoryIfMissing True tmpBase bracket (do origDir <- getCurrentDirectory tmpDir <- createTempDirectory tmpBase "morloc-eval-" setCurrentDirectory tmpDir return (origDir, tmpDir)) (\(origDir, tmpDir) -> do setCurrentDirectory origDir cleanupTmpDir tmpDir) (\(_origDir, tmpDir) -> do let action = do MM.modify (\s -> s {stateEvalMode = True}) if isSave then MM.modify (\s -> s {stateInstall = True}) else return () M.writeProgram translator Nothing (Code code) result <- MM.runMorlocMonad (Just exeName) verbosity config buildConfig action passed <- MM.writeMorlocReturn result if not passed then return False else if isSave then do let (_, finalState) = result pkgIncludes = map packageInclude (statePackageMeta finalState) mergedIncludes | all (== Nothing) pkgIncludes = Nothing | otherwise = Just (concatMap (fromMaybe []) pkgIncludes) case stateInstallDir finalState of Nothing -> do putStrLn "Error: install directory was not set during compilation" return False Just installDir -> do evalInstallResult <- try (do Install.installProgram (Config.configHome config) installDir saveName mergedIncludes True writeEvalMeta (Config.configHome config) saveName rawExpr ) :: IO (Either SomeException ()) case evalInstallResult 
of Right () -> return True Left e -> do dirExists <- doesDirectoryExist installDir if dirExists then do removeDirectoryRecursive installDir hPutStrLn stderr $ "Cleaned up partial install: " <> installDir else return () hPutStrLn stderr $ show e return False else do let exe = tmpDir exeName subcommand <- getFirstSubcommand exe let cmdArgs = subcommand : extraArgs runResult <- try (SP.callProcess exe cmdArgs) :: IO (Either SomeException ()) case runResult of Right () -> return True Left e -> do putStrLn $ "Error running expression: " ++ show e return False) where cleanupTmpDir dir = do exists <- doesDirectoryExist dir if exists then removeDirectoryRecursive dir else return () -- | Extract the first subcommand name from the manifest embedded in a wrapper script. -- Falls back to "__expr__" if the manifest cannot be parsed. getFirstSubcommand :: FilePath -> IO String getFirstSubcommand wrapperPath = do result <- try (readFile wrapperPath) :: IO (Either SomeException String) case result of Left _ -> return "__expr__" Right contents -> do let marker = "### MANIFEST ###" afterMarker = drop 1 $ dropWhile (/= marker) (lines contents) manifestStr = unlines afterMarker case JSON.eitherDecode (BL.fromStrict (MT.encodeUtf8 (MT.pack manifestStr))) of Right pm -> case pmCommands pm of (cmd : _) -> return (T.unpack (pcName cmd)) [] -> return "__expr__" Left _ -> return "__expr__" -- | Write metadata about the saved eval expression writeEvalMeta :: FilePath -> String -> String -> IO () writeEvalMeta cfgHome name expr = do now <- getCurrentTime let fdbDir = cfgHome "fdb" metaPath = fdbDir name ++ ".eval-meta" timestamp = formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%SZ" now json = "{\"expression\":" ++ jsonEscape expr ++ ",\"timestamp\":\"" ++ timestamp ++ "\"}" createDirectoryIfMissing True fdbDir writeFile metaPath json where jsonEscape s = "\"" ++ concatMap escChar s ++ "\"" escChar '"' = "\\\"" escChar '\\' = "\\\\" escChar '\n' = "\\n" escChar '\t' = "\\t" escChar c = [c] -- | 
-- | Preprocess eval input: replace top-level semicolons with newlines.
-- Semicolons inside explicit brace blocks (depth > 0) are preserved.
-- Leading whitespace after each replacement is stripped so the layout
-- rule treats each statement as a new top-level declaration.
preprocessEvalInput :: String -> String
preprocessEvalInput = scan (0 :: Int)
  where
    -- scan: outside any string literal, tracking brace nesting depth
    scan _ [] = []
    scan depth ('{' : rest) = '{' : scan (depth + 1) rest
    scan depth ('}' : rest) = '}' : scan (max 0 (depth - 1)) rest
    scan 0 (';' : rest) = '\n' : scan 0 (dropWhile (== ' ') rest)
    scan depth ('"' : rest) = '"' : inString depth rest
    scan depth (c : rest) = c : scan depth rest
    -- inString: inside a double-quoted literal, honoring backslash escapes
    inString depth [] = scan depth []
    inString depth ('"' : rest) = '"' : scan depth rest
    inString depth ('\\' : c : rest) = '\\' : c : inString depth rest
    inString depth (c : rest) = c : inString depth rest

cmdTypecheck :: TypecheckCommand -> Int -> Config.Config -> BuildConfig -> IO Bool
cmdTypecheck args _ config buildConfig = do
  (path, code) <- readScript (typecheckExpression args) (typecheckScript args)
  let verbosity = typecheckVerbose args
  if typecheckType args
    then case F.readType (unCode code) of
      (Left err') -> do
        putStrLn err'
        return False
      (Right x) -> do
        print x
        return True
    else
      if typecheckRealize args
        then do
          (passed, result) <-
            MM.runMorlocMonad
              Nothing
              verbosity
              config
              buildConfig
              ( M.typecheck path code >>= (generatePools .
snd) ) |>> writeTypecheckOutput verbosity putDoc (result <> "\n") return passed else do (passed, result) <- MM.runMorlocMonad Nothing verbosity config buildConfig (M.typecheckFrontend path code) |>> writeFrontendTypecheckOutput verbosity putDoc (result <> "\n") return passed writeFrontendTypecheckOutput :: Int -> ((Either MorlocError [AnnoS (Indexed TypeU) Many Int], [MT.Text]), MorlocState) -> (Bool, MDoc) writeFrontendTypecheckOutput _ ((Left e, _), st) = (False, MM.makeMorlocError st e) writeFrontendTypecheckOutput 0 ((Right xs, _), st) = (True, vsep (map (writeFrontendTypes st) xs)) writeFrontendTypecheckOutput 1 x = writeFrontendTypecheckOutput 0 x -- no difference in verbosity writeFrontendTypecheckOutput _ _ = (False, "I don't know how to be that verbose") writeFrontendTypes :: MorlocState -> AnnoS (Indexed TypeU) Many Int -> MDoc writeFrontendTypes st (AnnoS (Idx i t) _ _) = case Map.lookup i (stateName st) of (Just v) -> pretty v <+> "::" <+> prettyTypeU t Nothing -> "? ::" <+> prettyTypeU t writeTypecheckOutput :: Int -> ((Either MorlocError [(Lang, [SerialManifold])], [MT.Text]), MorlocState) -> (Bool, MDoc) writeTypecheckOutput _ ((Left e, _), st) = (False, MM.makeMorlocError st e) writeTypecheckOutput _ ((Right pools, _), _) = (True, vsep $ map (uncurry writePool) pools) writePool :: Lang -> [SerialManifold] -> MDoc writePool lang manifolds = pretty lang <+> "pool:" <> "\n" <> vsep (map pseudocodeSerialManifold manifolds) <> "\n" cmdDump :: DumpCommand -> Int -> Config.Config -> BuildConfig -> IO Bool cmdDump args _ config buildConfig = do (path, code) <- readScript (dumpExpression args) (dumpScript args) let verbosity = dumpVerbose args ((x, _), st) <- MM.runMorlocMonad Nothing verbosity config buildConfig (F.parse path code) case x of (Left e) -> do putDoc $ MM.makeMorlocError st e return False (Right e) -> do putDoc $ prettyDAG e return True cmdInit :: InitCommand -> Config.Config -> IO Bool cmdInit ic config = MSC.configureAll (not (initQuiet ic)) 
(initForce ic) (initSlurmSupport ic) (initSanitize ic) config cmdNew :: NewCommand -> IO Bool cmdNew args = do let pkgFile = "package.yaml" exists <- doesFileExist pkgFile if exists then do hPutStrLn stderr "Error: package.yaml already exists. Remove it first or use a different directory." return False else do name <- if null (newName args) then takeFileName <$> getCurrentDirectory else return (newName args) writeFile pkgFile $ unlines [ "name: " ++ name , "version: 0.1.0" , "homepage: null" , "synopsis: null" , "description: null" , "category: null" , "license: MIT" , "author: null" , "maintainer: null" , "github: null" , "bug-reports: null" , "dependencies: []" , "# Uncomment to restrict which files are copied during install." , "# By default, all files are included (filtered by .morlocignore)." , "# include:" , "# - \"*.py\"" , "# - \"src/\"" ] hPutStrLn stderr $ "Created package.yaml for '" ++ name ++ "'" return True prettyDAG :: DAG MVar e ExprI -> MDoc prettyDAG m0 = vsep (map prettyEntry (Map.toList m0)) where prettyEntry :: (MVar, (ExprI, [(MVar, e)])) -> MDoc prettyEntry (k, (n, _)) = block 4 (pretty k) (vsep [pretty n]) -- ====================================================================== -- List command -- ====================================================================== -- Lightweight JSON types for reading manifests data ModuleManifest = ModuleManifest { mmName :: T.Text , mmVersion :: T.Text , mmSynopsis :: T.Text , mmExports :: [(T.Text, T.Text)] , mmMorlocDeps :: [T.Text] , mmReason :: T.Text } data ProgramManifest = ProgramManifest { pmName :: T.Text , pmCommands :: [ProgramCommand] } data ProgramCommand = ProgramCommand { pcName :: T.Text , pcReturnType :: T.Text , _pcArgSchemas :: [T.Text] } instance JSON.FromJSON ModuleManifest where parseJSON = JSON.withObject "ModuleManifest" $ \o -> ModuleManifest <$> o JSON..:? "name" JSON..!= "" <*> o JSON..:? "version" JSON..!= "" <*> o JSON..:? "synopsis" JSON..!= "" <*> (o JSON..:? 
"exports" JSON..!= [] >>= mapM parseExport) <*> o JSON..:? "morloc_dependencies" JSON..!= [] <*> o JSON..:? "install_reason" JSON..!= "" where parseExport = JSON.withObject "Export" $ \o -> (,) <$> o JSON..: "name" <*> o JSON..: "type" instance JSON.FromJSON ProgramManifest where parseJSON = JSON.withObject "ProgramManifest" $ \o -> ProgramManifest <$> o JSON..:? "name" JSON..!= "" <*> o JSON..:? "commands" JSON..!= [] instance JSON.FromJSON ProgramCommand where parseJSON = JSON.withObject "ProgramCommand" $ \o -> ProgramCommand <$> o JSON..: "name" <*> o JSON..:? "return_type" JSON..!= "" <*> o JSON..:? "arg_schemas" JSON..!= [] -- | Check if pattern is a subsequence of the target string (case-insensitive) subsequenceMatch :: String -> String -> Bool subsequenceMatch [] _ = True subsequenceMatch _ [] = False subsequenceMatch (p : ps) (t : ts) | toLower p == toLower t = subsequenceMatch ps ts | otherwise = subsequenceMatch (p : ps) ts cmdList :: ListCommand -> Config.Config -> IO Bool cmdList args config = do let fdbDir = Config.configHome config "fdb" libDir = Config.configLibrary config Config.configPlane config verbose = listVerbose args kind = listKind args pat = listPattern args -- Load module manifests allModules <- if kind /= Just ListPrograms then do mods <- loadModuleManifests fdbDir discovered <- discoverModules libDir fdbDir return (mods ++ discovered) else return [] -- Load program manifests allPrograms <- if kind /= Just ListModules then loadProgramManifests fdbDir else return [] -- Filter by pattern let modules = case pat of Nothing -> allModules Just p -> filter (\m -> subsequenceMatch p (T.unpack (mmName m))) allModules programs = case pat of Nothing -> allPrograms Just p -> filter (\m -> subsequenceMatch p (T.unpack (pmName m))) allPrograms -- For verbose mode, fill in exports from .loc files when manifest has none modules' <- if verbose > 0 then mapM (fillModuleExports libDir) modules else return modules -- Print results if null modules' && null 
programs then putStrLn "No installed modules or programs found." else do if not (null modules') then do putStrLn "Modules:" mapM_ (printModule verbose) modules' else return () if not (null programs) then do if not (null modules') then putStrLn "" else return () putStrLn "Programs:" mapM_ (printProgram verbose) programs else return () return True -- | If a module has no exports in its manifest, scan its .loc file for type signatures fillModuleExports :: FilePath -> ModuleManifest -> IO ModuleManifest fillModuleExports libDir m | not (null (mmExports m)) = return m | otherwise = do let modDir = libDir T.unpack (mmName m) modName = T.unpack (mmName m) mainFile <- findMainLocFile modDir modName case mainFile of Nothing -> return m Just f -> do sigs <- extractTypeSignatures f return m {mmExports = sigs} -- | Extract top-level type signatures from a .loc file extractTypeSignatures :: FilePath -> IO [(T.Text, T.Text)] extractTypeSignatures path = do result <- try (TIO.readFile path) :: IO (Either SomeException T.Text) case result of Left _ -> return [] Right content -> return . map parseSig . filter isTypeSig . 
T.lines $ content where isTypeSig ln = let stripped = T.stripStart ln in not (T.null stripped) && T.head stripped /= '-' -- not a comment && T.head stripped /= '{' -- not a block comment && T.isInfixOf " :: " stripped && not (T.isPrefixOf "type " stripped) && not (T.isPrefixOf "source " stripped) && not (T.isPrefixOf "import " stripped) && not (T.isPrefixOf "module " stripped) && not (T.isPrefixOf "class " stripped) && not (T.isPrefixOf "instance " stripped) parseSig ln = let (sigName, rest) = T.breakOn " :: " (T.stripStart ln) typ = T.strip (T.drop 4 rest) -- drop " :: " in (T.strip sigName, typ) loadModuleManifests :: FilePath -> IO [ModuleManifest] loadModuleManifests fdbDir = do result <- try (listDirectory fdbDir) :: IO (Either SomeException [FilePath]) case result of Left _ -> return [] Right entries -> do let moduleFiles = filter (".module" `isSuffixOf`) entries catMaybes <$> mapM ( \f -> do r <- try (BL.readFile (fdbDir f)) :: IO (Either SomeException BL.ByteString) case r of Left _ -> return Nothing Right bs -> case JSON.eitherDecode bs of Right m -> return (Just m) Left _ -> return Nothing ) moduleFiles loadProgramManifests :: FilePath -> IO [ProgramManifest] loadProgramManifests fdbDir = do result <- try (listDirectory fdbDir) :: IO (Either SomeException [FilePath]) case result of Left _ -> return [] Right entries -> do let manifestFiles = filter (".manifest" `isSuffixOf`) entries catMaybes <$> mapM ( \f -> do r <- try (BL.readFile (fdbDir f)) :: IO (Either SomeException BL.ByteString) case r of Left _ -> return Nothing Right bs -> case JSON.eitherDecode bs of Right m -> let m' = if T.null (pmName m) then m {pmName = T.pack (dropExtension (takeFileName f))} else m in return (Just m') Left _ -> return Nothing ) manifestFiles -- | Discover modules in the library that lack manifests discoverModules :: FilePath -> FilePath -> IO [ModuleManifest] discoverModules libDir fdbDir = do libExists <- doesDirectoryExist libDir if not libExists then return [] else do 
entries <- listDirectory libDir catMaybes <$> mapM ( \name -> do let manifestPath = fdbDir name ++ ".module" moduleDir = libDir name hasManifest <- doesFileExist manifestPath isDir <- doesDirectoryExist moduleDir if hasManifest || not isDir then return Nothing else do -- Try to read package.yaml for basic info let pkgYaml = moduleDir "package.yaml" pkgExists <- doesFileExist pkgYaml if pkgExists then do r <- try (BL.readFile pkgYaml) :: IO (Either SomeException BL.ByteString) case r of Left _ -> return (Just (minimalManifest name)) Right bs -> case decodePackageMeta bs of Just meta -> return . Just $ ModuleManifest { mmName = if T.null (packageName meta) then T.pack name else packageName meta , mmVersion = packageVersion meta , mmSynopsis = packageSynopsis meta , mmExports = [] , mmMorlocDeps = [] , mmReason = "" } Nothing -> return (Just (minimalManifest name)) else return (Just (minimalManifest name)) ) entries where minimalManifest name = ModuleManifest { mmName = T.pack name , mmVersion = "" , mmSynopsis = "" , mmExports = [] , mmMorlocDeps = [] , mmReason = "" } printModule :: Int -> ModuleManifest -> IO () printModule verbose m = do let name = mmName m ver = if T.null (mmVersion m) then "" else " " <> T.unpack (mmVersion m) syn = if T.null (mmSynopsis m) then "" else " " <> T.unpack (mmSynopsis m) putStrLn $ " " <> T.unpack name <> ver <> syn if verbose > 0 then mapM_ (\(n, t) -> putStrLn $ " " <> T.unpack n <> " :: " <> T.unpack t) (mmExports m) else return () printProgram :: Int -> ProgramManifest -> IO () printProgram verbose p = do let name = pmName p cmds = pmCommands p cmdCount = length cmds summary = show cmdCount <> " command" <> (if cmdCount /= 1 then "s" else "") putStrLn $ " " <> T.unpack name <> " " <> summary if verbose > 0 then mapM_ (\c -> putStrLn $ " " <> T.unpack (pcName c) <> " :: " <> T.unpack (pcReturnType c)) cmds else return () -- ====================================================================== -- Uninstall command -- 
====================================================================== cmdUninstall :: UninstallCommand -> Config.Config -> IO Bool cmdUninstall args config = do let fdbDir = Config.configHome config "fdb" libDir = Config.configLibrary config Config.configPlane config binDir = Config.configHome config "bin" exeDir = Config.configHome config "exe" dryRun = uninstallDryRun args kind = uninstallKind args names <- if uninstallAll args then do fdbExists <- doesDirectoryExist fdbDir if not fdbExists then return [] else do entries <- listDirectory fdbDir let moduleNames = [dropExtension f | f <- entries, ".module" `isSuffixOf` f] return moduleNames else return (uninstallNames args) if null names then do if uninstallAll args then putStrLn "No modules installed" else putStrLn "No module names specified. Use --all to uninstall all modules." return True else do let skipDepCheck = uninstallAll args allPassed <- mapM (\name -> uninstallOne fdbDir libDir binDir exeDir dryRun skipDepCheck kind name) names let anyRemoved = or allPassed -- Regenerate completions if anything was actually removed if anyRemoved && not dryRun then Completion.regenerateCompletions False (Config.configHome config) else return () return True uninstallOne :: FilePath -> FilePath -> FilePath -> FilePath -> Bool -> Bool -> Maybe ListKind -> String -> IO Bool uninstallOne fdbDir libDir binDir exeDir dryRun skipDepCheck kind name = do let moduleManifest = fdbDir name ++ ".module" programManifest = fdbDir name ++ ".manifest" moduleDir = libDir name hasModule <- doesFileExist moduleManifest hasModuleDir <- doesDirectoryExist moduleDir hasProgram <- doesFileExist programManifest let removeModule = (hasModule || hasModuleDir) && kind /= Just ListPrograms removeProgram = hasProgram && kind /= Just ListModules if not removeModule && not removeProgram then do putStrLn $ "Nothing found for '" <> name <> "'" return False else do -- Reverse dependency check for modules (skip when uninstalling all) if removeModule then 
do if not skipDepCheck then checkReverseDeps fdbDir name else return () if dryRun then do putStrLn $ "Would uninstall module '" <> name <> "'" if hasModuleDir then putStrLn $ " Remove: " <> moduleDir else return () if hasModule then putStrLn $ " Remove: " <> moduleManifest else return () else do if hasModuleDir then removeDirectoryRecursive moduleDir else return () if hasModule then removeFile moduleManifest else return () hPutStrLn stderr $ "Uninstalled module '" <> name <> "'" else return () if removeProgram then do let binPath = binDir name binExists <- doesFileExist binPath if dryRun then do putStrLn $ "Would uninstall program '" <> name <> "'" if binExists then putStrLn $ " Remove: " <> binPath else return () -- Check for exe dir exeDirPath <- findExeDir exeDir name case exeDirPath of Just d -> putStrLn $ " Remove: " <> d Nothing -> return () putStrLn $ " Remove: " <> programManifest else do if binExists then removeFile binPath else return () exeDirPath <- findExeDir exeDir name case exeDirPath of Just d -> removeDirectoryRecursive d Nothing -> return () removeFile programManifest hPutStrLn stderr $ "Uninstalled program '" <> name <> "'" else return () return (removeModule || removeProgram) -- | Find the exe directory for a program findExeDir :: FilePath -> String -> IO (Maybe FilePath) findExeDir exeDir name = do exists <- doesDirectoryExist exeDir if not exists then return Nothing else do entries <- listDirectory exeDir -- Look for name or name- let matches = filter (\e -> e == name || (name ++ "-") `isPrefixOf'` e) entries case matches of (m : _) -> return (Just (exeDir m)) [] -> return Nothing where isPrefixOf' prefix str = take (length prefix) str == prefix -- | Check if any other modules depend on the one being uninstalled checkReverseDeps :: FilePath -> String -> IO () checkReverseDeps fdbDir name = do result <- try (listDirectory fdbDir) :: IO (Either SomeException [FilePath]) case result of Left _ -> return () Right entries -> do let moduleFiles = 
filter (".module" `isSuffixOf`) entries nameT = T.pack name forM_ moduleFiles $ \f -> do r <- try (BL.readFile (fdbDir f)) :: IO (Either SomeException BL.ByteString) case r of Left _ -> return () Right bs -> case JSON.eitherDecode bs :: Either String ModuleManifest of Right m | nameT `elem` mmMorlocDeps m && mmName m /= nameT -> putStrLn $ "Warning: module '" <> T.unpack (mmName m) <> "' depends on '" <> name <> "'" _ -> return () ================================================ FILE: executable/UI.hs ================================================ {- | Module : UI Description : CLI argument parsing with optparse-applicative Copyright : (c) Zebulun Arendsee, 2016-2026 License : Apache-2.0 Maintainer : z@morloc.io Defines the command-line interface for the @morloc@ executable using optparse-applicative: subcommands (make, typecheck, install, init, dump, completion), their options, and help text. -} module UI ( opts , CliCommand (..) , MakeCommand (..) , InitCommand (..) , InstallCommand (..) , TypecheckCommand (..) , DumpCommand (..) , ListCommand (..) , ListKind (..) , UninstallCommand (..) , NewCommand (..) , EvalCommand (..) 
) where

import Morloc.Module (GitProtocol (..), OverwriteProtocol (..))
import Morloc.Version (versionStr)
import Options.Applicative
import qualified Options.Applicative.Extra as OAE

-- | Top-level parser info: the subcommand parser plus @--help@ and @--version@.
opts :: ParserInfo CliCommand
opts =
  info
    (cliParser <**> helper <**> OAE.simpleVersioner versionStr)
    ( fullDesc
        <> progDesc "Call 'morloc make -h', 'morloc install -h', etc for details"
        <> header ("morloc v" <> versionStr)
    )

-- | One constructor per morloc subcommand.
data CliCommand
  = CmdMake MakeCommand
  | CmdInstall InstallCommand
  | CmdUninstall UninstallCommand
  | CmdList ListCommand
  | CmdTypecheck TypecheckCommand
  | CmdDump DumpCommand
  | CmdInit InitCommand
  | CmdNew NewCommand
  | CmdEval EvalCommand

-- | Dispatch over all registered subcommands.
cliParser :: Parser CliCommand
cliParser =
  hsubparser
    ( makeSubcommand
        <> installSubcommand
        <> uninstallSubcommand
        <> listSubcommand
        <> typecheckSubcommand
        <> dumpSubcommand
        <> initSubcommand
        <> newSubcommand
        <> evalSubcommand
    )

-- | Options for @morloc make@.
data MakeCommand = MakeCommand
  { makeExpression :: Bool
  , makeConfig :: String
  , makeVerbose :: Int
  , makeVanilla :: Bool
  , makeOutfile :: String
  , makeInstall :: Bool
  , makeForce :: Bool
  , makeInclude :: [String]
  , makeScript :: String
  }

makeCommandParser :: Parser MakeCommand
makeCommandParser =
  MakeCommand
    <$> optExpression
    <*> optConfig
    <*> optVerbose
    <*> optVanilla
    <*> optOutfile
    <*> optMakeInstall
    <*> optMakeForce
    <*> optMakeInclude
    <*> optScript

makeSubcommand :: Mod CommandFields CliCommand
makeSubcommand =
  command "make" $
    info (CmdMake <$> makeCommandParser) (progDesc "Build a morloc script")

-- | Options for @morloc init@.
data InitCommand = InitCommand
  { initConfig :: String
  , initQuiet :: Bool
  , initVanilla :: Bool
  , initForce :: OverwriteProtocol
  , initSlurmSupport :: Bool
  , initSanitize :: Bool
  }

initCommandParser :: Parser InitCommand
initCommandParser =
  InitCommand
    <$> optConfig
    <*> optQuiet
    <*> optVanilla
    <*> optForce
    <*> optSlurmSupport
    <*> optSanitize

initSubcommand :: Mod CommandFields CliCommand
initSubcommand =
  command "init" $
    info (CmdInit <$> initCommandParser) (progDesc "Initialize morloc environment")

-- | Options for @morloc new@.
data NewCommand = NewCommand
  { newName :: String
  }

newCommandParser :: Parser NewCommand
newCommandParser =
  NewCommand
    <$> strArgument
      ( metavar "NAME"
          <> value ""
          <> help "Package name (defaults to current directory name)"
      )

newSubcommand :: Mod CommandFields CliCommand
newSubcommand =
  command "new" $
    info (CmdNew <$> newCommandParser) (progDesc "Create a new morloc package")

-- | Options for @morloc install@.
data InstallCommand = InstallCommand
  { installConfig :: String
  , installVanilla :: Bool
  , installVerbose :: Int
  , installForce :: OverwriteProtocol
  , installUseSSH :: GitProtocol
  , installNoTypecheck :: Bool
  , installBuild :: Bool
  , installModuleStrings :: [String]
  }

makeInstallParser :: Parser InstallCommand
makeInstallParser =
  InstallCommand
    <$> optConfig
    <*> optVanilla
    <*> optVerbose
    <*> optForce
    <*> optUseSSH
    <*> optNoTypecheck
    <*> optInstallBuild
    <*> optModuleStrings

installSubcommand :: Mod CommandFields CliCommand
installSubcommand =
  command "install" $
    info (CmdInstall <$> makeInstallParser) (progDesc "Install a morloc module")

-- | Options for @morloc typecheck@.
data TypecheckCommand = TypecheckCommand
  { typecheckConfig :: String
  , typecheckVanilla :: Bool
  , typecheckType :: Bool
  , typecheckRaw :: Bool
  , typecheckExpression :: Bool
  , typecheckVerbose :: Int
  , typecheckRealize :: Bool
  , typecheckScript :: String
  }

makeTypecheckParser :: Parser TypecheckCommand
makeTypecheckParser =
  TypecheckCommand
    <$> optConfig
    <*> optVanilla
    <*> optType
    <*> optRaw
    <*> optExpression
    <*> optVerbose
    <*> optRealize
    <*> optScript

typecheckSubcommand :: Mod CommandFields CliCommand
typecheckSubcommand =
  command "typecheck" $
    info (CmdTypecheck <$> makeTypecheckParser) (progDesc "Typecheck a morloc program")

dumpSubcommand :: Mod CommandFields CliCommand
dumpSubcommand =
  command "dump" $
    info (CmdDump <$> makeDumpParser) (progDesc "Dump parsed code")

-- | Options for @morloc dump@.
data DumpCommand = DumpCommand
  { dumpConfig :: String
  , dumpVanilla :: Bool
  , dumpVerbose :: Int
  , dumpExpression :: Bool
  , dumpScript :: String
  }

makeDumpParser :: Parser DumpCommand
makeDumpParser =
  DumpCommand
    <$> optConfig
    <*> optVanilla
    <*> optVerbose
    <*> optExpression
    <*> optScript

-- | Restrict list/uninstall operations to modules or to programs.
data ListKind = ListModules | ListPrograms
  deriving (Show, Eq)

-- | Options for @morloc list@.
data ListCommand = ListCommand
  { listPattern :: Maybe String
  , listConfig :: String
  , listVanilla :: Bool
  , listVerbose :: Int
  , listKind :: Maybe ListKind
  }

makeListParser :: Parser ListCommand
makeListParser =
  ListCommand
    <$> optListPattern
    <*> optConfig
    <*> optVanilla
    <*> optVerbose
    <*> optListKind

listSubcommand :: Mod CommandFields CliCommand
listSubcommand =
  command "list" $
    info (CmdList <$> makeListParser) (progDesc "List installed modules and programs")

-- | Options for @morloc uninstall@.
data UninstallCommand = UninstallCommand
  { uninstallNames :: [String]
  , uninstallConfig :: String
  , uninstallVanilla :: Bool
  , uninstallKind :: Maybe ListKind
  , uninstallDryRun :: Bool
  , uninstallAll :: Bool
  }

makeUninstallParser :: Parser UninstallCommand
makeUninstallParser =
  UninstallCommand
    <$> optUninstallNamesOrNone
    <*> optConfig
    <*> optVanilla
    <*> optUninstallKind
    <*> optDryRun
    <*> optUninstallAll

uninstallSubcommand :: Mod CommandFields CliCommand
uninstallSubcommand =
  command "uninstall" $
    info (CmdUninstall <$> makeUninstallParser) (progDesc "Uninstall a module or program")

-- Shared option parsers -------------------------------------------------

optExpression :: Parser Bool
optExpression =
  switch
    ( long "expression"
        <> short 'e'
        <> help "Read script as string rather than file"
    )

optVanilla :: Parser Bool
optVanilla =
  switch
    ( long "vanilla"
        <> help "Ignore local configuration files"
    )

optForce :: Parser OverwriteProtocol
optForce =
  flag
    DoNotOverwrite
    ForceOverwrite
    ( long "force"
        <> short 'f'
        <> help "Overwrite files if they already exist"
    )

optUseSSH :: Parser GitProtocol
optUseSSH =
  flag
    HttpsProtocol
    SshProtocol
    ( long "ssh"
        <> help "Use SSH protocol for remote git access"
    )

optNoTypecheck :: Parser Bool
optNoTypecheck =
  switch
    ( long "no-typecheck"
        <> help "Skip typechecking during install"
    )

optInstallBuild :: Parser Bool
optInstallBuild =
  switch
    ( long "build"
        <> short 'b'
        <> help "Build and install executable after module install"
    )

-- | One or more positional module install strings.
optModuleStrings :: Parser [String]
optModuleStrings =
  some
    ( strArgument
        ( metavar "INSTALL"
            <> help "Module install strings"
        )
    )

optRaw :: Parser Bool
optRaw =
  switch
    ( long "raw"
        <> help "Print raw objects"
    )

optSlurmSupport :: Parser Bool
optSlurmSupport =
  switch
    ( long "slurm"
        <> help "Allow use of SLURM for remote jobs"
    )

optSanitize :: Parser Bool
optSanitize =
  switch
    ( long "sanitize"
        <> help "Enable alignment sanitizer for debugging memory layout issues"
    )

-- | Count repeated @-v@ flags to yield a verbosity level.
optVerbose :: Parser Int
optVerbose = length <$> many (flag' () (short 'v'))

optQuiet :: Parser Bool
optQuiet =
  switch
    ( long "quiet"
        <> short 'q'
        <> help "Print minimal output to STDERR"
    )

optRealize :: Parser Bool
optRealize =
  switch
    ( long "realize"
        <> short 'r'
        <> help "Typecheck the composition realizations"
    )

optConfig :: Parser String
optConfig =
  strOption
    ( long "config"
        <> metavar "CONFIG"
        <> value ""
        <> help "Use this config rather than the one in morloc home"
    )

optOutfile :: Parser String
optOutfile =
  strOption
    ( long "outfile"
        <> short 'o'
        <> metavar "OUT"
        <> value ""
        <> showDefault
        <> help "The name of the generated executable"
    )

optMakeInstall :: Parser Bool
optMakeInstall =
  switch
    ( long "install"
        <> help "Install module to PATH"
    )

optMakeForce :: Parser Bool
optMakeForce =
  switch
    ( long "force"
        <> short 'f'
        <> help "Overwrite existing install"
    )

optMakeInclude :: Parser [String]
optMakeInclude =
  many
    ( strOption
        ( long "include"
            <> metavar "PATTERN"
            <> help "File pattern to include in install"
        )
    )

optScript :: Parser String
optScript = argument str (metavar "