Repository: jamii/dida Branch: main Commit: 18305fdd8647 Files: 37 Total size: 305.7 KB Directory structure: gitextract_do_r7o6p/ ├── .gitignore ├── .gitmodules ├── README.md ├── bindings/ │ ├── js_common.zig │ ├── node/ │ │ ├── README.md │ │ ├── abi.zig │ │ ├── build.zig │ │ ├── codegen.zig │ │ ├── package.json │ │ ├── runtime.zig │ │ └── shell.nix │ └── wasm/ │ ├── README.md │ ├── abi.js │ ├── abi.zig │ ├── build.zig │ ├── codegen.zig │ ├── runtime.zig │ └── shell.nix ├── debugger/ │ ├── .gitignore │ ├── README.md │ ├── build.zig │ ├── main.zig │ └── shell.nix ├── dependencies.nix ├── docs/ │ └── why.md ├── examples/ │ ├── core.html │ ├── core.js │ └── sugar.zig ├── lib/ │ ├── dida/ │ │ ├── core.zig │ │ ├── debug.zig │ │ ├── sugar.zig │ │ └── util.zig │ └── dida.zig ├── shell.nix ├── test/ │ ├── core.zig │ └── should_panic/ │ └── reentry.zig └── test.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ zig-cache zig-out ================================================ FILE: .gitmodules ================================================ [submodule "native-debugger/ZT"] path = debugger/native-debugger/ZT url = git@github.com:jamii/ZT.git [submodule "debugger/ZT"] path = debugger/ZT url = git@github.com:jamii/ZT.git ================================================ FILE: README.md ================================================ Dida is a (WIP) library for streaming, incremental, iterative, internally-consistent computation on time-varying collections. The jargon-free version: You write code that manipulates collections using familiar operations like `map`, `join` and `loop`. You run the code on some input and get some output. Then when the input changes, you get changes to the output, much faster than recomputing the whole thing from scratch. 
(And the outputs will be [correct](https://scattered-thoughts.net/writing/internal-consistency-in-streaming-systems/)!) If you want to learn how it works, start at [docs/why.md](./docs/why.md) and then read [lib/dida/core.zig](./lib/dida/core.zig). ## Design Dida is heavily based on [differential dataflow](https://github.com/TimelyDataflow/differential-dataflow/) and is informed by experience using differential dataflow as a backend at [materialize](https://materialize.com/). Compared to differential dataflow, dida aims to: * [ ] Be [easier to understand](https://scattered-thoughts.net/writing/why-isnt-differential-dataflow-more-popular/). (Differential dataflow aims to be as flexible, extensible and composable as possible, which makes sense for a research platform but can make the code very difficult to follow.) * [x] Tentatively aim to keep the core under 3kloc. (For comparison timely dataflow and differential dataflow total 14-16kloc depending on which components you count). * [x] Only implement as much of [timely dataflow](https://github.com/TimelyDataflow/timely-dataflow/) as is needed to support the features in differential dataflow. * [x] Only support timestamps which are products of integers. (Differential dataflow supports timestamps which are arbitrary lattices, but I haven't yet seen any uses that can't be encoded as products of integers.) * [x] Use a simpler progress tracking algorithm which doesn't require path summaries, multiple outputs per node or internal edges within nodes. * [x] Store all state associated with the dataflow in a single flat structure for easy introspection. * [x] Optionally log all actions to enable debugging and visualization. * [ ] Be [easier to use](https://scattered-thoughts.net/writing/why-isnt-differential-dataflow-more-popular/). * [ ] Provide an api layer that locks in reasonable defaults instead of exposing maximum flexibility. 
* [x] Expose the storage system and event loop so that it is clear where data is stored and when computation happens. * [ ] Provide well-documented default implementations for common tasks (eg writing output to a file). * [ ] Better support use as an interpreter backend and for binding to other languages. * [x] Split the api into a data-centric runtime-checked core, and a per-binding-language strongly-typed sugar that helps make dataflows correct-by-construction. (The differential dataflow api provides excellent compile-time safety but is hard to map across FFI into a language with a different type system.) * [ ] Don't rely on specialization for performance, since it requires compilation and also doesn't work well cross-language. This will require rethinking eg how functions are lifted over batches. * [ ] Support storing data inline in indexes when the size is only known at runtime. (Differential dataflow can support this, but materialize currently stores each row in a separate heap allocation even if the row is all fixed-width types.) * [ ] Support reference-counted values without adding overhead for non-reference-counted values. This is needed for eg pinning javascript objects but also helps reduce memory usage in string-heavy dataflows. (Materialize could reference-count eg strings but would then pay for the row-by-row Drop impl on all data, not just string data.) * [ ] Support being embedded in another event loop. (Differential dataflow can be run a step at a time, but the amount of work per step is not bounded so it can block the event loop for arbitrary amounts of time.) * [ ] Support cpu and memory limits. This makes it much easier to safely support live evaluation eg embedding a query repl in a tracing dashboard. * [ ] Provide interactive graphical debuggers for every component. 
(Many of the complaints I've heard about differential dataflow are about struggling to understand where state is stored, when things happen, how different ways of writing a computation affect performance etc. Debuggers can answer this question directly, but I suspect will also help by teaching useful mental representations.) * [ ] Write a short book that uses the debuggers to explain both the theory of differential dataflow and the details of this implementation. (Differential dataflow suffers from having the theory spread across myriad papers with limited space and which each describe different versions of the algorithms.) ## Implementation * [ ] Core * [ ] Data * [ ] Treat data as untyped bytes in the core * [x] Timestamps * [x] Frontiers * [ ] Change batches * [x] Sort and coalesce changes * [ ] Use columnar storage * [ ] Only store one copy of each row, regardless of number of timestamps/diffs * [ ] Nodes * [x] Input * [x] Output * [ ] Map * [x] Basic logic * [ ] Replace with general linear operator * [x] TimestampPush/Increment/Pop * [x] Union * [ ] Index * [x] Basic logic * [x] Merge layers * [ ] Merge layers incrementally * [ ] Compact layers * [ ] Figure out a cheaper way to maintain frontier support? * [ ] Distinct * [x] Basic logic * [x] Semi-efficient implementation using per-row pending timestamps * [ ] Figure out a cheaper way to maintain pending timestamps? * [ ] Count, Threshold * [ ] Join * [x] Basic logic * [x] Efficient implementation using merge join * [x] Reduce * [x] Basic logic * [ ] Efficient implementation using better index structure * [ ] Should reduce take a separate input for keys? 
* [ ] ReduceAbelian * [ ] Exchange * [x] Graph * [x] Validation * [ ] Progress tracking * [x] Incremental frontier maintenance * [ ] Finer-grained change tracking to avoid empty updates * [ ] Use a sorted data-structure for `unprocessed_frontier_updates` * [ ] Scheduling * [x] Schedule work in a correct order * [ ] Figure out how to schedule work for best throughput/latency * [ ] Ensure that the runtime of `doWork` is roughly bounded * [ ] Enforce a maximum batch size and suspend operations that would produce more than one batch * [x] Single-threaded cooperative worker * [ ] Multi-threaded workers * [ ] Expose as state machine for easy simulation * [x] Memory management * [ ] Testing * [ ] Unit test * [x] Timestamp ordering / lub * [x] Batch builder * [x] Frontier move / order * [x] Supported frontier update * [x] Batch / index queries * [x] Index build * [ ] ... * [ ] Port property testing / fuzzing framework from imp to replace hand-written unit tests * [ ] Unit test that known failure modes for graphs don't validate * [ ] Test that random graphs either fail to validate or have no paths where `orderPointstamps(start,end) != .lt` * [ ] Test that random progress tracking updates never cause frontiers to go backwards * [ ] Test that random reorderings of updates to progress tracker have same final result * [ ] Test that random reorderings of inputs to dataflows have same final result * [ ] Add debug-mode validation to progress tracker, shard * [ ] Integration test against problems with known results (eg TPC) * [x] Enable double free, use after free and memory leak detection for all tests * [ ] Figure out how to test bindings * [ ] Wasm * [ ] Node * [ ] Bindings * [ ] Wasm * [x] Basic sketch * [ ] Better runtime type checking * [ ] Memory management * [x] Panic handler * [ ] Packaging * [ ] Node * [x] Basic sketch * [ ] Better runtime type checking (type tags?) 
* [ ] Memory management * [ ] Panic handler * [ ] Packaging * [ ] Sugar * [ ] Zig * [x] Basic sketch * [ ] Automatically add Push/Pop nodes as needed * [ ] Add index wrappers for indexes behind Push/Pop * [ ] Memory management * [ ] Wasm * [ ] Node * [ ] Debuggers / visualization * [ ] Documentation / book * [x] First pass at high-level explanation ================================================ FILE: bindings/js_common.zig ================================================ pub const std = @import("std"); pub const dida = @import("../lib/dida.zig"); /// root.abi defines functions for interacting with javascript values const abi = @import("root").abi; // --- global allocator --- var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true, .never_unmap = true, }){}; pub const allocator = gpa.allocator(); // --- constructors for External types need to use global allocator --- pub fn GraphBuilder_init() !dida.core.GraphBuilder { return dida.core.GraphBuilder.init(allocator); } pub fn Graph_init(node_specs: []const dida.core.NodeSpec, node_immediate_subgraphs: []const dida.core.Subgraph, subgraph_parents: []const dida.core.Subgraph) !dida.core.Graph { return dida.core.Graph.init(allocator, node_specs, node_immediate_subgraphs, subgraph_parents); } pub fn Shard_init(graph: *const dida.core.Graph) !dida.core.Shard { return dida.core.Shard.init(allocator, graph); } // --- serde --- pub const exported_functions = .{ .{ "GraphBuilder_init", GraphBuilder_init }, .{ "GraphBuilder_addSubgraph", dida.core.GraphBuilder.addSubgraph }, .{ "GraphBuilder_addNode", dida.core.GraphBuilder.addNode }, .{ "GraphBuilder_connectLoop", dida.core.GraphBuilder.connectLoop }, .{ "GraphBuilder_finishAndReset", dida.core.GraphBuilder.finishAndReset }, .{ "Graph_init", Graph_init }, .{ "Shard_init", Shard_init }, .{ "Shard_pushInput", dida.core.Shard.pushInput }, .{ "Shard_flushInput", dida.core.Shard.flushInput }, .{ "Shard_advanceInput", dida.core.Shard.advanceInput }, .{ "Shard_hasWork", 
dida.core.Shard.hasWork }, .{ "Shard_doWork", dida.core.Shard.doWork }, .{ "Shard_popOutput", dida.core.Shard.popOutput }, }; pub const types_with_js_constructors = .{ dida.core.GraphBuilder, dida.core.Graph, dida.core.Shard, dida.core.Change, dida.core.ChangeBatch, dida.core.Subgraph, dida.core.Node, dida.core.NodeSpec, dida.core.NodeSpec.MapSpec, dida.core.NodeSpec.JoinSpec, dida.core.NodeSpec.TimestampPushSpec, dida.core.NodeSpec.TimestampPopSpec, dida.core.NodeSpec.TimestampIncrementSpec, dida.core.NodeSpec.IndexSpec, dida.core.NodeSpec.UnionSpec, dida.core.NodeSpec.DistinctSpec, dida.core.NodeSpec.ReduceSpec, dida.core.NodeSpec.OutputSpec, }; pub fn hasJsConstructor(comptime T: type) bool { inline for (types_with_js_constructors) |T2| { if (T == T2) return true; } return false; } /// How a given type will be (de)serialized across the abi boundary pub const SerdeStrategy = enum { /// Heap-allocate on zig side and give a pointer to js side External, /// Convert between zig and js values at the callsite Value, }; pub fn serdeStrategy(comptime T: type) SerdeStrategy { return switch (T) { dida.core.GraphBuilder, *dida.core.GraphBuilder, dida.core.Graph, *dida.core.Graph, *const dida.core.Graph, dida.core.Shard, *dida.core.Shard, *const dida.core.Shard, => .External, void, bool, usize, isize, []const usize, dida.core.Timestamp, dida.core.Frontier, f64, []const u8, dida.core.Value, std.meta.Tag(dida.core.Value), []const dida.core.Value, dida.core.Row, dida.core.Change, []dida.core.Change, dida.core.ChangeBatch, ?dida.core.ChangeBatch, dida.core.Subgraph, []const dida.core.Subgraph, dida.core.Node, ?dida.core.Node, [2]dida.core.Node, dida.core.NodeSpec, []const dida.core.NodeSpec, std.meta.Tag(dida.core.NodeSpec), dida.core.NodeSpec.MapSpec, *dida.core.NodeSpec.MapSpec.Mapper, dida.core.NodeSpec.JoinSpec, dida.core.NodeSpec.TimestampPushSpec, dida.core.NodeSpec.TimestampPopSpec, dida.core.NodeSpec.TimestampIncrementSpec, dida.core.NodeSpec.IndexSpec, 
dida.core.NodeSpec.UnionSpec, dida.core.NodeSpec.DistinctSpec, dida.core.NodeSpec.ReduceSpec, *dida.core.NodeSpec.ReduceSpec.Reducer, dida.core.NodeSpec.OutputSpec, => .Value, else => dida.util.compileError("No SerdeStrategy for {}", .{T}), }; } /// Take a zig function and return a wrapped version that handles serde pub fn handleSerdeForFunction(comptime zig_fn: anytype) fn (abi.Env, []const abi.Value) abi.Value { return struct { fn wrappedMethod(env: abi.Env, js_args: []const abi.Value) abi.Value { var zig_args: std.meta.ArgsTuple(@TypeOf(zig_fn)) = undefined; comptime var i: usize = 0; inline while (i < zig_args.len) : (i += 1) { const js_arg = js_args[i]; const zig_arg_type = @TypeOf(zig_args[i]); zig_args[i] = switch (comptime serdeStrategy(zig_arg_type)) { .External => deserializeExternal(env, js_arg, zig_arg_type), .Value => deserializeValue(env, js_arg, zig_arg_type), }; } const result_or_err = @call(.{}, zig_fn, zig_args); const result = if (@typeInfo(@TypeOf(result_or_err)) == .ErrorUnion) result_or_err catch |err| dida.util.panic("{}", .{err}) else result_or_err; switch (comptime serdeStrategy(@TypeOf(result))) { .External => { const result_ptr = allocator.create(@TypeOf(result)) catch |err| dida.util.panic("{}", .{err}); result_ptr.* = result; return serializeExternal(env, result_ptr); }, .Value => { return serializeValue(env, result); }, } } }.wrappedMethod; } // --- private serde stuff --- const usize_bits = @bitSizeOf(usize); comptime { dida.util.comptimeAssert( usize_bits == 32 or usize_bits == 64, "Can only handle 32 bit or 64 bit architectures, not {}", .{usize_bits}, ); } fn serializeExternal(env: abi.Env, value: anytype) abi.Value { const info = @typeInfo(@TypeOf(value)); dida.util.comptimeAssert( comptime (serdeStrategy(@TypeOf(value)) == .External), "Used serializeExternal on a type that is expected to require serializeValue: {}", .{@TypeOf(value)}, ); dida.util.comptimeAssert( info == .Pointer and info.Pointer.size == .One, "serializeExternal 
should be called with *T, got {}", .{@TypeOf(value)}, ); dida.util.comptimeAssert( comptime hasJsConstructor(info.Pointer.child), "Tried to create an external for a type that doesn't have a matching js constructor: {}", .{@TypeOf(value)}, ); const external = abi.createExternal(env, @ptrCast(*anyopaque, value)); const result = abi.createObject(env); abi.setProperty(env, result, abi.createString(env, "external"), external); return result; } fn deserializeExternal(env: abi.Env, value: abi.Value, comptime ReturnType: type) ReturnType { dida.util.comptimeAssert( comptime (serdeStrategy(ReturnType) == .External), "Used deserializeExternal on a type that is expected to require deserializeValue: {}", .{ReturnType}, ); const info = @typeInfo(ReturnType); dida.util.comptimeAssert( info == .Pointer and info.Pointer.size == .One, "deserializeExternal should be called with *T, got {}", .{ReturnType}, ); const external = abi.getProperty(env, value, abi.createString(env, "external")); const ptr = abi.getExternal(env, external); return @ptrCast(ReturnType, @alignCast(@alignOf(info.Pointer.child), ptr)); } fn serializeValue(env: abi.Env, value: anytype) abi.Value { dida.util.comptimeAssert( comptime (serdeStrategy(@TypeOf(value)) == .Value), "Used serializeValue on a type that is expected to require serializeExternal: {}", .{@TypeOf(value)}, ); if (@TypeOf(value) == []const u8) { return abi.createString(env, value); } if (@TypeOf(value) == dida.core.Value) { return switch (value) { .String => |string| serializeValue(env, string), .Number => |number| serializeValue(env, number), }; } if (@TypeOf(value) == dida.core.Row) { return serializeValue(env, value.values); } if (@TypeOf(value) == dida.core.Timestamp) { return serializeValue(env, value.coords); } if (@TypeOf(value) == dida.core.Frontier) { const len = value.timestamps.count(); const js_array = abi.createArray(env, len); var iter = value.timestamps.iterator(); var i: usize = 0; while (iter.next()) |entry| { const js_timestamp = 
serializeValue(env, entry.key_ptr.*); abi.setElement(env, js_array, @intCast(u32, i), js_timestamp); i += 1; } return js_array; } const info = @typeInfo(@TypeOf(value)); switch (info) { .Bool => { return abi.createBoolean(env, value); }, .Int => { return switch (@TypeOf(value)) { usize => if (usize_bits == 64) abi.createI64(env, @intCast(i64, value)) else abi.createU32(env, value), isize => if (usize_bits == 64) abi.createI64(env, value) else abi.createI32(env, value), else => dida.util.compileError("Don't know how to create js value for {}", .{@TypeOf(value)}), }; }, .Float => { const abi_fn = switch (@TypeOf(value)) { f64 => abi.createF64, else => dida.util.compileError("Don't know how to create js value for {}", .{@TypeOf(value)}), }; return @call(.{}, abi_fn, .{ env, value }); }, .Struct => |struct_info| { dida.util.comptimeAssert( comptime hasJsConstructor(@TypeOf(value)), "Tried to create a value for a struct type that doesn't have a matching js constructor: {}", .{@TypeOf(value)}, ); const result = abi.createObject(env); inline for (struct_info.fields) |field_info| { const field_value = serializeValue(env, @field(value, field_info.name)); abi.setProperty(env, result, abi.createString(env, field_info.name), field_value); } return result; }, .Union => |union_info| { if (union_info.tag_type) |tag_type| { dida.util.comptimeAssert( comptime hasJsConstructor(@TypeOf(value)), "Tried to create a value for a union type that doesn't have a matching js constructor: {}", .{@TypeOf(value)}, ); const result = abi.createObject(env); const tag = std.meta.activeTag(value); abi.setProperty(env, result, abi.createString(env, "tag"), abi.createString(env, @tagName(tag))); inline for (@typeInfo(tag_type).Enum.fields) |enum_field_info| { if (@enumToInt(tag) == enum_field_info.value) { const payload = @field(value, enum_field_info.name); const js_payload = serializeValue(env, payload); abi.setProperty(env, result, abi.createString(env, "payload"), js_payload); return result; } } 
unreachable; } else { dida.util.compileError("Can't create value for untagged union type {}", .{@TypeOf(value)}); } }, .Enum => |enum_info| { comptime var max_len: usize = 0; inline for (enum_info.fields) |field_info| { comptime { max_len = dida.util.max(max_len, field_info.name.len); } } // max_len+1 to make space for null byte var buffer: [max_len + 1]u8 = undefined; const tag_name = abi.getStringInto(env, value, &buffer); inline for (enum_info.fields) |field_info| { if (std.mem.eql(u8, tag_name, field_info.name)) { return @intToEnum(@TypeOf(value), field_info.value); } } dida.util.panic("Type {s} does not contain a tag named \"{s}\"", .{ @TypeOf(value), tag_name }); }, .Pointer => |pointer_info| { switch (pointer_info.size) { .Slice => { const js_array = abi.createArray(env, value.len); for (value) |elem, i| { const js_elem = serializeValue(env, elem); abi.setElement(env, js_array, @intCast(u32, i), js_elem); } return js_array; }, else => dida.util.compileError("Don't know how to create value of type {}", .{@TypeOf(value)}), } }, .Optional => { return if (value) |payload| serializeValue(env, payload) else abi.createUndefined(env); }, .Void => { return abi.createUndefined(env); }, else => dida.util.compileError("Don't know how to create value of type {}", .{@TypeOf(value)}), } } const JsMapper = struct { // TODO is it safe to hold on to this? env: abi.Env, js_fn_ref: abi.RefCounted, mapper: dida.core.NodeSpec.MapSpec.Mapper, fn map(self: *dida.core.NodeSpec.MapSpec.Mapper, row: dida.core.Row) error{OutOfMemory}!dida.core.Row { const parent = @fieldParentPtr(JsMapper, "mapper", self); const js_fn = abi.getRefCounted(parent.env, parent.js_fn_ref); const js_args = [_]abi.Value{ serializeValue(parent.env, row), }; const js_output = abi.callFunction(parent.env, js_fn, &js_args); const output = deserializeValue(parent.env, js_output, dida.core.Row); return output; } }; const JsReducer = struct { // TODO is it safe to hold on to this? 
env: abi.Env, js_fn_ref: abi.RefCounted, reducer: dida.core.NodeSpec.ReduceSpec.Reducer, fn reduce(self: *dida.core.NodeSpec.ReduceSpec.Reducer, reduced_value: dida.core.Value, row: dida.core.Row, count: usize) error{OutOfMemory}!dida.core.Value { const parent = @fieldParentPtr(JsReducer, "reducer", self); const js_fn = abi.getRefCounted(parent.env, parent.js_fn_ref); const js_args = [_]abi.Value{ serializeValue(parent.env, reduced_value), serializeValue(parent.env, row), serializeValue(parent.env, count), }; const js_output = abi.callFunction(parent.env, js_fn, &js_args); const output = deserializeValue(parent.env, js_output, dida.core.Value); return output; } }; fn deserializeValue(env: abi.Env, value: abi.Value, comptime ReturnType: type) ReturnType { dida.util.comptimeAssert( comptime (serdeStrategy(ReturnType) == .Value), "Used deserializeValue on a type that is expected to require deserializeExternal: {}", .{ReturnType}, ); if (ReturnType == []const u8) { return abi.getString(env, value) catch |err| dida.util.panic("{}", .{err}); } if (ReturnType == dida.core.Value) { const js_type = abi.jsTypeOf(env, value); return switch (js_type) { .String => dida.core.Value{ .String = deserializeValue(env, value, []const u8) }, .Number => dida.core.Value{ .Number = deserializeValue(env, value, f64) }, else => dida.util.panic("Don't know how to get a dida.core.Value from {}", .{js_type}), }; } if (ReturnType == dida.core.Row) { return dida.core.Row{ .values = deserializeValue(env, value, []const dida.core.Value) }; } if (ReturnType == dida.core.Timestamp) { return dida.core.Timestamp{ .coords = deserializeValue(env, value, []const usize) }; } if (ReturnType == *dida.core.NodeSpec.MapSpec.Mapper) { // TODO we're just leaking this for now var js_mapper = allocator.create(JsMapper) catch |err| dida.util.panic("{}", .{err}); js_mapper.* = JsMapper{ .env = env, .js_fn_ref = abi.createRefCounted(env, value, 1), .mapper = .{ .map_fn = JsMapper.map, }, }; return &js_mapper.mapper; 
} if (ReturnType == *dida.core.NodeSpec.ReduceSpec.Reducer) { // TODO we're just leaking this for now var js_reducer = allocator.create(JsReducer) catch |err| dida.util.panic("{}", .{err}); js_reducer.* = JsReducer{ .env = env, .js_fn_ref = abi.createRefCounted(env, value, 1), .reducer = .{ .reduce_fn = JsReducer.reduce, }, }; return &js_reducer.reducer; } const info = @typeInfo(ReturnType); switch (info) { .Int => { return switch (ReturnType) { usize => if (usize_bits == 64) @intCast(usize, abi.getI64(env, value)) else abi.getU32(env, value), isize => if (usize_bits == 64) abi.getI64(env, value) else abi.getI32(env, value), else => dida.util.compileError("Don't know how to create js value for {}", .{ReturnType}), }; }, .Float => { const abi_fn = switch (ReturnType) { f64 => abi.getF64, else => dida.util.compileError("Don't know how to create js value for {}", .{ReturnType}), }; return @call(.{}, abi_fn, .{ env, value }); }, .Struct => |struct_info| { var result: ReturnType = undefined; inline for (struct_info.fields) |field_info| { const field_value = abi.getProperty(env, value, abi.createString(env, field_info.name)); @field(result, field_info.name) = deserializeValue(env, field_value, field_info.field_type); } return result; }, .Union => |union_info| { if (union_info.tag_type) |tag_type| { const tag = deserializeValue(env, abi.getProperty(env, value, abi.createString(env, "tag")), tag_type); inline for (@typeInfo(tag_type).Enum.fields) |enum_field_info, i| { if (@enumToInt(tag) == enum_field_info.value) { const union_field_info = union_info.fields[i]; const payload = deserializeValue(env, abi.getProperty(env, value, abi.createString(env, "payload")), union_field_info.field_type); return @unionInit(ReturnType, union_field_info.name, payload); } } unreachable; } else { dida.util.compileError("Can't get value for untagged union type {}", .{ReturnType}); } }, .Enum => |enum_info| { comptime var max_len: usize = 0; inline for (enum_info.fields) |field_info| { 
comptime { max_len = dida.util.max(max_len, field_info.name.len); } } // max_len+1 to make space for null byte :( var buffer: [max_len + 1]u8 = undefined; const tag_name = abi.getStringInto(env, value, &buffer); inline for (enum_info.fields) |field_info| { if (std.mem.eql(u8, tag_name, field_info.name)) { return @intToEnum(ReturnType, field_info.value); } } dida.util.panic("Type {s} does not contain a tag named \"{s}\"", .{ ReturnType, tag_name }); }, .Array => |array_info| { const js_len = abi.getArrayLength(env, value); dida.util.assert(js_len == array_info.len, "Expected array of length {}, got length {}", .{ array_info.len, js_len }); var result: ReturnType = undefined; for (result) |*elem, i| { const js_elem = abi.getElement(env, value, @intCast(u32, i)); elem.* = deserializeValue(env, js_elem, array_info.child); } return result; }, .Pointer => |pointer_info| { switch (pointer_info.size) { .Slice => { const len = abi.getArrayLength(env, value); const result = allocator.alloc(pointer_info.child, len) catch |err| dida.util.panic("{}", .{err}); for (result) |*elem, i| { const js_elem = abi.getElement(env, value, @intCast(u32, i)); elem.* = deserializeValue(env, js_elem, pointer_info.child); } return result; }, else => dida.util.compileError("Don't know how to get value for type {}", .{ReturnType}), } }, .Optional => |optional_info| { const js_type = abi.jsTypeOf(env, value); return switch (js_type) { .Undefined, .Null => null, else => deserializeValue(env, value, optional_info.child), }; }, .Void => { return {}; }, else => dida.util.compileError("Don't know how to get value for type {}", .{ReturnType}), } } // --- abi types --- pub const JsType = enum(u8) { Undefined = 0, Null = 1, Boolean = 2, Number = 3, String = 4, Object = 5, Function = 6, }; ================================================ FILE: bindings/node/README.md ================================================ To build: ``` nix-shell zig build install -Drelease-safe=true zig build run-codegen ``` To 
run example: ``` nix-shell node ../../examples/core.js ``` ================================================ FILE: bindings/node/abi.zig ================================================ //! Wrapper aound [NAPI](https://nodejs.org/api/n-api.html). //! Used by ../js_common.zig pub const js_common = @import("../js_common.zig"); pub const dida = @import("../../lib/dida.zig"); // --- node-specific stuff --- const c = @cImport({ @cInclude("node_api.h"); }); fn napiCall(comptime napi_fn: anytype, args: anytype, comptime ReturnType: type) ReturnType { if (ReturnType != void) { var result: ReturnType = undefined; const status: c.napi_status = @call(.{}, napi_fn, args ++ .{&result}); dida.util.assert(status == c.napi_ok, "Call returned status {}", .{status}); return result; } else { const status: c.napi_status = @call(.{}, napi_fn, args); dida.util.assert(status == c.napi_ok, "Call returned status {}", .{status}); } } pub fn createFunction( env: Env, name: [:0]const u8, comptime num_args: usize, comptime zig_fn: fn (env: Env, []const Value) Value, ) Value { const callback = struct { fn callback(callback_env: Env, info: c.napi_callback_info) callconv(.C) Value { var actual_num_args: usize = num_args; var args: [num_args]Value = undefined; var this: Value = undefined; napiCall(c.napi_get_cb_info, .{ callback_env, info, &actual_num_args, @ptrCast([*c]Value, @as([]Value, &args)), &this, null, }, void); dida.util.assert( actual_num_args == num_args, "Expected {} args, got {} args", .{ num_args, actual_num_args, }, ); return @call(.{}, zig_fn, .{ callback_env, &args }); } }.callback; return napiCall(c.napi_create_function, .{ env, name, name.len, callback, null }, Value); } // --- interface required by js_common --- pub const Env = c.napi_env; pub const Value = c.napi_value; pub const RefCounted = c.napi_ref; pub fn jsTypeOf(env: Env, value: Value) js_common.JsType { const napi_type = napiCall(c.napi_typeof, .{ env, value }, c.napi_valuetype); return switch (napi_type) { 
c.napi_undefined => .Undefined, c.napi_null => .Null, c.napi_boolean => .Boolean, c.napi_number => .Number, c.napi_string => .String, c.napi_object => .Object, c.napi_function => .Function, else => dida.util.panic("Don't know how to handle this napi_valuetype: {}", .{napi_type}), }; } pub fn createUndefined(env: Env) Value { return napiCall(c.napi_get_undefined, .{env}, Value); } pub fn createBoolean(env: Env, value: bool) Value { // Not a typo - napi_get_boolean retrieves a global singleton return napiCall(c.napi_get_boolean, .{ env, value }, Value); } pub fn createU32(env: Env, int: u32) Value { return napiCall(c.napi_create_uint32, .{ env, int }, Value); } pub fn createI32(env: Env, int: i32) Value { return napiCall(c.napi_create_int32, .{ env, int }, Value); } pub fn createI64(env: Env, int: i64) Value { return napiCall(c.napi_create_int64, .{ env, int }, Value); } pub fn createF64(env: Env, float: f64) Value { return napiCall(c.napi_create_double, .{ env, float }, Value); } pub fn createString(env: Env, utf8_string: []const u8) Value { return napiCall(c.napi_create_string_utf8, .{ env, @ptrCast([*c]const u8, utf8_string), utf8_string.len }, Value); } pub fn createObject(env: Env) Value { return napiCall(c.napi_create_object, .{env}, Value); } pub fn createArray(env: Env, len: usize) Value { return napiCall(c.napi_create_array_with_length, .{ env, @intCast(u32, len) }, Value); } pub fn createExternal(env: Env, pointer: *anyopaque) Value { return napiCall(c.napi_create_external, .{ env, pointer, null, null }, Value); } pub fn createRefCounted(env: Env, value: Value, refcount: u32) RefCounted { return napiCall(c.napi_create_reference, .{ env, value, refcount }, RefCounted); } pub fn getI32(env: Env, value: Value) i32 { return napiCall(c.napi_get_value_int32, .{ env, value }, i32); } pub fn getI64(env: Env, value: Value) i64 { return napiCall(c.napi_get_value_int64, .{ env, value }, i64); } pub fn getF64(env: Env, value: Value) f64 { return 
napiCall(c.napi_get_value_double, .{ env, value }, f64); } pub fn getString(env: Env, value: Value) ![]const u8 { const len = napiCall(c.napi_get_value_string_utf8, .{ env, value, null, 0 }, usize); // len+1 for null byte var buffer = try js_common.allocator.alloc(u8, len + 1); return getStringInto(env, value, buffer); } pub fn getStringInto(env: Env, value: Value, buffer: []u8) []const u8 { const len = napiCall(c.napi_get_value_string_utf8, .{ env, value, @ptrCast([*c]u8, buffer), buffer.len }, usize); return buffer[0..len]; } pub fn getExternal(env: Env, value: Value) *anyopaque { return napiCall(c.napi_get_value_external, .{ env, value }, ?*anyopaque).?; } pub fn getRefCounted(env: Env, ref: RefCounted) Value { return napiCall(c.napi_get_reference_value, .{ env, ref }, Value); } pub fn getArrayLength(env: Env, array: Value) u32 { return napiCall(c.napi_get_array_length, .{ env, array }, u32); } pub fn getElement(env: Env, array: Value, index: u32) Value { return napiCall(c.napi_get_element, .{ env, array, index }, Value); } pub fn setElement(env: Env, array: Value, index: u32, value: Value) void { napiCall(c.napi_set_element, .{ env, array, index, value }, void); } pub fn setProperty(env: Env, object: Value, name: Value, value: Value) void { napiCall(c.napi_set_property, .{ env, object, name, value }, void); } pub fn getProperty(env: Env, object: Value, name: Value) Value { return napiCall(c.napi_get_property, .{ env, object, name }, Value); } pub fn callFunction(env: Env, function: Value, args: []const Value) Value { dida.util.assert(!napiCall(c.napi_is_exception_pending, .{env}, bool), "Shouldn't be any exceptions before function call", .{}); const napi_undefined = createUndefined(env); const output = napiCall(c.napi_call_function, .{ env, napi_undefined, function, args.len, @ptrCast([*c]const Value, args) }, Value); dida.util.assert(!napiCall(c.napi_is_exception_pending, .{env}, bool), "Shouldn't be any exceptions after function call", .{}); return output; } 
================================================ FILE: bindings/node/build.zig ================================================
const builtin = @import("builtin");
const std = @import("std");

// Build-script-lifetime allocator; never freed, which is fine for a short-lived build process.
const allocator = std.heap.page_allocator;

// Builds the node binding: compiles runtime.zig into a shared library linked
// against libc and the node headers, installs it under the .node name that
// `require` expects, and exposes a run-codegen step that generates the JS
// wrapper (zig-out/lib/dida.js).
pub fn build(b: *std.build.Builder) !void {
    const mode = b.standardReleaseOptions();
    const runtime = b.addSharedLibrary("dida", "./runtime.zig", .unversioned);
    runtime.setBuildMode(mode);
    // Main package path is the repo root so the "../../lib/dida.zig" imports resolve.
    runtime.setMainPkgPath("../../");
    runtime.linkLibC();
    // Locate the node headers via the NIX_NODEJS env var exported by shell.nix.
    // Panics (`.?`) if run outside the nix shell.
    var buffer = std.ArrayList(u8).init(allocator);
    try std.fmt.format(buffer.writer(), "{s}/include/node", .{std.os.getenv("NIX_NODEJS").?});
    runtime.addIncludeDir(buffer.items);
    runtime.install();
    // Copy the shared library to the .node extension that node's `require` expects.
    // NOTE(review): hard-codes the linux "libdida.so" name -- presumably broken
    // on macOS (.dylib) / windows (.dll); TODO confirm and derive the name from
    // the target instead.
    b.installLibFile("zig-out/lib/libdida.so", "dida.node");
    const runtime_step = b.step("runtime", "Build runtime (zig-out/lib/dida.o TODO)");
    runtime_step.dependOn(&runtime.step);
    const codegen = b.addExecutable("codegen", "./codegen.zig");
    codegen.setMainPkgPath("../../");
    const run_codegen_step = b.step("run-codegen", "Run codegen (to generate zig-out/lib/dida.js)");
    run_codegen_step.dependOn(&codegen.run().step);
}
================================================ FILE: bindings/node/codegen.zig ================================================
const std = @import("std");
const dida = @import("../../lib/dida.zig");
const js_common = @import("../js_common.zig");

// Entry point: writes zig-out/lib/dida.js, a wrapper module that requires the
// native dida.node addon, defines one JS constructor per exposed dida type,
// and exports them all.
pub fn main() !void {
    const file = try std.fs.cwd().createFile("zig-out/lib/dida.js", .{ .read = false, .truncate = true });
    defer file.close();
    const writer = file.writer();
    try writer.writeAll("const dida = require('./dida.node');\n\n");
    inline for (js_common.types_with_js_constructors) |T| {
        try generateConstructor(writer, T);
    }
    try writer.writeAll("\n\n");
    inline for (js_common.types_with_js_constructors) |T| {
        try std.fmt.format(writer, "exports.{s} = {s};\n", .{ T, T });
    }
}

// Emits JS constructor (and prototype-method) source for `Type`, dispatching
// on its serde strategy. (Definition continues past this chunk.)
fn generateConstructor(writer: anytype, comptime Type: type) !void {
    const info = @typeInfo(Type);
    switch (comptime js_common.serdeStrategy(Type)) {
        .External => {
            inline
for (info.Struct.decls) |decl_info| { if (decl_info.is_pub and decl_info.data == .Fn) { const fn_decl_info = decl_info.data.Fn; const fn_info = @typeInfo(fn_decl_info.fn_type).Fn; // First arg is allocator or self const args = fn_info.args[1..]; // TODO fn_decl_info.arg_names.len is empty // See https://github.com/ziglang/zig/issues/8259 var arg_names: [args.len][]const u8 = undefined; for (arg_names) |*arg_name, i| { arg_name.* = try dida.util.format(js_common.allocator, "arg{}", .{i}); } // NOTE this relies on `init` being the first decl if (comptime std.mem.eql(u8, decl_info.name, "init")) { try std.fmt.format( writer, \\function {s}({s}) {{ \\ this.external = dida.{s}_init({s}).external; \\}} \\ , .{ Type, std.mem.join(js_common.allocator, ", ", &arg_names), Type, std.mem.join(js_common.allocator, ", ", &arg_names), }, ); } else { try std.fmt.format( writer, \\{s}.prototype.{s} = function {s}({s}) {{ \\ const result = dida.{s}_{s}(this, {s}); \\ return result; \\}} \\ , .{ Type, decl_info.name, decl_info.name, std.mem.join(js_common.allocator, ", ", &arg_names), Type, decl_info.name, std.mem.join(js_common.allocator, ", ", &arg_names), }, ); } } } try writer.writeAll("\n"); }, else => { switch (info) { .Struct => |struct_info| { try std.fmt.format(writer, "function {s}(", .{Type}); inline for (struct_info.fields) |field_info| { try std.fmt.format(writer, "{s}, ", .{field_info.name}); } try writer.writeAll(") {\n"); inline for (struct_info.fields) |field_info| { try std.fmt.format(writer, " this.{s} = {s};\n", .{ field_info.name, field_info.name }); } try writer.writeAll("}\n\n"); }, .Union => |union_info| { if (union_info.tag_type) |_| { // TODO name payload args instead of using `arguments[i]` try std.fmt.format(writer, "const {s} = {{\n", .{Type}); inline for (union_info.fields) |field_info| { const payload = switch (field_info.field_type) { []const u8, f64 => "arguments[0]", void => "undefined", else => payload: { const num_args = 
@typeInfo(field_info.field_type).Struct.fields.len; var args: [num_args][]const u8 = undefined; for (args) |*arg, arg_ix| arg.* = try dida.util.format(js_common.allocator, "arguments[{}]", .{arg_ix}); break :payload try dida.util.format(js_common.allocator, "new {s}({s})", .{ field_info.field_type, std.mem.join(js_common.allocator, ", ", &args), }); }, }; try std.fmt.format( writer, \\ {s}: function () {{ \\ this.tag = "{s}"; \\ this.payload = {s}; \\ }}, \\ , .{ field_info.name, field_info.name, payload }, ); } try writer.writeAll("};\n\n"); } else { dida.util.compileError("Don't know how to make constructor for non-tagged union type {}", .{Type}); } }, else => dida.util.compileError("Don't know how to make constructor for type {}", .{Type}), } }, } } ================================================ FILE: bindings/node/package.json ================================================ { "name": "dida", "version": "0.0.0", "description": "", "main": "test.js", "author": "Jamie Brandon ", "license": "UNLICENSED" } ================================================ FILE: bindings/node/runtime.zig ================================================ pub const js_common = @import("../js_common.zig"); pub const abi = @import("./abi.zig"); export fn napi_register_module_v1(env: abi.Env, exports: abi.Value) abi.Value { inline for (js_common.exported_functions) |exported_function| { const num_args = @typeInfo(@TypeOf(exported_function[1])).Fn.args.len; abi.setProperty( env, exports, abi.createString(env, exported_function[0]), abi.createFunction(env, exported_function[0], num_args, comptime js_common.handleSerdeForFunction(exported_function[1])), ); } return exports; } ================================================ FILE: bindings/node/shell.nix ================================================ with (import ../../dependencies.nix); pkgs.mkShell rec { buildInputs = [ pkgs.nodejs zig ]; NIX_NODEJS=pkgs.nodejs; } ================================================ FILE: 
bindings/wasm/README.md ================================================ To build: ``` nix-shell zig build install -Drelease-safe=true zig build run-codegen ``` To run example: ``` nix-shell -p python3 cd ../../ python3 -m http.server & $BROWSER http://localhost:8000/examples/core.html ``` ================================================ FILE: bindings/wasm/abi.js ================================================ async function Abi(dida_url) { // id >= 0 for values on stack // id < 0 for values in ref_counted var stack = []; var ref_counted = {}; var next_ref_counted_id = -1; function stackPush(value) { stack.push(value); return stack.length - 1; } function stackRead(ix) { return stack[ix]; } function stackGetLength() { return stack.length; } function stackReset(length) { stack.length = length; } function RefCounted(value, refcount) { this.value = value; this.refcount = refcount; } function createRefCounted(value_id, refcount) { const ref_counted_id = next_ref_counted_id; next_ref_counted_id -= 1; ref_counted[ref_counted_id] = new RefCounted(stackRead(value_id), refcount); return ref_counted_id } function getRefCounted(ref_counted_id) { return stackPush(ref_counted[ref_counted_id].value); } // Return values must be kept in sync with js_common.JsType function jsTypeOf(value_ix) { const value = stackRead(value_ix); const typ = typeof(value); if (typ == 'undefined') return 0; // typeof(null) == 'object' :| if (value == null) return 1; if (typ == 'boolean') return 2; if (typ == 'number') return 3; if (typ == 'string') return 4; if (typ == 'object') return 5; if (typ == 'function') return 6; throw (typ + ' is not a type that the abi understands'); } function createUndefined() { return stackPush(undefined); } function createString(address, length) { let bytes = new Uint8Array(wasm.instance.exports.memory.buffer); let string = new TextDecoder().decode(bytes.slice(address, address + length)); return stackPush(string); } function createObject() { return stackPush({}); } 
function createArray(len) { return stackPush(new Array(len)); } function getStringLength(string_id) { return stackRead(string_id).length; } function getStringInto(string_id, address, max_len) { const string = stackRead(string_id); const encoded = new TextEncoder().encode(string); const bytes = new Uint8Array(wasm.instance.exports.memory.buffer); const len = Math.min(string.length, max_len); bytes.set(encoded.subarray(0, len), address); return len; } function getArrayLength(array_id) { return stackRead(array_id).length; } function getElement(array_id, ix) { return stackPush(stackRead(array_id)[ix]); } function setElement(array_id, ix, value_id) { stackRead(array_id)[ix] = stackRead(value_id); } function getProperty(object_id, name_id) { return stackPush(stackRead(object_id)[stackRead(name_id)]); } function setProperty(object_id, name_id, value_id) { stackRead(object_id)[stackRead(name_id)] = stackRead(value_id); } function callFunction(function_id, args_id) { return stackPush(stackRead(function_id).apply(null, stackRead(args_id))); } function consoleLog(message_id) { console.log(stackRead(message_id)); } function consoleError(message_id) { console.error(stackRead(message_id)); } function throwException(value_ix) { throw stackRead(value_ix); } const wasm = await WebAssembly.instantiateStreaming( fetch(dida_url), { env: { jsTypeOf: jsTypeOf, createUndefined: createUndefined, createBool: stackPush, createU32: stackPush, createI32: stackPush, createI64: stackPush, createF64: stackPush, createString: createString, createObject: createObject, createArray: createArray, createRefCounted: createRefCounted, getU32: stackRead, getI32: stackRead, getI64: stackRead, getF64: stackRead, getStringLength: getStringLength, getStringInto: getStringInto, getRefCounted: getRefCounted, getArrayLength: getArrayLength, getElement: getElement, setElement: setElement, getProperty: getProperty, setProperty: setProperty, callFunction: callFunction, consoleLog: consoleLog, consoleError: 
consoleError, throwException: throwException, } } ); return { wasm: wasm, stackGetLength: stackGetLength, stackReset: stackReset, stackRead: stackRead, stackPush: stackPush, }; } ================================================ FILE: bindings/wasm/abi.zig ================================================ //! Wasm abi mimicking NAPI //! Used by ../js_common.zig //! TODO much of this could be automatically generated with https://github.com/ziglang/zig/issues/6709 const dida = @import("../../lib/dida.zig"); const js_common = @import("../js_common.zig"); // --- wasm-specific stuff --- pub const js = struct { pub extern fn jsTypeOf(Value) u32; pub extern fn createUndefined() Value; pub extern fn createBool(bool) Value; pub extern fn createU32(u32) Value; pub extern fn createI32(i32) Value; pub extern fn createI64(i64) Value; pub extern fn createF64(f64) Value; pub extern fn createString(u32, u32) Value; pub extern fn createObject() Value; pub extern fn createArray(u32) Value; pub extern fn createRefCounted(Value, u32) RefCounted; pub extern fn getRefCounted(RefCounted) Value; pub extern fn getU32(Value) u32; pub extern fn getI32(Value) i32; pub extern fn getI64(Value) i64; pub extern fn getF64(Value) f64; pub extern fn getStringLength(Value) u32; pub extern fn getStringInto(Value, u32, u32) u32; pub extern fn getArrayLength(Value) u32; pub extern fn getElement(Value, u32) Value; pub extern fn setElement(Value, u32, Value) void; pub extern fn getProperty(Value, Value) Value; pub extern fn setProperty(Value, Value, Value) void; pub extern fn callFunction(Value, Value) Value; pub extern fn consoleLog(Value) void; pub extern fn consoleError(Value) void; pub extern fn throwException(Value) noreturn; }; fn HandleAbiForFunction(comptime num_args: usize) type { return switch (num_args) { 0 => fn callback() callconv(.C) Value, 1 => fn callback(Value) callconv(.C) Value, 2 => fn callback(Value, Value) callconv(.C) Value, 3 => fn callback(Value, Value, Value) callconv(.C) Value, 
else => dida.util.compileError("Need to add a boilerplate branch for exporting functions with {} args", .{num_args}), }; } pub fn handleAbiForFunction( comptime num_args: usize, comptime zig_fn: fn (env: Env, []const Value) Value, ) HandleAbiForFunction(num_args) { return switch (num_args) { 0 => struct { fn callback() callconv(.C) Value { return @call(.{}, zig_fn, .{ {}, &[_]Value{} }); } }.callback, 1 => struct { fn callback(a1: Value) callconv(.C) Value { return @call(.{}, zig_fn, .{ {}, &[_]Value{a1} }); } }.callback, 2 => struct { fn callback(a1: Value, a2: Value) callconv(.C) Value { return @call(.{}, zig_fn, .{ {}, &[_]Value{ a1, a2 } }); } }.callback, 3 => struct { fn callback(a1: Value, a2: Value, a3: Value) callconv(.C) Value { return @call(.{}, zig_fn, .{ {}, &[_]Value{ a1, a2, a3 } }); } }.callback, else => dida.util.compileError("Need to add a boilerplate branch for exporting functions with {} args", .{num_args}), }; } // --- interface required by js_common --- pub const Env = void; pub const Value = i32; pub const RefCounted = i32; comptime { dida.util.comptimeAssert(@bitSizeOf(*anyopaque) == 32, "Expect wasm to have 32 bit addresses", .{}); } pub fn jsTypeOf(_: Env, value: Value) js_common.JsType { return @intToEnum(js_common.JsType, @intCast(u3, js.jsTypeOf(value))); } pub fn createUndefined(_: Env) Value { return js.createUndefined(); } pub fn createBoolean(_: Env, b: bool) Value { return js.createBool(b); } pub fn createU32(_: Env, int: u32) Value { return js.createU32(int); } pub fn createI32(_: Env, int: i32) Value { return js.createI32(int); } pub fn createI64(_: Env, int: i64) Value { return js.createI64(int); } pub fn createF64(_: Env, int: f64) Value { return js.createF64(int); } pub fn createString(_: Env, string: []const u8) Value { return js.createString(@as(u32, @ptrToInt(@ptrCast([*c]const u8, string))), @as(u32, string.len)); } pub fn createObject(_: Env) Value { return js.createObject(); } pub fn createArray(_: Env, len: usize) Value 
{ return js.createArray(len); } pub fn createRefCounted(_: Env, value: Value, refcount: u32) RefCounted { return js.createRefCounted(value, refcount); } pub fn createExternal(env: Env, pointer: *anyopaque) Value { const address = @as(u32, @ptrToInt(pointer)); return createU32(env, address); } pub fn getU32(_: Env, value: Value) u32 { return js.getU32(value); } pub fn getI32(_: Env, value: Value) i32 { return js.getI32(value); } pub fn getI64(_: Env, value: Value) i64 { return js.getI64(value); } pub fn getF64(_: Env, value: Value) f64 { return js.getF64(value); } pub fn getString(env: Env, value: Value) ![]const u8 { const len = js.getStringLength(value); var buffer = try js_common.allocator.alloc(u8, len); return getStringInto(env, value, buffer); } pub fn getStringInto(_: Env, value: Value, buffer: []u8) []const u8 { const len = js.getStringInto(value, @as(u32, @ptrToInt(@ptrCast([*c]u8, buffer))), buffer.len); return buffer[0..len]; } pub fn getExternal(_: Env, external: Value) *anyopaque { const address = getU32({}, external); return @intToPtr(*anyopaque, @as(usize, address)); } pub fn getRefCounted(_: Env, ref: RefCounted) Value { return js.getRefCounted(ref); } pub fn getArrayLength(_: Env, array: Value) u32 { return js.getArrayLength(array); } pub fn getElement(_: Env, array: Value, index: u32) Value { return js.getElement(array, index); } pub fn setElement(_: Env, array: Value, index: u32, value: Value) void { js.setElement(array, index, value); } pub fn getProperty(_: Env, object: Value, name: Value) Value { return js.getProperty(object, name); } pub fn setProperty(_: Env, object: Value, name: Value, value: Value) void { return js.setProperty(object, name, value); } pub fn callFunction(env: Env, function: Value, args: []const Value) Value { const args_array = createArray(env, args.len); for (args) |arg, i| setElement(env, args_array, @as(u32, i), arg); return js.callFunction(function, args_array); } ================================================ FILE: 
bindings/wasm/build.zig ================================================ const builtin = @import("builtin"); const std = @import("std"); pub fn build(b: *std.build.Builder) !void { const mode = b.standardReleaseOptions(); const runtime = b.addSharedLibrary("dida", "./runtime.zig", .unversioned); runtime.setBuildMode(mode); runtime.setTarget(.{ .cpu_arch = .wasm32, .os_tag = .freestanding, }); runtime.setMainPkgPath("../../"); runtime.install(); const runtime_step = b.step("runtime", "Build runtime (zig-out/lib/dida.wasm)"); runtime_step.dependOn(&runtime.step); const codegen = b.addExecutable("codegen", "./codegen.zig"); codegen.setMainPkgPath("../../"); const run_codegen_step = b.step("run-codegen", "Run codegen (to generate zig-out/lib/dida.js)"); run_codegen_step.dependOn(&codegen.run().step); } ================================================ FILE: bindings/wasm/codegen.zig ================================================ const std = @import("std"); const dida = @import("../../lib/dida.zig"); const js_common = @import("../js_common.zig"); pub fn main() !void { const file = try std.fs.cwd().createFile("zig-out/lib/dida.js", .{ .read = false, .truncate = true }); defer file.close(); var writer = file.writer(); try writer.writeAll("function Dida(abi) {\n\n"); inline for (js_common.types_with_js_constructors) |T| { try generateConstructor(writer, T); } try writer.writeAll("\n\n"); inline for (js_common.types_with_js_constructors) |T| { try std.fmt.format(writer, "this.{s} = {s};\n", .{ T, T }); } try writer.writeAll("}"); } fn generateConstructor(writer: anytype, comptime Type: type) !void { const info = @typeInfo(Type); switch (comptime js_common.serdeStrategy(Type)) { .External => { inline for (info.Struct.decls) |decl_info| { if (decl_info.is_pub and decl_info.data == .Fn) { const fn_decl_info = decl_info.data.Fn; const fn_info = @typeInfo(fn_decl_info.fn_type).Fn; // First arg is allocator or self const args = fn_info.args[1..]; // TODO 
fn_decl_info.arg_names.len is empty // See https://github.com/ziglang/zig/issues/8259 var arg_names: [args.len][]const u8 = undefined; for (arg_names) |*arg_name, i| { arg_name.* = try dida.util.format(js_common.allocator, "arg{}", .{i}); } var arg_pushes: [args.len][]const u8 = undefined; for (arg_pushes) |*arg_push, i| { arg_push.* = try dida.util.format(js_common.allocator, "abi.stackPush(arg{})", .{i}); } // NOTE this relies on `init` being the first decl if (comptime std.mem.eql(u8, decl_info.name, "init")) { try std.fmt.format( writer, \\function {s}({s}) {{ \\ const init_stack_len = abi.stackGetLength(); \\ const result_ix = abi.wasm.instance.exports.{s}_init({s}); \\ const result = abi.stackRead(result_ix); \\ abi.stackReset(init_stack_len); \\ this.external = result.external; \\}} \\ , .{ Type, std.mem.join(js_common.allocator, ", ", &arg_names), Type, std.mem.join(js_common.allocator, ", ", &arg_pushes), }, ); } else { try std.fmt.format( writer, \\{s}.prototype.{s} = function {s}({s}) {{ \\ const init_stack_len = abi.stackGetLength(); \\ const result_ix = abi.wasm.instance.exports.{s}_{s}(abi.stackPush(this), {s}); \\ const result = abi.stackRead(result_ix); \\ abi.stackReset(init_stack_len); \\ return result; \\}} \\ , .{ Type, decl_info.name, decl_info.name, std.mem.join(js_common.allocator, ", ", &arg_names), Type, decl_info.name, std.mem.join(js_common.allocator, ", ", &arg_pushes), }, ); } } } try writer.writeAll("\n"); }, else => { switch (info) { .Struct => |struct_info| { try std.fmt.format(writer, "function {s}(", .{Type}); inline for (struct_info.fields) |field_info| { try std.fmt.format(writer, "{s}, ", .{field_info.name}); } try writer.writeAll(") {\n"); inline for (struct_info.fields) |field_info| { try std.fmt.format(writer, " this.{s} = {s};\n", .{ field_info.name, field_info.name }); } try writer.writeAll("}\n\n"); }, .Union => |union_info| { if (union_info.tag_type) |_| { // TODO name payload args instead of using `arguments[i]` try 
std.fmt.format(writer, "const {s} = {{\n", .{Type}); inline for (union_info.fields) |field_info| { const payload = switch (field_info.field_type) { []const u8, f64 => "arguments[0]", void => "undefined", else => payload: { const num_args = @typeInfo(field_info.field_type).Struct.fields.len; var args: [num_args][]const u8 = undefined; for (args) |*arg, arg_ix| arg.* = try dida.util.format(js_common.allocator, "arguments[{}]", .{arg_ix}); break :payload try dida.util.format(js_common.allocator, "new {s}({s})", .{ field_info.field_type, std.mem.join(js_common.allocator, ", ", &args), }); }, }; try std.fmt.format( writer, \\ {s}: function () {{ \\ this.tag = "{s}"; \\ this.payload = {s}; \\ }}, \\ , .{ field_info.name, field_info.name, payload }, ); } try writer.writeAll("};\n\n"); } else { dida.util.compileError("Don't know how to make constructor for non-tagged union type {}", .{Type}); } }, else => dida.util.compileError("Don't know how to make constructor for type {}", .{Type}), } }, } } ================================================ FILE: bindings/wasm/runtime.zig ================================================ const std = @import("std"); const dida = @import("../../lib/dida.zig"); const js_common = @import("../js_common.zig"); pub const abi = @import("./abi.zig"); // Requires fixing https://github.com/ziglang/zig/issues/8027 //comptime { //inline for (exported_functions) |exported_function| { //const num_args = @typeInfo(@TypeOf(exported_function[1])).Fn.args.len; //@export( //abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(exported_function[1])), //.{ //.name = exported_function[0], //.linkage = .Strong, //}, //); //} //} comptime { { const name = "GraphBuilder_init"; const function = js_common.GraphBuilder_init; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, 
}, ); } { const name = "GraphBuilder_addSubgraph"; const function = dida.core.GraphBuilder.addSubgraph; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "GraphBuilder_addNode"; const function = dida.core.GraphBuilder.addNode; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "GraphBuilder_connectLoop"; const function = dida.core.GraphBuilder.connectLoop; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "GraphBuilder_finishAndReset"; const function = dida.core.GraphBuilder.finishAndReset; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Graph_init"; const function = js_common.Graph_init; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_init"; const function = js_common.Shard_init; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_pushInput"; const 
function = dida.core.Shard.pushInput; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_flushInput"; const function = dida.core.Shard.flushInput; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_advanceInput"; const function = dida.core.Shard.advanceInput; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_hasWork"; const function = dida.core.Shard.hasWork; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_doWork"; const function = dida.core.Shard.doWork; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } { const name = "Shard_popOutput"; const function = dida.core.Shard.popOutput; const num_args = @typeInfo(@TypeOf(function)).Fn.args.len; const exported_function = abi.handleAbiForFunction(num_args, js_common.handleSerdeForFunction(function)); @export( exported_function, .{ .name = name, .linkage = .Strong, }, ); } } pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace) noreturn { //TODO Something in the call tree of StackTrace.format 
tries to open stderr, which doesn't compile on wasm //var buf = std.ArrayList(u8).init(allocator); //var writer = buf.writer(); //std.fmt.format(writer, "{s}\n\n{s}", .{ message, stack_trace }) catch |_| //std.mem.copy(u8, buf.items[buf.items.len - 3 .. buf.items.len], "OOM"); //const js_message = abi.createString({}, buf.items); _ = stack_trace; const js_message = abi.createString({}, message); // Use consoleError to get a js stack trace abi.js.consoleError(js_message); // Throw an exception to stop execution (and to get a breakpoint in the debugger) abi.js.throwException(js_message); } ================================================ FILE: bindings/wasm/shell.nix ================================================ with (import ../../dependencies.nix); pkgs.mkShell rec { buildInputs = [ zig ]; } ================================================ FILE: debugger/.gitignore ================================================ workspace ================================================ FILE: debugger/README.md ================================================ WARNING: Broken in zig 0.9.0 Todo: * don't show node for items that can't be expanded * or expand to something more useful? * color name, type, data differently * figure out cause of occasional hangs * how to be robust to invalid data? 
================================================ FILE: debugger/build.zig ================================================ const std = @import("std"); const zt = @import("ZT/build.zig"); pub fn build(b: *std.build.Builder) void { const target = b.standardTargetOptions(.{}); const mode = b.standardReleaseOptions(); const exe = b.addExecutable("build", "./main.zig"); zt.link(exe); exe.setTarget(target); exe.setBuildMode(mode); exe.setMainPkgPath("../"); exe.install(); // Run cmd const run_cmd = exe.run(); run_cmd.step.dependOn(b.getInstallStep()); if (b.args) |args| { run_cmd.addArgs(args); } const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); } ================================================ FILE: debugger/main.zig ================================================ const std = @import("std"); const zt = @import("zt"); const ig = @import("imgui"); const zg = zt.custom_components; const dida = @import("../lib/dida.zig"); const dida_test = @import("../test/core.zig"); const global_allocator = std.heap.c_allocator; pub fn main() !void { std.debug.print("Started!\n", .{}); dida_test.testShardTotalBalance() catch |err| dida.util.dump(err); run({}); } pub fn run(extra: anytype) void { var selected_event_ix: usize = 0; const Context = zt.App(void); // TODO this can't be called twice var context = Context.begin(global_allocator) catch unreachable; context.settings.energySaving = false; while (context.open) { context.beginFrame(); const viewport = ig.igGetMainViewport(); ig.igSetNextWindowPos(viewport.*.Pos, 0, .{}); ig.igSetNextWindowSize(viewport.*.Size, 0); var open = true; if (ig.igBegin( "The window", &open, ig.ImGuiWindowFlags_NoDecoration | ig.ImGuiWindowFlags_NoBackground | ig.ImGuiWindowFlags_AlwaysAutoResize | ig.ImGuiWindowFlags_NoSavedSettings | ig.ImGuiWindowFlags_NoFocusOnAppearing | ig.ImGuiWindowFlags_NoNav, )) { if (ig.igButton("<<", .{})) selected_event_ix = 0; ig.igSameLine(0, 0); if (ig.igButton("<", .{}) and selected_event_ix > 
0) selected_event_ix -= 1; ig.igSameLine(0, 0); var c_i = @intCast(c_int, selected_event_ix); if (ig.igDragInt("##i", &c_i, 1.0, 0, @intCast(c_int, debug_events.items.len - 1), "%d", 0)) selected_event_ix = @intCast(usize, c_i); ig.igSameLine(0, 0); if (ig.igButton(">", .{}) and selected_event_ix < debug_events.items.len - 1) selected_event_ix += 1; ig.igSameLine(0, 0); if (ig.igButton(">>", .{})) selected_event_ix = debug_events.items.len - 1; const State = struct { prev_event: dida.debug.DebugEvent, next_event: ?dida.debug.DebugEvent, validation_errors: []const dida.debug.ValidationError, shard: dida.core.Shard, }; inspect("root", State{ .prev_event = debug_events.items[selected_event_ix], .next_event = if (selected_event_ix + 1 == debug_events.items.len) null else debug_events.items[selected_event_ix + 1], .shard = shards.items[selected_event_ix], .validation_errors = validation_errors.items[selected_event_ix], }); inspect("events", debug_events); inspect("events_by_node", events_by_node); inspect("ios_by_node", ios_by_node); inspect("extra", extra); } ig.igEnd(); context.endFrame(); } context.deinit(); } fn inspect(name: []const u8, thing: anytype) void { const T = @TypeOf(thing); if (treeNodeFmt("{s}", .{name})) { ig.igSameLine(0, 0); zg.text(": {s}", .{@typeName(T)}); switch (@typeInfo(T)) { .Int => zg.text("{d} 0o{o} 0b{b}", .{ thing, thing, thing }), .Struct => |info| { if (comptime std.mem.startsWith(u8, @typeName(T), "std.array_list.ArrayList")) { inspectSlice(thing.items, 0); } else if (comptime std.mem.startsWith(u8, @typeName(T), "std.hash_map.HashMap")) { var iter = thing.iterator(); var i: usize = 0; while (iter.next()) |entry| { // TODO is there a better way to name these? 
inspect(zg.fmtTextForImgui("{}", .{i}), T.KV{ .key = entry.key_ptr.*, .value = entry.value_ptr.*, }); i += 1; } } else inline for (info.fields) |field_info| { inspect(field_info.name, @field(thing, field_info.name)); } }, .Union => |info| { if (info.tag_type) |tag_type| { inline for (@typeInfo(tag_type).Enum.fields) |field_info| { if (std.meta.activeTag(thing) == @intToEnum(tag_type, field_info.value)) { inspect(field_info.name, @field(thing, field_info.name)); } } } }, .Array => { for (thing) |elem, i| { inspect(zg.fmtTextForImgui("{}", .{i}), elem); } }, .Pointer => |info| { switch (info.size) { .One => inspect("*", thing.*), .Many => zg.text("{any}", .{thing}), .Slice => inspectSlice(thing, 0), .C => zg.text("{any}", .{thing}), } }, .Optional => { if (thing) |thing_not_null| inspect("?", thing_not_null) else zg.text("null", .{}); }, .Opaque => zg.text("opaque", .{}), else => zg.text("{any}", .{thing}), } ig.igTreePop(); } else { ig.igSameLine(0, 0); switch (@typeInfo(T)) { .Opaque => zg.text("opaque", .{}), else => inspectWithFormat(thing), } } } fn inspectSlice(thing: anytype, thing_start: usize) void { const step = if (thing.len <= 1) 1 else std.math.powi(usize, 10, std.math.log10(thing.len - 1)) catch unreachable; if (step == 1) { for (thing) |elem, i| inspect(zg.fmtTextForImgui("{}", .{thing_start + i}), elem); } else { var start: usize = 0; while (start < thing.len) { const end = start + std.math.min(step, thing.len - start); if (treeNodeFmt("{}..{}", .{ thing_start + start, thing_start + end })) { inspectSlice(thing[start..end], thing_start + start); ig.igTreePop(); } else { ig.igSameLine(0, 0); inspectWithFormat(thing); } start = end; } } } var inspect_with_format_buffer: [1024]u8 = undefined; fn inspectWithFormat(thing: anytype) void { const T = @TypeOf(thing); const format = if (@typeInfo(T) == .Pointer and @typeInfo(T).Pointer.size == .Slice and @typeInfo(T).Pointer.child == u8) " = {s}" else " = {any}"; var stream = std.io.FixedBufferStream([]u8){ 
.buffer = inspect_with_format_buffer[0..1022], .pos = 0 }; const writer = stream.writer(); std.fmt.format(writer, format, .{thing}) catch {}; if (stream.pos >= 1022) std.mem.copy(u8, inspect_with_format_buffer[1019..1022], "..."); inspect_with_format_buffer[stream.pos] = 0; ig.igText(@ptrCast([*c]const u8, inspect_with_format_buffer[0..stream.pos])); } fn treeNodeFmt(comptime fmt: []const u8, args: anytype) bool { const text = zg.fmtTextForImgui(fmt, args); return ig.igTreeNode_Str(text); } var shards = std.ArrayList(dida.core.Shard).init(global_allocator); var debug_events = std.ArrayList(dida.debug.DebugEvent).init(global_allocator); var validation_errors = std.ArrayList([]const dida.debug.ValidationError).init(global_allocator); const IxAndEvent = struct { ix: usize, event: dida.debug.DebugEvent, }; const Direction = union(enum) { In: usize, Out }; const IO = struct { ix: usize, direction: Direction, changes: []dida.core.Change, }; var events_by_node = dida.util.DeepHashMap(?dida.core.Node, std.ArrayList(IxAndEvent)).init(global_allocator); var ios_by_node = dida.util.DeepHashMap(dida.core.Node, std.ArrayList(IO)).init(global_allocator); // Called from dida.debug pub fn emitDebugEvent(shard: *const dida.core.Shard, debug_event: dida.debug.DebugEvent) void { tryEmitDebugEvent(shard, debug_event) catch dida.util.panic("OOM", .{}); } var ix: usize = 0; pub fn tryEmitDebugEvent(shard: *const dida.core.Shard, debug_event: dida.debug.DebugEvent) error{OutOfMemory}!void { _ = shard; _ = debug_event; dida.util.dump(ix); //dida.util.dump(.{ .ix = ix, .event = debug_event }); try shards.append(try dida.util.deepClone(shard.*, global_allocator)); try debug_events.append(try dida.util.deepClone(debug_event, global_allocator)); try validation_errors.append(dida.debug.validate(global_allocator, shard)); const node: ?dida.core.Node = switch (debug_event) { .PushInput => |e| e.node, .FlushInput => |e| e.node, .AdvanceInput => |e| e.node, .EmitChangeBatch => |e| e.from_node, 
.ProcessChangeBatch => |e| e.node_input.node, .QueueFrontierUpdate => |e| e.node_input.node, .ApplyFrontierUpdate => |e| e.node, .ProcessFrontierUpdates => null, .ProcessFrontierUpdate => |e| e.node, .ProcessFrontierUpdateReaction => |e| e.node, .PopOutput => |e| e.node, .DoWork => null, }; { const entry = try events_by_node.getOrPutValue(node, std.ArrayList(IxAndEvent).init(global_allocator)); try entry.value_ptr.append(.{ .ix = ix, .event = try dida.util.deepClone(debug_event, global_allocator), }); } const changes: ?[]dida.core.Change = switch (debug_event) { .EmitChangeBatch => |e| e.change_batch.changes, .ProcessChangeBatch => |e| e.change_batch.changes, else => null, }; const direction: ?Direction = switch (debug_event) { .PushInput, .FlushInput, .AdvanceInput, .ProcessFrontierUpdate, .ProcessFrontierUpdateReaction, .PopOutput, .EmitChangeBatch, .ApplyFrontierUpdate, => .Out, .ProcessChangeBatch => |e| .{ .In = e.node_input.input_ix }, .QueueFrontierUpdate => |e| .{ .In = e.node_input.input_ix }, .ProcessFrontierUpdates, .DoWork => null, }; if (changes != null) { const entry = try ios_by_node.getOrPutValue(node.?, std.ArrayList(IO).init(global_allocator)); try entry.value_ptr.append(.{ .ix = ix, .direction = direction.?, .changes = try dida.util.deepClone(changes.?, global_allocator), }); } if (ix == 10000) run({}); ix += 1; } ================================================ FILE: debugger/shell.nix ================================================ with (import ../dependencies.nix); pkgs.mkShell rec { buildInputs = [ zig pkgs.pkg-config pkgs.libGL.all pkgs.xorg.libX11.dev pkgs.xorg.libXrandr.all pkgs.xorg.libXcursor pkgs.xorg.libXinerama pkgs.xorg.xinput pkgs.xlibs.xorgproto pkgs.xlibs.libXi.all pkgs.xlibs.libXext.all pkgs.glew.all ]; } ================================================ FILE: dependencies.nix ================================================ rec { pkgs = import (builtins.fetchTarball { name = "nixos-21.11"; url = 
"https://github.com/NixOS/nixpkgs/archive/21.11.tar.gz"; sha256 = "162dywda2dvfj1248afxc45kcrg83appjd0nmdb541hl7rnncf02"; }) {}; zig = pkgs.stdenv.mkDerivation { name = "zig"; src = fetchTarball ( if (pkgs.system == "x86_64-linux") then { url = "https://ziglang.org/download/0.9.0/zig-linux-x86_64-0.9.0.tar.xz"; sha256 = "1vagp72wxn6i9qscji6k3a1shy76jg4d6crmx9ijpch9kyn71c96"; } else if (pkgs.system == "aarch64-linux") then { url = "https://ziglang.org/download/0.9.0/zig-linux-aarch64-0.9.0.tar.xz"; sha256 = "00m6nxp64nf6pwq407by52l8i0f2m4mw6hj17jbjdjd267b6sgri"; } else throw ("Unknown system " ++ pkgs.system) ); dontConfigure = true; dontBuild = true; installPhase = '' mkdir -p $out mv ./* $out/ mkdir -p $out/bin mv $out/zig $out/bin ''; }; } ================================================ FILE: docs/why.md ================================================ TODO this doc is WIP, will eventually include lots of interactive examples and diagrams which will make it much easier to follow Dida is heavily based on the ideas behind [differential dataflow](https://github.com/TimelyDataflow/differential-dataflow/) and is in part an attempt to better explain those ideas. The goal of this doc is to explain the design constraints that lead to building something like dida and what kinds of problems dida is useful for. We'll begin with a very simple problem and add the design constraints one at a time to show how they direct the design down this path. And we'll stick to the high-level intuition - see [dida.core](../lib/dida/core.zig) for implementation details. ## Problem The starting point is wanting to __incrementally__ update the results of __structured__ computations over __multisets__. * A __multiset__ (aka bag) is an unordered collection of elements. Unlike sets, they may contain multiple copies of a given element. The most familiar example of a multiset for most people is probably a SQL table. 
* __Structured__ computation means that the computation we care about is written in terms of a graph of multiset operations (map, reduce, join etc) as opposed to being written in an arbitrary general purpose programming language ([cf](https://scattered-thoughts.net/writing/an-opinionated-map-of-incremental-and-streaming-systems#unstructured-vs-structured)).
* __Incrementally__ updating the result means that when the input collections change slightly (eg some new data arrives) we can calculate the change to the output collections much faster than rerunning the whole computation from scratch.

Structured computation over multisets covers a range of applications including SQL, datalog, dataframes, graph computations, in-database machine learning ([eg](https://arxiv.org/pdf/1703.04780.pdf)), probabilistic programming, CRDTs ([eg](https://martin.kleppmann.com/2018/02/26/dagstuhl-data-consistency.html)), specifying distributed systems ([eg](https://www.youtube.com/watch?v=R2Aa4PivG0g)), generating UI ([eg](https://scattered-thoughts.net/writing/relational-ui/)) and a surprising range of fundamental CS algorithms ([eg](https://dl.acm.org/doi/pdf/10.1145/2902251.2902280)).

Incrementally updating these computations reduces the latency between adding new inputs and seeing the updated outputs. Using tools like [ksqldb](https://ksqldb.io/), [flink](https://github.com/apache/flink) and [materialize](https://materialize.com/), computations that used to be run as batch jobs overnight can now be kept up to date with sub-second latencies using the same resources.
And if you squint a bit, a lot of the work that goes into building a web or mobile app today from the backend database to the app server to the frontend UI all kind of looks like a pile of adhoc solutions to one big incremental update problem ([eg](https://martin.kleppmann.com/2015/03/04/turning-the-database-inside-out.html), [eg](https://engineering.fb.com/2020/03/02/data-infrastructure/messenger/), [eg](https://tonsky.me/blog/the-web-after-tomorrow/#always-late)). So it seems like there could be a lot of value to figuring out how to solve these kinds of problems efficiently and with minimum added complexity. ## Solution We can represent collections as a list of changes: ``` js // Add one copy of "alice" to the collection ("alice", +1) // Remove two copies of "bob" from the collection ("bob", -2) ``` To recover the current value of the collections we just add up all the changes that we've seen so far. ``` js collections = sum_changes(changes) ``` Next, we have to transform our operations that work on collections into operations that work on lists of changes. A correct transformation must obey this rule: ``` js assert( sum_changes(incremental_operator(changes)) == batch_operator(sum_changes(changes)) ) ``` For some operations this transformation is easy eg `map` can just apply it's map function to the data in each change as usual. ``` js function incremental_map(f, changes) { for (change in changes) { let (data, diff) = change; yield (f(data), diff); } } ``` Other operations like `sum` have to maintain some internal state to keep track of the changes they've seen so far. ``` js function incremental_sum(changes) { var total = 0; for (change in changes) { let (data, diff) = change; let old_total = total; total += data * diff; // delete the old total yield (old_total, -1); // add the new total yield (total, 1); } } ``` And finally operations like `join` have to maintain indexes for each input. 
A simple implementation of an index could just be a list of changes sorted by key.

``` js
function incremental_join(changes) {
    var left_index = new Index();
    var right_index = new Index();
    for (change in changes) {
        let (input_side, (key, value), diff) = change;
        if (input_side == "left") {
            left_index.update(key, value, diff);
            for ((other_value, other_diff) in right_index.lookup(key)) {
                yield ((key, value, other_value), diff * other_diff);
            }
        } else {
            right_index.update(key, value, diff);
            for ((other_value, other_diff) in left_index.lookup(key)) {
                yield ((key, other_value, value), diff * other_diff);
            }
        }
    }
}
```

This basic model is pretty well understood and has been implemented in a wide range of systems including ksqldb, flink etc.

## Constraint #1 - internally consistent results

The problem with this basic model is that in many cases it produces incorrect outputs. A single change to an input may produce multiple changes to the results of intermediate computations. If these changes are not carefully synchronized then the resulting stream of output changes can be mostly gibberish. (See [internal consistency in streaming systems](https://scattered-thoughts.net/writing/internal-consistency-in-streaming-systems/) for more details.)

To guarantee that the results are consistent we add __timestamps__, __multiversion indexes__ and __frontiers__.

Each change to the input now must include a timestamp.

``` js
// Add one copy of "alice" to the collection at time 12
("alice", 12, +1)

// Remove two copies of "bob" from the collection at time 42
("bob", 42, -2)
```

These timestamps could be actual real world timestamps (eg unix epochs) or they could just be arbitrary integers that we increment every time we make a new change. Their job is just to keep track of which output changes were caused by which input changes.

Previously we could get the state of a collection by summing up all the changes.
Now that we have timestamps, we can get the state of a collection as of any point in time T by summing up all the changes which have timestamps <= T. We need to update our rule of incremental operations accordingly: ``` js assert( sum_changes_up_until(time, incremental_operator(changes)) == batch_operator(sum_changes_up_until(time, changes)) ) ``` This means that incremental operations must now calculate the correct timestamp for their outputs. For most operations the output timestamp is the same as the input timestamp: ``` js function incremental_sum(changes) { var total = 0; for (change in changes) { let (data, time, diff) = change; let old_total = total; total += data * diff; // delete the old total yield (old_total, time, -1); // add the new total yield (total, time, 1); } } ``` But some operations like `join` require a bit more thought: ``` js function incremental_join(changes) { var left_index = new Index(); var right_index = new Index(); for (change in changes) { let (input_side, (key, value), time, diff) = change; if (input_side == "left") { left_index.update(key, value, time, diff); for ((other_value, other_time, other_diff) in right_index.lookup(key) { // max(time, other_time) is the earliest time at which both of these changes are visible yield ((key, value, other_value), max(time, other_time), diff * other_diff); } } else { right_index.update(key, value, time, diff); for ((other_value, other_time, other_diff) in left_index.lookup(key) { // max(time, other_time) is the earliest time at which both of these changes are visible yield ((key, other_value, value), max(time, other_time), diff * other_diff); } } } } ``` Indexes now need to track not only the latest value of the collection, but all the previous values. The easiest way to do this is to keep a list of all changes, sorted by key and timestamp. 
Some operations like `sum` can't produce the correct output for a given timestamp until they've seen all the inputs for that timestamp (as opposed to operations like `map` which can emit outputs immediately for each input). To handle this we need to keep track of each operation's __frontier__ - the earliest timestamps that might still appear in the output changes for that operation.

Whoever is feeding new changes into the inputs is now also responsible for updating the frontier of the inputs, to tell them when they have seen all the changes for a particular timestamp. And as changes flow through the graph we can also update the frontiers of each operation that they pass through.

(Side note: the more common terminology for a frontier is 'watermark', but 'watermark' is also used to refer to a variety of related concepts including per-operation timeouts for windowed operations and handling of late-arriving data, leading to persistent misunderstandings between different groups of people. Also, as we'll see in a moment, frontiers differ from traditional watermarks once we add iterative computations.)

Frontiers are also useful at the output of the system - downstream consumers can watch the frontier to learn when they have seen all the changes to the output for a given timestamp and can now safely act on the result.

With timestamps, multi-version indexes and frontiers we can build systems that are internally consistent. As a bonus the changes, timestamps and frontiers at the output are exactly the information that is required at the input, so we can take multiple such systems with different internal implementations and they can be composed into a single consistent computation so long as they stick to this format. (Materialize are working on a [protocol](https://materialize.com/docs/connect/materialize-cdc/) that encodes this information in a way that is idempotent and confluent, so you don't even need to have a reliable or ordered connection between systems.)
## Constraint #2 - iterative computations

TODO this whole section really needs the interactive examples in order to make any sense :)

Iterative computations show up all over the place eg recursive CTEs in sql, recursive rules in datalog, graph algorithms like pagerank, dataflow analysis in compilers. Without iterative computation our system would be very limited, not even Turing-complete.

But combining iterative computations with the ability to delete inputs makes for a fundamentally difficult incremental update problem. This problem has been well studied in the context of datalog and there are no completely satisfying solutions.

In dida and differential dataflow, iterative computations are expressed by taking a set of starting collections and repeatedly applying some transformation to them.

``` js
function loop(f, collections) {
    while (true) {
        collections = f(collections);
    }
    return collections;
}
```

As written above, this computation would never terminate. But since we're using incremental operations we only have to calculate the change between each iteration. So as long as the collections only change at a finite number of iterations, the incremental version will terminate.

The first impact of adding loops is that timestamps become more complicated. Previously, for each change produced by a node we only had to track which input changes produced it. Now when we're dealing with a change inside the loop we also have to track which loop iteration produced it. We can do this by adding an extra coordinate to the timestamp.

``` js
// Inserted one copy of "alice" at input time 12 on the 7th iteration of the loop
("alice", (12, 7), +1)
```

If we need to nest loops, we just keep adding more coordinates. At the output nodes of each loop we strip off the extra timestamp coordinate so that the rest of the system doesn't see the internal state of the loop.
Previously we could get the state of a collection as of any point in time T by summing up all the changes which have timestamps <= T. But what does `<=` mean when our timestamps have multiple coordinates?

There isn't a unique answer to this. We can choose various different orderings for our timestamps and they will produce different incremental operations. But when calculating the change at time T an incremental operation can only make use of changes with timestamps <= T. So we should choose an ordering that gives as much useful information as possible and removes as much useless information as possible.

Suppose we are looking at, say, the 3rd iteration of a loop for input time 7. If the changes between times are small, then the 3rd iteration at input time 7 will probably look a lot like the 3rd iteration at input time 6. And the 3rd iteration of any loop certainly needs to know about what happened in the 2nd iteration. But it probably can't make use of any information about the 4th iteration at time 6. So we should have `(6,3) < (7,3)` and `(7,2) < (7,3)` but not `(6,4) < (7,3)`.

This suggests that the correct rule is that a timestamp T0 is less than or equal to timestamp T1 when all of the coordinates of T0 are less than or equal to the coordinates of T1. I've been calling this the __causal order__, because it mirrors the possible causality between changes.

Apart from this new definition of `<=`, the rule for incremental operations remains unchanged:

``` js
assert(
    sum_changes_up_until(time, incremental_operator(changes))
    == batch_operator(sum_changes_up_until(time, changes))
)
```

This change is enough to make loops just work. Most operations can process changes in any old order and the few that need to wait can rely on frontiers. Frontiers also let us know when a loop has finished for a given input time - that input time will vanish from the frontier of the loop's output nodes once no more changes can be produced.

Frontiers however now become more complicated.
Firstly, suppose some node might produce changes in the future at times `(0,1)` and `(1,0)`. What should the frontier be? Neither of those timestamps is less than the other. So frontiers have to contain a set of earliest timestamps, rather than just a single timestamp.

Secondly, keeping frontiers up to date is harder given that the graph may now contain cycles. We can model the problem by representing frontiers as multisets. The frontier above would be:

``` js
// 1 copy of the timestamp (0,1)
(0,1) => 1
// 1 copy of the timestamp (1,0)
(1,0) => 1
```

The frontier tracks "what are the earliest timestamps that might appear on changes emitted from this node". So to calculate the frontier at a given node, we need to know:

* The frontiers of the nodes immediately upstream, because they might send changes that this node needs to handle.
* The timestamps of any changes that are waiting to be processed at this node.
* If this node is an input node, we also need the user to explicitly tell us what new inputs they might send in the future by setting the input frontier.

Given this information it's easy to calculate the frontier at this node by considering what effect this node has on timestamps. Most nodes don't change timestamps at all, so the output frontier will be the same as the upstream frontier. Nodes like `union` have multiple upstreams so they have to take the minimum of their upstream frontiers. The 'timestamp_increment' node adds 1 to the last coordinate of its upstream's timestamps, so it must also add 1 to the last coordinate of its upstream frontier. And so on.

As the input frontiers are advanced and as changes flow around the graph, the frontiers will also change. Recomputing all the frontiers from scratch every time we process a change would be too slow. So we have an incremental maintenance problem that involves iterative computations on multisets. We know how to solve this problem! Just add timestamps and frontiers... uh oh.
This has to bottom out somewhere. We need to find a different solution to the frontier computation than the one that produced it in the first place. Suppose we just kept a list of changes to frontiers and applied them one by one to downstream frontiers. What could go wrong? We can get into trouble when the graph of changes in a loop becomes self-supporting. Suppose we have a situation like: ``` js node A = input node B = union(A, timestamp_increment(C)) node C = map(f, B) node A frontier: (0, 0) => 1 node B frontier: (0, 0) => 1 node C frontier: (0, 0) => 1 ``` If we advance the input node A it will produce these changes: ``` pending changes: ((0, 0), -1) at A ((1, 0), +1) at A ``` After applying those changes, we'd like to end up with: ``` node A frontier: (1, 0) => 1 node B frontier: (1, 0) => 1 node C frontier: (1, 0) => 1 ``` But here is what can happen if we do this naively. First we process `((0,0), -1) at A`. This changes the frontier at A, producing a change that needs to be applied downstream at B. ``` pending changes: ((1, 0), +1) at A ((0, 0), -1) at B node A frontier: node B frontier: (0, 0) => 1 node C frontier: (0, 0) => 1 ``` Suppose we process `((0, 0), -1) at B` at B next. B can see that the frontier from A is now empty, and the frontier from C contains `(0, 0) => 1`. So the frontier at B should update to `(0, 1) => 1` to reflect the fact that message with timestamp `(0,0)` might come from C and pass through the `timestamp_increment`. ``` pending changes: ((1, 0), +1) at A ((0, 0), -1) at C ((0, 1), +1) at C node A frontier: node B frontier: (0, 1) => 1 node C frontier: (0, 0) => 1 ``` Next we handle the changes `((0, 0), -1) at C` and `((0, 1), +1) at C`. This advances the frontier at C and produces a new pending change for B. ``` pending changes: ((1, 0), +1) at A ((0, 1), -1) at B ((0, 2), +1) at B node A frontier: node B frontier: (0, 2) => 1 node C frontier: (0, 1) => 1 ``` By this point, you can probably see where this is going. 
The inserts and deletes can race each other around this cycle in the graph generating later and later timestamps. As long as the deletes never catch up to the inserts this algorithm won't even terminate, let alone produce the correct frontiers.

One way to avoid this is to always process changes in __causal order__. If a timestamp T0 at operation Op0 could produce a timestamp T1 at operation Op1, then we must handle T0 first. If we can cheaply compute this order then we can just sort the list of outstanding changes in this order and pop off one change at a time. In the example above the deletions are always at earlier timestamps than the insertions, so we process the deletions all the way around the loop first and they catch up to and cancel out the insertions.

The question now is how do we compute the causal order? In fact, back up, does the causal order even exist? What if we had a timestamp T0 at operation Op0 that could produce a timestamp T1 at operation Op1, but timestamp T1 at operation Op1 could also produce timestamp T0 at operation Op0? After all, our graph of operations does contain cycles.

The solution to this is to place some constraints on what kinds of graphs we'll allow. The most important constraints are:

* The output changes at any operation in the graph must not contain timestamps which are earlier than the input change that produced them (ie time must not go backwards).
* Any backwards edge in the graph must increment the timestamps of the changes that flow across it (ie time in loops must go forwards).

Together, these two constraints prevent the situation where two changes can both potentially cause each other. (See [Graph.validate](https://github.com/jamii/dida/search?q=%22fn+validate%28self%3A+Graph%22) for the full list of constraints.)

With those constraints in place, we can use the following ordering:

1. Process changes with earlier timestamps first.
2.
If two changes have the same timestamp, process the change that is earlier in the graph first. (The presence of multiple loops actually makes the ordering a little more subtle - see [orderPointstamps](https://github.com/jamii/dida/search?q=%22fn+orderPointstamps%22) for the gory details.) So long as we process outstanding changes in this order, we can guarantee that our frontier updates algorithm will terminate. ## Constraint 3 - process changes in batches and in parallel If processing changes in causal order solves the incremental update problem, why didn't we just do that in the first place instead of messing around with frontiers? The problem is that it can be very slow if our timestamps are fine-grained. If we have 1 million changes at some node that all have the same timestamp, great, we can process them all at once. But if we have 1 million changes with 1 million different timestamps then we have to process them one by one. In data-processing systems, batching is key to achieving good cache locality and amortizing interpreter overhead. If we have to process changes one at a time it will destroy our performance. Similarly, if we want to shard up this computation and run it across multiple threads, we can't have all the threads hitting synchronization points after every change while they check to make sure that the other threads don't have any earlier changes that should run first. So instead we separate the two aspects of the problem. The changes to our data can be processed in large batches, in any order, as if they were using coarse-grained timestamps, until they all pile up at operations that are waiting on frontier updates. Then we switch to updating our frontiers with the one-change-at-a-time method, but instead of tracking every timestamp the frontiers only have to track the smallest timestamps in each batch so it's a much smaller problem. 
And the frontier update problem is also monotonic (frontiers only ever advance) so it's easy to split across multiple threads in a wait-free fashion - each thread just broadcasts it's frontier changes and if other threads don't receive the changes right away then no problem, their frontiers will advance a little slower then they could have but none of their other work is blocked. I find this solution very elegant. We start with a problem that appears to require a lot of waiting and synchronization. And then we condense the actual essential waiting down into a much smaller problem and run everything else in big batches in whatever order it shows up in. ================================================ FILE: examples/core.html ================================================ ================================================ FILE: examples/core.js ================================================ const util = require('util'); const dida = require('../bindings/node/zig-out/lib/dida.js'); var graph_builder = new dida.GraphBuilder(); const subgraph_0 = new dida.Subgraph(0); const subgraph_1 = graph_builder.addSubgraph(subgraph_0); const edges = graph_builder.addNode(subgraph_0, new dida.NodeSpec.Input()); const edges_1 = graph_builder.addNode(subgraph_1, new dida.NodeSpec.TimestampPush(edges)); const reach_future = graph_builder.addNode(subgraph_1, new dida.NodeSpec.TimestampIncrement(null)); const reach_index = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Index(reach_future)); const distinct_reach_index = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Distinct(reach_index)); const swapped_edges = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Map(edges_1, input => [input[1], input[0]])); const swapped_edges_index = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Index(swapped_edges)); const joined = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Join([distinct_reach_index, swapped_edges_index], 1)); const without_middle = 
graph_builder.addNode(subgraph_1, new dida.NodeSpec.Map(joined, input => [input[2], input[1]])); const reach = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Union([edges_1, without_middle])); graph_builder.connectLoop(reach, reach_future); const reach_pop = graph_builder.addNode(subgraph_0, new dida.NodeSpec.TimestampPop(distinct_reach_index)); const reach_out = graph_builder.addNode(subgraph_0, new dida.NodeSpec.Output(reach_pop)); const reach_summary = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Reduce( distinct_reach_index, 1, "", function (reduced_value, row, count) { for (var i = 0; i < count; i++) { reduced_value += row[1]; } return reduced_value; } )); const reach_summary_out = graph_builder.addNode(subgraph_1, new dida.NodeSpec.Output(reach_summary)); const graph = graph_builder.finishAndReset(); var shard = new dida.Shard(graph); shard.pushInput(edges, new dida.Change(["a", "b"], [0], 1)); shard.pushInput(edges, new dida.Change(["b", "c"], [0], 1)); shard.pushInput(edges, new dida.Change(["b", "d"], [0], 1)); shard.pushInput(edges, new dida.Change(["c", "a"], [0], 1)); shard.pushInput(edges, new dida.Change(["b", "c"], [1], -1)); shard.flushInput(edges); shard.advanceInput(edges, [1]); while (shard.hasWork()) { shard.doWork(); while (true) { const change_batch = shard.popOutput(reach_out); if (change_batch == undefined) break; console.log(util.inspect(change_batch, false, null, true)); } while (true) { const change_batch = shard.popOutput(reach_summary_out); if (change_batch == undefined) break; console.log(util.inspect(change_batch, false, null, true)); } } console.log("Advancing!"); shard.advanceInput(edges, [2]); while (shard.hasWork()) { shard.doWork(); while (true) { const change_batch = shard.popOutput(reach_out); if (change_batch == undefined) break; console.log(util.inspect(change_batch, false, null, true)); } while (true) { const change_batch = shard.popOutput(reach_summary_out); if (change_batch == undefined) break; 
console.log(util.inspect(change_batch, false, null, true)); } } ================================================ FILE: examples/sugar.zig ================================================ // TODO this is just a proof of concept, api might change a lot const std = @import("std"); const dida = @import("../lib/dida.zig"); var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true, .never_unmap = true, }){}; var arena = std.heap.ArenaAllocator.init(&gpa.allocator); const allocator = &arena.allocator; pub fn main() !void { defer { arena.deinit(); _ = gpa.detectLeaks(); } var sugar = dida.sugar.Sugar.init(allocator); const edges = sugar.input(); const loop = sugar.loop(); const edges_1 = loop.importNode(edges); const reach = loop.loopNode(); reach.fixpoint(reach .index() .join(edges_1.project(.{ 1, 0 }).index(), 1) .project(.{ 2, 1 }) .union_(edges_1) .index().distinct()); const out = loop.exportNode(reach).output(); sugar.build(); try edges.push(.{ .{ "a", "b" }, .{0}, 1 }); try edges.push(.{ .{ "b", "c" }, .{0}, 1 }); try edges.push(.{ .{ "b", "d" }, .{0}, 1 }); try edges.push(.{ .{ "c", "a" }, .{0}, 1 }); try edges.push(.{ .{ "b", "c" }, .{1}, -1 }); try edges.flush(); try edges.advance(.{1}); try sugar.doAllWork(); while (out.pop()) |change_batch| { dida.common.dump(change_batch); } std.debug.print("Advancing!\n", .{}); try edges.advance(.{2}); try sugar.doAllWork(); while (out.pop()) |change_batch| { dida.common.dump(change_batch); } //dida.common.dump(sugar); } ================================================ FILE: lib/dida/core.zig ================================================ //! The core of dida handles all the actual computation. //! It exposes an api that is maximally flexible but also verbose and error-prone. //! See ./sugar.zig for a friendlier layer on top of the core. //! //! Assume that all struct parameters are owned unless otherwise stated. //! Assume all function arguments are borrowed unless otherwise stated. 
const std = @import("std");
const dida = @import("../dida.zig");
const u = dida.util;

/// The basic unit of data in dida.
pub const Value = union(enum) {
    String: []const u8,
    Number: f64,

    pub fn deinit(self: *Value, allocator: u.Allocator) void {
        switch (self.*) {
            // Strings are heap-allocated and owned by this Value.
            .String => |string| allocator.free(string),
            // Numbers are stored inline - nothing to free.
            .Number => {},
        }
        self.* = undefined;
    }
};

/// Every operation takes rows as inputs and produces rows as outputs.
// TODO This will eventually be replaced by raw bytes plus an optional type tag, so that users of dida can use whatever values and serde scheme they want.
pub const Row = struct {
    values: []const Value,

    /// Frees every value in the row and then the slice itself.
    pub fn deinit(self: *Row, allocator: u.Allocator) void {
        for (self.values) |_value| {
            // can't deinit through []const
            var value = _value;
            value.deinit(allocator);
        }
        allocator.free(self.values);
        self.* = undefined;
    }
};

/// A [bag](https://en.wikipedia.org/wiki/Multiset) of rows.
/// The dataflow is a graph of operations, each of which takes one or more bags of rows as inputs and produces a bag of rows as outputs.
pub const Bag = struct {
    /// Rows are all borrowed.
    rows: u.DeepHashMap(Row, isize),

    pub fn init(allocator: u.Allocator) Bag {
        return .{
            .rows = u.DeepHashMap(Row, isize).init(allocator),
        };
    }

    pub fn deinit(self: *Bag) void {
        // rows are all borrowed so no need to free them
        self.rows.deinit();
        self.* = undefined;
    }

    /// Add `diff` to the multiplicity of `row`.
    /// A row whose multiplicity reaches exactly 0 is dropped from the bag.
    pub fn update(self: *Bag, row: Row, diff: isize) !void {
        const entry = try self.rows.getOrPutValue(row, 0);
        entry.value_ptr.* += diff;
        // Discard the bool result of remove - we don't care whether the row was present.
        _ = if (entry.value_ptr.* == 0) self.rows.remove(row);
    }
};

/// The result of comparing two elements in a [partially ordered set](https://en.wikipedia.org/wiki/Partially_ordered_set).
/// (Field names are weird to be consistent with std.math.Order)
pub const PartialOrder = enum {
    lt,
    eq,
    gt,
    none,

    pub fn isLessThanOrEqual(self: PartialOrder) bool {
        return switch (self) {
            .lt, .eq => true,
            .gt, .none => false,
        };
    }
};

/// > Time is what prevents everything from happening all at once.
///
/// Timestamps represent the logical time something happened.
/// The first coord represents the logical time in the dataflow as a whole.
/// Each extra coord represent the iteration number within some enclosing loop in the dataflow (outermost loop first, innermost loop last).
pub const Timestamp = struct {
    coords: []const usize,

    /// Returns the least timestamp with `num_coords` coords - all coords zero.
    pub fn initLeast(allocator: u.Allocator, num_coords: usize) !Timestamp {
        var coords = try allocator.alloc(usize, num_coords);
        for (coords) |*coord| coord.* = 0;
        return Timestamp{ .coords = coords };
    }

    pub fn deinit(self: *Timestamp, allocator: u.Allocator) void {
        allocator.free(self.coords);
        self.* = undefined;
    }

    /// Returns a fresh timestamp with one extra innermost coord, set to 0.
    /// Used when a change enters a loop subgraph.
    pub fn pushCoord(self: Timestamp, allocator: u.Allocator) !Timestamp {
        var new_coords = try allocator.alloc(usize, self.coords.len + 1);
        std.mem.copy(usize, new_coords, self.coords);
        new_coords[new_coords.len - 1] = 0;
        return Timestamp{ .coords = new_coords };
    }

    /// Returns a fresh timestamp with the innermost coord incremented by 1.
    /// Used when a change goes around a loop.
    pub fn incrementCoord(self: Timestamp, allocator: u.Allocator) !Timestamp {
        // Guard against index underflow on `new_coords.len - 1` below,
        // mirroring the equivalent check in popCoord.
        u.assert(self.coords.len > 0, "Tried to call incrementCoord on a timestamp with length 0", .{});
        var new_coords = try allocator.dupe(usize, self.coords[0..self.coords.len]);
        new_coords[new_coords.len - 1] += 1;
        return Timestamp{ .coords = new_coords };
    }

    /// Returns a fresh timestamp with the innermost coord removed.
    /// Used when a change leaves a loop subgraph.
    pub fn popCoord(self: Timestamp, allocator: u.Allocator) !Timestamp {
        u.assert(self.coords.len > 0, "Tried to call popCoord on a timestamp with length 0", .{});
        const new_coords = try allocator.dupe(usize, self.coords[0 .. self.coords.len - 1]);
        return Timestamp{ .coords = new_coords };
    }

    /// A partial ordering on timestamps such that if a change at timestamp A could ever cause a change at timestamp B, then A <= B.
    /// This is used to process changes in an order that is guaranteed to converge, and to define the behavior of frontiers.
pub fn causalOrder(self: Timestamp, other: Timestamp) PartialOrder {
    u.assert(self.coords.len == other.coords.len, "Tried to compute causalOrder of timestamps with different lengths: {} vs {}", .{ self.coords.len, other.coords.len });
    // Compare coordinate-wise, remembering whether any coord was strictly
    // less and whether any was strictly greater.
    var saw_lt = false;
    var saw_gt = false;
    for (self.coords) |self_coord, i| {
        switch (std.math.order(self_coord, other.coords[i])) {
            .lt => saw_lt = true,
            .eq => {},
            .gt => saw_gt = true,
        }
    }
    // Mixed directions means the timestamps are incomparable.
    if (saw_lt and saw_gt) return .none;
    if (saw_lt) return .lt;
    if (saw_gt) return .gt;
    return .eq;
}

/// A total ordering on timestamps that is compatible with the causal order.
/// ie If `a.causalOrder(b) != .none` then `a.causalOrder(b) == a.lexicalOrder(b)`.
/// This is useful if you want to sort Timestamps by causal order - standard sorting algorithms don't always work well on partial orders.
pub fn lexicalOrder(self: Timestamp, other: Timestamp) std.math.Order {
    u.assert(self.coords.len == other.coords.len, "Tried to compute lexicalOrder of timestamps with different lengths: {} vs {}", .{ self.coords.len, other.coords.len });
    // Return the first non-equal coordinate comparison, scanning left to right.
    var i: usize = 0;
    while (i < self.coords.len) : (i += 1) {
        const coord_order = std.math.order(self.coords[i], other.coords[i]);
        if (coord_order != .eq) return coord_order;
    }
    return .eq;
}

/// Returns the earliest timestamp that is greater than both the inputs (in the causal ordering).
pub fn leastUpperBound(allocator: u.Allocator, self: Timestamp, other: Timestamp) !Timestamp {
    u.assert(self.coords.len == other.coords.len, "Tried to compute leastUpperBound of timestamps with different lengths: {} vs {}", .{ self.coords.len, other.coords.len });
    // The coordinate-wise max is the least timestamp >= both inputs.
    var output_coords = try allocator.alloc(usize, self.coords.len);
    for (self.coords) |self_coord, i| {
        const other_coord = other.coords[i];
        output_coords[i] = u.max(self_coord, other_coord);
    }
    return Timestamp{ .coords = output_coords };
}
};

/// A frontier represents the earliest timestamps in some set of timestamps (by causal order).
/// It's used to track progress in the dataflow and also to summarize the contents of a change batch.
pub const Frontier = struct {
    allocator: u.Allocator,
    /// Invariant: timestamps don't overlap - for any two timestamps t1 and t2 in timestamps `t1.causalOrder(t2) == .none`
    timestamps: u.DeepHashSet(Timestamp),

    pub fn init(allocator: u.Allocator) Frontier {
        return Frontier{
            .allocator = allocator,
            .timestamps = u.DeepHashSet(Timestamp).init(allocator),
        };
    }

    pub fn deinit(self: *Frontier) void {
        {
            // The set owns its timestamp keys, so free each one before
            // freeing the set itself.
            var iter = self.timestamps.iterator();
            while (iter.next()) |entry| {
                entry.key_ptr.deinit(self.allocator);
            }
        }
        self.timestamps.deinit();
        self.* = undefined;
    }

    /// Compares `timestamp` to `self.timestamps`.
    pub fn causalOrder(self: Frontier, timestamp: Timestamp) PartialOrder {
        var iter = self.timestamps.iterator();
        while (iter.next()) |entry| {
            const order = entry.key_ptr.causalOrder(timestamp);
            // Since the timestamps in `self.timestamps` are always mututally incomparable, we can never have `t1 < timestamp < t2`.
            // So it's safe to return as soon as we find some comparison.
            switch (order) {
                .lt => return .lt,
                .eq => return .eq,
                .gt => return .gt,
                .none => {},
            }
        }
        return .none;
    }

    pub const Direction = enum { Later, Earlier };

    /// Mutate `self` to a later (or earlier) frontier.
    /// Remove any timestamps that are earlier (or later) than `timestamp`.
/// Reports any changes to the frontier into `changes_into`.
pub fn move(self: *Frontier, comptime direction: Direction, timestamp: Timestamp, changes_into: *u.ArrayList(FrontierChange)) !void {
    u.assert(changes_into.items.len == 0, "Need to start with an empty changes_into buffer so can use it to remove timestamps", .{});
    var iter = self.timestamps.iterator();
    while (iter.next()) |entry| {
        // `direction` is comptime, so each prong below is selected at compile time.
        switch (timestamp.causalOrder(entry.key_ptr.*)) {
            .eq, if (direction == .Later) .lt else .gt => {
                // Moved in the wrong direction
                u.assert(changes_into.items.len == 0, "Frontier timestamps invariant was broken", .{});
                return;
            },
            if (direction == .Later) .gt else .lt => {
                // This existing timestamp is displaced by `timestamp`.
                // Ownership of the key moves into `changes_into` here; the
                // key is only removed (not freed) from the set below.
                try changes_into.append(.{ .timestamp = entry.key_ptr.*, .diff = -1 });
            },
            .none => {},
        }
    }
    // If we got this far, timestamp is being added to the frontier and might also be replacing some other timestamps that are currently on the frontier
    for (changes_into.items) |frontier_change| {
        _ = self.timestamps.remove(frontier_change.timestamp);
    }
    // Clone twice: one copy for the reported change (owned by caller), one for the set.
    try changes_into.append(.{ .timestamp = try u.deepClone(timestamp, self.allocator), .diff = 1 });
    try self.timestamps.put(try u.deepClone(timestamp, self.allocator), {});
}
};

/// Tracks both a bag of timestamps and the frontier of that bag.
/// This is used to incrementally compute the frontiers of each node in the graph as the dataflow progresses.
pub const SupportedFrontier = struct {
    allocator: u.Allocator,
    support: u.DeepHashMap(Timestamp, usize),
    // Invariant: frontier contains exactly the least timestamps from support
    frontier: Frontier,

    pub fn init(allocator: u.Allocator) !SupportedFrontier {
        return SupportedFrontier{
            .allocator = allocator,
            .support = u.DeepHashMap(Timestamp, usize).init(allocator),
            .frontier = Frontier.init(allocator),
        };
    }

    pub fn deinit(self: *SupportedFrontier) void {
        {
            // The support map owns its timestamp keys.
            var iter = self.support.iterator();
            while (iter.next()) |entry| entry.key_ptr.deinit(self.allocator);
        }
        self.support.deinit();
        self.frontier.deinit();
        self.* = undefined;
    }

    /// Change the count of `timestamp` by `diff`.
    /// Reports any changes to the frontier into `changes_into`.
    /// Changes are owned by the caller.
    pub fn update(self: *SupportedFrontier, timestamp: Timestamp, diff: isize, changes_into: *u.ArrayList(FrontierChange)) !void {
        const support_entry = try self.support.getOrPut(timestamp);
        if (!support_entry.found_existing) {
            // The map must own its key, so replace the borrowed key with a clone.
            support_entry.key_ptr.* = try u.deepClone(support_entry.key_ptr.*, self.allocator);
            support_entry.value_ptr.* = 0;
        }
        support_entry.value_ptr.* = @intCast(usize, @intCast(isize, support_entry.value_ptr.*) + diff);
        // Snapshot the new count now. The `fetchRemove` calls below invalidate
        // `support_entry`, so reading through it afterwards (as the previous
        // version of this code did for the `== diff` check) is use-after-invalidation.
        const new_count = support_entry.value_ptr.*;

        if (new_count == 0) {
            // Timestamp was just removed, might have been in frontier
            if (self.support.fetchRemove(timestamp)) |*remove_entry| {
                remove_entry.key.deinit(self.allocator);
            }
            if (self.frontier.timestamps.fetchRemove(timestamp)) |*remove_entry| {
                remove_entry.key.deinit(self.allocator);

                // Removed this timestamp from frontier
                try changes_into.append(.{ .timestamp = try u.deepClone(timestamp, self.allocator), .diff = -1 });

                // Find timestamps in support that might now be on the frontier
                var candidates = u.ArrayList(Timestamp).init(self.allocator);
                defer candidates.deinit();
                var iter = self.support.iterator();
                while (iter.next()) |entry| {
                    if (timestamp.causalOrder(entry.key_ptr.*) == .lt)
                        try candidates.append(entry.key_ptr.*);
                }

                // Add in lexical order any candidates that are not past the current frontier (or past any earlier candidates)
                std.sort.sort(Timestamp, candidates.items, {}, struct {
                    fn lessThan(_: void, a: Timestamp, b: Timestamp) bool {
                        return a.lexicalOrder(b) == .lt;
                    }
                }.lessThan);
                for (candidates.items) |candidate| {
                    if (self.frontier.causalOrder(candidate) == .none) {
                        try self.frontier.timestamps.put(try u.deepClone(candidate, self.allocator), {});
                        try changes_into.append(.{ .timestamp = try u.deepClone(candidate, self.allocator), .diff = 1 });
                    }
                }
            }
        }

        if (new_count == diff) {
            // Timestamp was just added, might be in frontier
            if (self.frontier.causalOrder(timestamp) != .lt) {
                // Add to frontier
                try self.frontier.timestamps.put(try u.deepClone(timestamp, self.allocator), {});
                try changes_into.append(.{ .timestamp = try u.deepClone(timestamp, self.allocator), .diff = 1 });

                // Remove any other timestamp that is greater than the new timestamp
                var to_remove = u.ArrayList(Timestamp).init(self.allocator);
                defer to_remove.deinit();
                var iter = self.frontier.timestamps.iterator();
                while (iter.next()) |frontier_entry| {
                    if (frontier_entry.key_ptr.causalOrder(timestamp) == .gt)
                        try to_remove.append(frontier_entry.key_ptr.*);
                }
                for (to_remove.items) |other_timestamp| {
                    // Ownership of the removed key moves into `changes_into`.
                    _ = self.frontier.timestamps.remove(other_timestamp);
                    try changes_into.append(.{ .timestamp = other_timestamp, .diff = -1 });
                }
            }
        }
    }
};

/// Represents a single change to the set of earliest timestamps in a frontier.
pub const FrontierChange = struct {
    timestamp: Timestamp,
    diff: isize,

    pub fn deinit(self: *FrontierChange, allocator: u.Allocator) void {
        self.timestamp.deinit(allocator);
        self.* = undefined;
    }
};

/// Represents a change to some bag in the dataflow.
/// The count of `row` changed by `diff` at `timestamp`.
pub const Change = struct {
    row: Row,
    timestamp: Timestamp,
    diff: isize,

    /// Frees the owned row and timestamp.
    pub fn deinit(self: *Change, allocator: u.Allocator) void {
        self.row.deinit(allocator);
        self.timestamp.deinit(allocator);
        self.* = undefined;
    }
};

/// Controls which side's columns come first in the output rows of a join.
pub const ConcatOrder = enum { LeftThenRight, RightThenLeft };

/// A batch of changes, conveniently pre-sorted and de-duplicated.
pub const ChangeBatch = struct {
    /// Invariant: for every change in changes, lower_bound.causalOrder(change).isLessThanOrEqual()
    lower_bound: Frontier,
    /// Invariant: non-empty,
    /// Invariant: sorted by row/timestamp
    /// Invariant: no two changes with same row/timestamp
    // TODO should be `[]const Change`?
    changes: []Change,

    /// Returns a batch with no changes.
    pub fn empty(allocator: u.Allocator) ChangeBatch {
        var empty_changes = [0]Change{};
        // NOTE(review): `&empty_changes` points at a stack-local zero-length
        // array. Presumably safe only because the slice has len 0 and freeing
        // a zero-length slice is a no-op - confirm the allocator contract.
        return ChangeBatch{
            .lower_bound = Frontier.init(allocator),
            .changes = &empty_changes,
        };
    }

    pub fn deinit(self: *ChangeBatch, allocator: u.Allocator) void {
        for (self.changes) |*change| change.deinit(allocator);
        allocator.free(self.changes);
        self.lower_bound.deinit();
        self.* = undefined;
    }

    /// Find the first row after `from` that starts with `row[0..key_columns]` or, if there is no such row, the position where it would be.
    /// IE returns `ix` such that:
    /// * `self.changes[ix].row[0..key_columns] >= row[0..key_columns]` (or `ix == self.changes.len`)
    /// * `self.changes[ix-1].row[0..key_columns] < row[0..key_columns]` (or `ix == 0`)
    /// Uses a binary search with increasing step size.
    /// If `from == self.changes.len`, then returns `from`.
pub fn seekRowStart(self: ChangeBatch, from: usize, row: Row, key_columns: usize) usize {
    u.assert(
        from <= self.changes.len,
        "Can't seek to row from a start point that is beyond the end of the batch",
        .{},
    );
    // Fast path: already at the end, or already at/past the target key.
    if (from == self.changes.len or
        u.deepOrder(
        self.changes[from].row.values[0..key_columns],
        row.values[0..key_columns],
    ) != .lt)
        return from;

    // Galloping phase: double the step size until we overshoot the target key
    // (or hit the end of the batch). Afterwards `self.changes[lo] < row` and
    // `lo + skip` is the first candidate that is not known to be < row.
    var lo = from;
    var skip: usize = 1;
    while (true) {
        const next = lo + skip;
        if (next >= self.changes.len) {
            skip = self.changes.len - lo;
            break;
        }
        if (u.deepOrder(
            self.changes[next].row.values[0..key_columns],
            row.values[0..key_columns],
        ) != .lt) break;
        lo = next;
        skip *= 2;
    }
    var hi = lo + skip;

    // now lo is < row and hi is >= row
    u.assert(
        u.deepOrder(
            self.changes[lo].row.values[0..key_columns],
            row.values[0..key_columns],
        ) == .lt,
        "",
        .{},
    );
    u.assert(
        hi >= self.changes.len or
            u.deepOrder(
            self.changes[hi].row.values[0..key_columns],
            row.values[0..key_columns],
        ) != .lt,
        "",
        .{},
    );

    // Binary search phase: narrow (lo, hi] down to the first position >= row.
    while (hi - lo > 1) {
        const mid = lo + @divTrunc(hi - lo, 2);
        if (u.deepOrder(
            self.changes[mid].row.values[0..key_columns],
            row.values[0..key_columns],
        ) == .lt) {
            lo = mid;
        } else {
            hi = mid;
        }
    }
    return hi;
}

/// Find the last row after `from` that starts with `row[0..key_columns]` or, if there is no such row, the position where it would be.
/// IE returns `ix` such that:
/// * `self.changes[ix].row[0..key_columns] > row[0..key_columns]` (or `ix == self.changes.len`)
/// * `self.changes[ix-1].row[0..key_columns] <= row[0..key_columns]` (or `ix == 0`)
/// Uses a linear scan.
/// If `from == self.changes.len`, then returns `from`.
pub fn seekRowEnd(self: ChangeBatch, from: usize, row: Row, key_columns: usize) usize {
    u.assert(
        from <= self.changes.len,
        "Can't seek to row from a start point that is beyond the end of the batch",
        .{},
    );
    // Linear scan forwards while the current change's key columns are <= the
    // target key. Stops at the first key that is strictly greater, or the end.
    var pos = from;
    while (pos < self.changes.len) : (pos += 1) {
        const key_order = u.deepOrder(
            self.changes[pos].row.values[0..key_columns],
            row.values[0..key_columns],
        );
        if (key_order == .gt) break;
    }
    return pos;
}

/// Find the last row after `from` that starts with `self.changes[from].row[0..key_columns]`
/// If `from == self.changes.len`, then returns `from`.
pub fn seekCurrentRowEnd(self: ChangeBatch, from: usize, key_columns: usize) usize {
    u.assert(
        from <= self.changes.len,
        "Can't seek to row from a start point that is beyond the end of the batch",
        .{},
    );
    // The change at `from` matches its own key, so the scan can start at `from + 1`.
    return if (from == self.changes.len)
        from
    else
        self.seekRowEnd(from + 1, self.changes[from].row, key_columns);
}

/// Relational join on the first `key_columns` columns of self and other.
/// Produces rows that look like `self_row ++ other_row[key_columns..]`.
pub fn mergeJoin(
    self: ChangeBatch,
    self_frontier: Frontier,
    other: ChangeBatch,
    key_columns: usize,
    concat_order: ConcatOrder,
    into_builder: *ChangeBatchBuilder,
) !void {
    // Classic sort-merge join: both batches are sorted by row, so walk a
    // cursor through each and only do work where the key columns match.
    var ix_self: usize = 0;
    var ix_other: usize = 0;
    while (ix_self < self.changes.len and ix_other < other.changes.len) {
        switch (u.deepOrder(
            self.changes[ix_self].row.values[0..key_columns],
            other.changes[ix_other].row.values[0..key_columns],
        )) {
            .eq => {
                // Matching key: produce the cartesian product of the two runs
                // of changes that share this key.
                const ix_self_end = self.seekCurrentRowEnd(ix_self, key_columns);
                const ix_other_end = other.seekCurrentRowEnd(ix_other, key_columns);
                const ix_other_start = ix_other;
                while (ix_self < ix_self_end) : (ix_self += 1) {
                    // Skip changes that are already at or past the input frontier -
                    // those haven't been processed by the consuming node yet.
                    if (self_frontier.causalOrder(self.changes[ix_self].timestamp) == .gt) {
                        ix_other = ix_other_start;
                        while (ix_other < ix_other_end) : (ix_other += 1) {
                            const change_self = self.changes[ix_self];
                            const change_other = other.changes[ix_other];
                            var values = try std.mem.concat(into_builder.allocator, Value, switch (concat_order) {
                                .LeftThenRight => &[_][]const Value{
                                    change_self.row.values,
                                    change_other.row.values[key_columns..],
                                },
                                .RightThenLeft => &[_][]const Value{
                                    change_other.row.values,
                                    change_self.row.values[key_columns..],
                                },
                            });
                            // `concat` copied the slice but not the values - clone
                            // them so the output row owns its contents.
                            for (values) |*value| {
                                value.* = try u.deepClone(value.*, into_builder.allocator);
                            }
                            try into_builder.changes.append(.{
                                .row = .{ .values = values },
                                .timestamp = try Timestamp.leastUpperBound(into_builder.allocator, change_self.timestamp, change_other.timestamp),
                                .diff = change_self.diff * change_other.diff,
                            });
                        }
                    }
                }
                // now ix_self and ix_other are both at next row
            },
            .lt => {
                ix_self = self.seekRowStart(ix_self, other.changes[ix_other].row, key_columns);
            },
            .gt => {
                ix_other = other.seekRowStart(ix_other, self.changes[ix_self].row, key_columns);
            },
        }
    }
}
};

/// A helper for building a ChangeBatch.
/// Append to `changes` as you like and call `finishAndReset` to produce a batch.
pub const ChangeBatchBuilder = struct {
    allocator: u.Allocator,
    changes: u.ArrayList(Change),

    pub fn init(allocator: u.Allocator) ChangeBatchBuilder {
        return ChangeBatchBuilder{
            .allocator = allocator,
            .changes = u.ArrayList(Change).init(allocator),
        };
    }

    pub fn deinit(self: *ChangeBatchBuilder) void {
        for (self.changes.items) |*change| change.deinit(self.allocator);
        self.changes.deinit();
        self.* = undefined;
    }

    /// Coalesce changes with identical rows and timestamps.
    pub fn coalesce(self: *ChangeBatchBuilder) void {
        if (self.changes.items.len == 0) return;

        // Sort so that changes with identical row/timestamp become adjacent.
        std.sort.sort(Change, self.changes.items, {}, struct {
            fn lessThan(_: void, a: Change, b: Change) bool {
                return u.deepOrder(a, b) == .lt;
            }
        }.lessThan);

        // In-place compaction: `prev_i` points at the last kept change.
        // Duplicates fold their diff into it; distinct changes are swapped
        // down into the kept prefix (the displaced change moves into the
        // tail, which is freed below).
        var prev_i: usize = 0;
        for (self.changes.items[1..]) |*change| {
            const prev_change = &self.changes.items[prev_i];
            if (u.deepEqual(prev_change.row, change.row) and u.deepEqual(prev_change.timestamp, change.timestamp)) {
                prev_change.diff += change.diff;
            } else {
                // Only advance past the previous change if its diff didn't cancel to 0.
                if (prev_change.diff != 0) prev_i += 1;
                std.mem.swap(Change, &self.changes.items[prev_i], change);
            }
        }
        if (self.changes.items[prev_i].diff != 0) prev_i += 1;
        // Everything at prev_i and beyond is either a cancelled change or a
        // leftover duplicate - free and drop it.
        for (self.changes.items[prev_i..]) |*change| change.deinit(self.allocator);
        self.changes.shrinkRetainingCapacity(prev_i);
    }

    /// Produce a change batch.
    /// If the batch would have been empty, return null instead.
    /// Resets `self` so that it can be used again.
pub fn finishAndReset(self: *ChangeBatchBuilder) !?ChangeBatch {
    self.coalesce();
    if (self.changes.items.len == 0) return null;

    // Build the lower bound frontier by folding every timestamp in.
    var lower_bound = Frontier.init(self.allocator);
    var changes_into = u.ArrayList(FrontierChange).init(self.allocator);
    defer changes_into.deinit();
    for (self.changes.items) |change| {
        try lower_bound.move(.Earlier, change.timestamp, &changes_into);
        // We only need the final frontier, not the intermediate changes.
        for (changes_into.items) |*frontier_change| frontier_change.deinit(self.allocator);
        try changes_into.resize(0);
    }

    return ChangeBatch{
        .lower_bound = lower_bound,
        // toOwnedSlice empties `self.changes`, leaving the builder reusable.
        .changes = self.changes.toOwnedSlice(),
    };
}
};

/// Represents the state of a bag at a variety of timestamps.
/// Allows efficiently adding new changes and querying previous changes.
pub const Index = struct {
    allocator: u.Allocator,
    /// Invariant: each batch is at most half the size of it's left neighbour
    change_batches: u.ArrayList(ChangeBatch),

    pub fn init(allocator: u.Allocator) Index {
        return .{
            .allocator = allocator,
            .change_batches = u.ArrayList(ChangeBatch).init(allocator),
        };
    }

    pub fn deinit(self: *Index) void {
        for (self.change_batches.items) |*change_batch| change_batch.deinit(self.allocator);
        self.change_batches.deinit();
        self.* = undefined;
    }

    /// Takes ownership of `change_batch`
    // TODO merge incrementally to avoid latency spikes
    pub fn addChangeBatch(self: *Index, change_batch: ChangeBatch) !void {
        try self.change_batches.append(change_batch);
        // Restore the size invariant by repeatedly merging the two rightmost
        // (newest) batches until each batch is at most half its left neighbour.
        while (true) {
            const len = self.change_batches.items.len;
            if (len <= 1 or @divFloor(self.change_batches.items[len - 2].changes.len, 2) >= self.change_batches.items[len - 1].changes.len) break;
            var batch_a = self.change_batches.pop();
            defer {
                // The merged batch takes ownership of the individual changes,
                // so only free the containers here, not their contents.
                batch_a.lower_bound.deinit();
                self.allocator.free(batch_a.changes);
            }
            var batch_b = self.change_batches.pop();
            defer {
                batch_b.lower_bound.deinit();
                self.allocator.free(batch_b.changes);
            }
            var builder = ChangeBatchBuilder.init(self.allocator);
            defer builder.deinit();
            try builder.changes.ensureTotalCapacity(batch_a.changes.len + batch_b.changes.len);
            try builder.changes.appendSlice(batch_a.changes);
            try builder.changes.appendSlice(batch_b.changes);
            // finishAndReset may return null if the merged changes all cancel out.
            if (try builder.finishAndReset()) |batch_ab| {
                try self.change_batches.append(batch_ab);
            }
        }
    }

    /// Relational join on the first `key_columns` columns of self and change_batch.
    /// Produces rows that look like:
    /// * `self_row ++ other_row[key_columns..]` if `concat_order == .LeftThenRight`
    /// * `other_row ++ self_row[key_columns..]` if `concat_order == .RightThenLeft`
    // TODO would it be better to merge against a cursor, to avoid touching change_batch multiple times?
    pub fn mergeJoin(
        self: *const Index,
        self_frontier: Frontier,
        change_batch: ChangeBatch,
        key_columns: usize,
        concat_order: ConcatOrder,
        into_builder: *ChangeBatchBuilder,
    ) !void {
        // Join the incoming batch against every batch in the index.
        for (self.change_batches.items) |self_change_batch| {
            try self_change_batch.mergeJoin(self_frontier, change_batch, key_columns, concat_order, into_builder);
        }
    }

    /// Appends every change where `row.values[0..key_columns] == change.row.values[0..key_columns]` into `into_changes`.
    /// Changes are borrowed from the index.
pub fn getChangesForKey(self: *Index, row: Row, key_columns: usize, into_changes: *u.ArrayList(Change)) !void {
    // Each batch is sorted by row, so the matching changes form one
    // contiguous run per batch.
    for (self.change_batches.items) |change_batch| {
        const begin_ix = change_batch.seekRowStart(0, row, key_columns);
        const end_ix = change_batch.seekRowEnd(begin_ix, row, key_columns);
        for (change_batch.changes[begin_ix..end_ix]) |change| {
            try into_changes.append(change);
        }
    }
}

/// Sums the diffs of every change whose row equals `row` (all columns) and
/// whose timestamp is causally <= `timestamp`.
pub fn getCountForRowAsOf(self: *Index, row: Row, timestamp: Timestamp) isize {
    var total: isize = 0;
    for (self.change_batches.items) |change_batch| {
        const begin_ix = change_batch.seekRowStart(0, row, row.values.len);
        const end_ix = change_batch.seekRowEnd(begin_ix, row, row.values.len);
        for (change_batch.changes[begin_ix..end_ix]) |change| {
            if (change.timestamp.causalOrder(timestamp).isLessThanOrEqual())
                total += change.diff;
        }
    }
    return total;
}
};

/// A node in the dataflow graph.
pub const Node = struct {
    id: usize,
};

/// One of the input edges to some node in a dataflow graph.
pub const NodeInput = struct {
    node: Node,
    input_ix: usize,
};

pub const NodeSpecTag = enum {
    Input,
    Map,
    Index,
    Join,
    Output,
    TimestampPush,
    TimestampIncrement,
    TimestampPop,
    Union,
    Distinct,
    Reduce,

    /// True for the node kinds that maintain an index internally
    /// (.Index, .Distinct, .Reduce).
    pub fn hasIndex(self: NodeSpecTag) bool {
        switch (self) {
            .Index, .Distinct, .Reduce => return true,
            else => return false,
        }
    }

    /// True for the node kinds that require an index (.Distinct, .Reduce).
    pub fn needsIndex(self: NodeSpecTag) bool {
        switch (self) {
            .Distinct, .Reduce => return true,
            else => return false,
        }
    }
};

/// Specifies how a node should transform inputs bags into an output bag.
pub const NodeSpec = union(NodeSpecTag) {
    Input,
    Map: MapSpec,
    Index: IndexSpec,
    Join: JoinSpec,
    Output: OutputSpec,
    TimestampPush: TimestampPushSpec,
    TimestampIncrement: TimestampIncrementSpec,
    TimestampPop: TimestampPopSpec,
    Union: UnionSpec,
    Distinct: DistinctSpec,
    Reduce: ReduceSpec,

    pub const MapSpec = struct {
        input: Node,
        mapper: *Mapper,

        /// Vtable-style callback: implementations embed this struct and
        /// provide `map_fn`.
        pub const Mapper = struct {
            map_fn: fn (self: *Mapper, row: Row) error{OutOfMemory}!Row,
        };
    };

    pub const IndexSpec = struct {
        input: Node,
    };

    pub const JoinSpec = struct {
        inputs: [2]Node,
        key_columns: usize,
    };

    pub const OutputSpec = struct {
        input: Node,
    };

    pub const TimestampPushSpec = struct {
        input: Node,
    };

    pub const TimestampIncrementSpec = struct {
        // Initially null, will be set later to a future edge
        input: ?Node,
    };

    pub const TimestampPopSpec = struct {
        input: Node,
    };

    pub const UnionSpec = struct {
        inputs: [2]Node,
    };

    pub const DistinctSpec = struct {
        input: Node,
    };

    pub const ReduceSpec = struct {
        input: Node,
        key_columns: usize,
        init_value: Value,
        reducer: *Reducer,

        /// Vtable-style callback: implementations embed this struct and
        /// provide `reduce_fn`.
        pub const Reducer = struct {
            reduce_fn: fn (self: *Reducer, reduced_value: Value, row: Row, count: usize) error{OutOfMemory}!Value,
        };
    };

    /// Returns the input nodes of this spec as a (possibly empty) slice.
    pub fn getInputs(self: *const NodeSpec) []const Node {
        return switch (self.*) {
            .Input => |_| &[_]Node{},
            .Map => |*spec| u.ptrToSlice(Node, &spec.input),
            .Index => |*spec| u.ptrToSlice(Node, &spec.input),
            .Output => |*spec| u.ptrToSlice(Node, &spec.input),
            .TimestampPush => |*spec| u.ptrToSlice(Node, &spec.input),
            // Will panic if the backwards edge hasn't been connected yet.
            .TimestampIncrement => |*spec| u.ptrToSlice(Node, &spec.input.?),
            .TimestampPop => |*spec| u.ptrToSlice(Node, &spec.input),
            .Distinct => |*spec| u.ptrToSlice(Node, &spec.input),
            .Reduce => |*spec| u.ptrToSlice(Node, &spec.input),
            .Join => |*spec| &spec.inputs,
            .Union => |*spec| &spec.inputs,
        };
    }
};

/// The internal state of a node in a running dataflow.
pub const NodeState = union(enum) {
    Input: InputState,
    Map,
    Index: IndexState,
    Join: JoinState,
    Output: OutputState,
    TimestampPush,
    TimestampIncrement,
    TimestampPop,
    Union,
    Distinct: DistinctState,
    Reduce: ReduceState,

    pub const InputState = struct {
        frontier: Frontier,
        /// These changes are being buffered.
        /// When flushed they will form a change batch.
        unflushed_changes: ChangeBatchBuilder,
    };

    pub const IndexState = struct {
        index: Index,
        /// These changes are waiting for the frontier to move past them, at which point they will be added to the index.
        pending_changes: u.ArrayList(Change),
    };

    pub const JoinState = struct {
        // The input frontier of the input indexes, as of the last ChangeBatch processed from them.
        // We use these to ensure we ignore Changes that exist in the index already but haven't yet been processed by this join.
        index_input_frontiers: [2]Frontier,
    };

    pub const OutputState = struct {
        unpopped_change_batches: u.Queue(ChangeBatch),
    };

    pub const DistinctState = struct {
        index: Index,
        /// These are rows/timestamps at which the output might change even if there is no new input.
        /// For example, if a distinct row appears at two different timestamps, then at the leastUpperBound of those timestamps the total count would be 2 and we need to correct that.
        /// To calculate:
        /// * For each row in the input, take the leastUpperBound of every possible subset of timestamps at which that row changed.
        /// * Filter out timestamps that are before the output frontier of this node.
        // TODO If Index supported cheap single updates, it would maybe be a suitable data structure here.
        pending_corrections: u.DeepHashMap(Row, u.DeepHashSet(Timestamp)),
    };

    pub const ReduceState = struct {
        index: Index,
        /// These are keys/timestamps at which the output might change even if there is no new input.
        /// For example, if a key appears at two different timestamps, then at the leastUpperBound of those timestamps there will be two output values and we need to replace that with the correct single output.
        /// To calculate:
        /// * For each key in the input, take the leastUpperBound of every possible subset of timestamps at which that key changed.
        /// * Filter out timestamps that are before the output frontier of this node.
        // TODO If Index supported cheap single updates, it would maybe be a suitable data structure here.
        pending_corrections: u.DeepHashMap(Row, u.DeepHashSet(Timestamp)),
    };

    /// Creates the initial state matching `node_spec`'s tag.
    pub fn init(allocator: u.Allocator, node_spec: NodeSpec) NodeState {
        return switch (node_spec) {
            .Input => .{
                .Input = .{
                    .frontier = Frontier.init(allocator),
                    .unflushed_changes = ChangeBatchBuilder.init(allocator),
                },
            },
            .Map => .Map,
            .Index => .{
                .Index = .{
                    .index = Index.init(allocator),
                    .pending_changes = u.ArrayList(Change).init(allocator),
                },
            },
            .Join => .{
                .Join = .{
                    .index_input_frontiers = .{
                        Frontier.init(allocator),
                        Frontier.init(allocator),
                    },
                },
            },
            .Output => .{
                .Output = .{
                    .unpopped_change_batches = u.Queue(ChangeBatch).init(allocator),
                },
            },
            .TimestampPush => .TimestampPush,
            .TimestampIncrement => .TimestampIncrement,
            .TimestampPop => .TimestampPop,
            .Union => .Union,
            .Distinct => .{
                .Distinct = .{
                    .index = Index.init(allocator),
                    .pending_corrections = u.DeepHashMap(Row, u.DeepHashSet(Timestamp)).init(allocator),
                },
            },
            .Reduce => .{
                .Reduce = .{
                    .index = Index.init(allocator),
                    .pending_corrections = u.DeepHashMap(Row, u.DeepHashSet(Timestamp)).init(allocator),
                },
            },
        };
    }

    /// Frees all state owned by this node. Stateless variants hold nothing.
    pub fn deinit(self: *NodeState, allocator: u.Allocator) void {
        switch (self.*) {
            .Input => |*input| {
                input.frontier.deinit();
                input.unflushed_changes.deinit();
            },
            .Index => |*index| {
                index.index.deinit();
                for (index.pending_changes.items) |*change| change.deinit(allocator);
                index.pending_changes.deinit();
            },
            .Join => |*join| {
                for (join.index_input_frontiers) |*frontier| frontier.deinit();
            },
.Output => |*output| { for (output.unpopped_change_batches.in.items) |*change_batch| change_batch.deinit(allocator); for (output.unpopped_change_batches.out.items) |*change_batch| change_batch.deinit(allocator); output.unpopped_change_batches.deinit(); }, .Distinct => |*distinct| { distinct.index.deinit(); { var iter = distinct.pending_corrections.iterator(); while (iter.next()) |entry| { entry.key_ptr.deinit(allocator); { var value_iter = entry.value_ptr.iterator(); while (value_iter.next()) |value_entry| { value_entry.key_ptr.deinit(allocator); } } entry.value_ptr.deinit(); } } distinct.pending_corrections.deinit(); }, .Reduce => |*reduce| { reduce.index.deinit(); { var iter = reduce.pending_corrections.iterator(); while (iter.next()) |entry| { entry.key_ptr.deinit(allocator); { var value_iter = entry.value_ptr.iterator(); while (value_iter.next()) |value_entry| { value_entry.key_ptr.deinit(allocator); } } entry.value_ptr.deinit(); } } reduce.pending_corrections.deinit(); }, .Map, .TimestampPush, .TimestampIncrement, .TimestampPop, .Union => {}, } self.* = undefined; } pub fn getIndex(self: *NodeState) ?*Index { return switch (self.*) { .Index => |*state| &state.index, .Distinct => |*state| &state.index, .Reduce => |*state| &state.index, // TODO should be able to follow TimestampPush/Pop to an index and wrap it else => null, }; } }; /// A subgraph of the dataflow graph. /// Every node in a subgraph has the same number of timestamp coordinates. /// Every loop in the graph must be contained entirely by a single subgraph. /// Subgraphs must be nested hierarchically - no overlaps. pub const Subgraph = struct { id: usize, }; /// A description of a dataflow graph. pub const Graph = struct { allocator: u.Allocator, /// For each node, the spec that determines its behavior node_specs: []const NodeSpec, /// For each node, the subgraphs that it belongs to (outermost first, innermost last). 
node_subgraphs: []const []const Subgraph,
/// For each subgraph, the parent subgraph that it is nested within
/// (Indexed by subgraph.id-1, because subgraph 0 has no parent)
subgraph_parents: []const Subgraph,
/// For each node, the nodes that are immediately downstream (ie have this node as an input).
downstream_node_inputs: []const []const NodeInput,

/// Takes ownership of `node_specs` and `subgraph_parents`.
pub fn init(allocator: u.Allocator, node_specs: []const NodeSpec, node_immediate_subgraphs: []const Subgraph, subgraph_parents: []const Subgraph) !Graph {
    const num_nodes = node_specs.len;
    u.assert(
        node_immediate_subgraphs.len == num_nodes,
        "node_specs and node_immediate_subgraphs should have same length, got {} vs {}",
        .{ node_immediate_subgraphs.len, num_nodes },
    );

    // For each node, store its subgraph, its subgraphs parent, its subgraphs parents parent etc
    var node_subgraphs = try allocator.alloc([]Subgraph, num_nodes);
    for (node_immediate_subgraphs) |immediate_subgraph, node_id| {
        var subgraphs = u.ArrayList(Subgraph).init(allocator);
        defer subgraphs.deinit();
        var subgraph = immediate_subgraph;
        // Walk up the parent chain until reaching the root (subgraph 0).
        while (true) {
            try subgraphs.append(subgraph);
            if (subgraph.id == 0) break;
            subgraph = subgraph_parents[subgraph.id - 1];
        }
        // We collected innermost-first, but node_subgraphs stores outermost-first.
        std.mem.reverse(Subgraph, subgraphs.items);
        node_subgraphs[node_id] = subgraphs.toOwnedSlice();
    }

    // Collect downstream nodes
    var downstream_node_inputs = try allocator.alloc(u.ArrayList(NodeInput), num_nodes);
    defer allocator.free(downstream_node_inputs);
    for (node_specs) |_, node_id| {
        downstream_node_inputs[node_id] = u.ArrayList(NodeInput).init(allocator);
    }
    for (node_specs) |node_spec, node_id| {
        for (node_spec.getInputs()) |input_node, input_ix| {
            try downstream_node_inputs[input_node.id].append(.{ .node = .{ .id = node_id }, .input_ix = input_ix });
        }
    }
    // Freeze the ArrayLists into plain slices for the final struct.
    var frozen_downstream_node_inputs = try allocator.alloc([]NodeInput, node_specs.len);
    for (downstream_node_inputs) |*node_inputs, node_id|
        frozen_downstream_node_inputs[node_id] = node_inputs.toOwnedSlice();

    var self = Graph{
        .allocator = allocator,
        .node_specs = node_specs,
        .node_subgraphs = node_subgraphs,
        .subgraph_parents = subgraph_parents,
        .downstream_node_inputs = frozen_downstream_node_inputs,
    };
    try self.validate();
    return self;
}

/// Frees everything owned by the graph, including the slices whose ownership was taken by `init`.
pub fn deinit(self: *Graph) void {
    for (self.downstream_node_inputs) |downstream_node_inputs|
        self.allocator.free(downstream_node_inputs);
    self.allocator.free(self.downstream_node_inputs);
    self.allocator.free(self.subgraph_parents);
    for (self.node_subgraphs) |node_subgraphs|
        self.allocator.free(node_subgraphs);
    self.allocator.free(self.node_subgraphs);
    self.allocator.free(self.node_specs);
    self.* = undefined;
}

/// Assert that the graph obeys all the constraints required to make the progress tracking algorithm work.
pub fn validate(self: Graph) !void {
    const num_nodes = self.node_specs.len;
    // Subgraph ids must strictly decrease going from child to parent.
    for (self.subgraph_parents) |parent, subgraph_id_minus_one| {
        u.assert(
            parent.id < subgraph_id_minus_one + 1,
            "The parent of a subgraph must have a smaller id than its child",
            .{},
        );
    }
    for (self.node_specs) |node_spec, node_id| {
        for (node_spec.getInputs()) |input_node| {
            u.assert(input_node.id < num_nodes, "All input nodes must exist", .{});
            // Node ids must be topologically ordered, except for the single allowed backwards edge kind.
            if (node_spec == .TimestampIncrement) {
                u.assert(
                    input_node.id > node_id,
                    "TimestampIncrement nodes must have a later node as input",
                    .{},
                );
            } else {
                u.assert(
                    input_node.id < node_id,
                    "All nodes (other than TimestampIncrement) must have an earlier node as input",
                    .{},
                );
            }
            if (std.meta.activeTag(node_spec).needsIndex())
                u.assert(
                    std.meta.activeTag(self.node_specs[input_node.id]).hasIndex(),
                    "Inputs to {} node must contain an index",
                    .{std.meta.activeTag(node_spec)},
                );
            // Check that subgraph boundaries are only crossed by TimestampPush/Pop, in the right direction.
            switch (node_spec) {
                .TimestampPush => {
                    const input_subgraph = u.last(Subgraph, self.node_subgraphs[input_node.id]);
                    const output_subgraph = u.last(Subgraph, self.node_subgraphs[node_id]);
                    u.assert(
                        output_subgraph.id > 0,
                        "TimestampPush nodes cannot have an output on subgraph 0",
                        .{},
                    );
                    u.assert(
                        self.subgraph_parents[output_subgraph.id - 1].id == input_subgraph.id,
                        "TimestampPush nodes must cross from a parent subgraph to a child subgraph",
                        .{},
                    );
                },
                .TimestampPop => {
                    const input_subgraph = u.last(Subgraph, self.node_subgraphs[input_node.id]);
                    const output_subgraph = u.last(Subgraph, self.node_subgraphs[node_id]);
                    u.assert(
                        input_subgraph.id > 0,
                        "TimestampPop nodes cannot have an input on subgraph 0",
                        .{},
                    );
                    u.assert(
                        self.subgraph_parents[input_subgraph.id - 1].id == output_subgraph.id,
                        "TimestampPop nodes must cross from a child subgraph to a parent subgraph",
                        .{},
                    );
                },
                else => {
                    const input_subgraph = u.last(Subgraph, self.node_subgraphs[input_node.id]);
                    const output_subgraph = u.last(Subgraph, self.node_subgraphs[node_id]);
                    u.assert(
                        input_subgraph.id == output_subgraph.id,
                        "Nodes (other than TimestampPop and TimestampPush) must be on the same subgraph as their inputs",
                        .{},
                    );
                },
            }
        }
    }
    // For each subgraph, find the first node that pops out of it and the last node that pushes into it.
    var earliest_subgraph_pops = u.DeepHashMap(Subgraph, Node).init(self.allocator);
    defer earliest_subgraph_pops.deinit();
    var latest_subgraph_pushes = u.DeepHashMap(Subgraph, Node).init(self.allocator);
    defer latest_subgraph_pushes.deinit();
    for (self.node_specs) |node_spec, node_id| {
        switch (node_spec) {
            .TimestampPush => {
                const subgraph = u.last(Subgraph, self.node_subgraphs[node_id]);
                const entry = try latest_subgraph_pushes.getOrPutValue(subgraph, .{ .id = node_id });
                entry.value_ptr.id = u.max(entry.value_ptr.id, node_id);
            },
            .TimestampPop => |spec| {
                // The subgraph being popped from is the input's subgraph, not this node's.
                const subgraph = u.last(Subgraph, self.node_subgraphs[spec.input.id]);
                const entry = try earliest_subgraph_pops.getOrPutValue(subgraph, .{ .id = node_id });
                entry.value_ptr.id = u.min(entry.value_ptr.id, node_id);
            },
            else => {},
        }
    }
    var subgraph_id: usize = 1;
    while (subgraph_id - 1 < self.subgraph_parents.len) : (subgraph_id += 1) {
        if (earliest_subgraph_pops.get(.{ .id = subgraph_id })) |earliest| {
            if (latest_subgraph_pushes.get(.{ .id = subgraph_id })) |latest| {
                // TODO This constraint works, but is clunky. Could instead test directly for the case where a path exits and re-enters the subgraph without going backwards.
                u.assert(
                    earliest.id >= latest.id,
                    "Every TimestampPush into a subgraph must have an earlier node id than every TimestampPop from that subgraph. Found TimestampPush at {} later than TimestampPop at {}",
                    .{ latest, earliest },
                );
            }
        }
    }
}
};

/// A helper for building a graph.
/// Call `addSubgraph` and `addNode` to build it up.
/// Call `connectLoop` to connect backwards edges in loops.
/// Call `finishAndReset` to produce the graph.
pub const GraphBuilder = struct {
    allocator: u.Allocator,
    node_specs: u.ArrayList(NodeSpec),
    node_subgraphs: u.ArrayList(Subgraph),
    subgraph_parents: u.ArrayList(Subgraph),

    pub fn init(allocator: u.Allocator) GraphBuilder {
        return GraphBuilder{
            .allocator = allocator,
            .node_specs = u.ArrayList(NodeSpec).init(allocator),
            .node_subgraphs = u.ArrayList(Subgraph).init(allocator),
            .subgraph_parents = u.ArrayList(Subgraph).init(allocator),
        };
    }

    pub fn deinit(self: *GraphBuilder) void {
        self.subgraph_parents.deinit();
        self.node_subgraphs.deinit();
        self.node_specs.deinit();
        self.* = undefined;
    }

    /// Registers a new subgraph nested inside `parent` and returns it.
    pub fn addSubgraph(self: *GraphBuilder, parent: Subgraph) !Subgraph {
        try self.subgraph_parents.append(parent);
        // Ids start at 1 - subgraph 0 is the implicit root with no parent.
        return Subgraph{ .id = self.subgraph_parents.items.len };
    }

    /// Add a new node to the graph.
    /// When adding a `TimestampIncrement` node, set its input to null initially and then later use `connectLoop` once the input node has been added.
    pub fn addNode(self: *GraphBuilder, subgraph: Subgraph, node_spec: NodeSpec) !Node {
        const node = Node{ .id = self.node_specs.items.len };
        try self.node_specs.append(node_spec);
        try self.node_subgraphs.append(subgraph);
        return node;
    }

    /// Sets the input of `earlier_node` to `later_node`.
    /// `earlier_node` must be a `TimestampIncrement` node - the only node that is allowed to have backwards edges.
pub fn connectLoop(self: *GraphBuilder, later_node: Node, earlier_node: Node) void {
    self.node_specs.items[earlier_node.id].TimestampIncrement.input = later_node;
}

/// Produce the final graph.
/// Resets `self` so it can be used again.
pub fn finishAndReset(self: *GraphBuilder) !Graph {
    // Graph.init only takes ownership of node_specs and subgraph_parents;
    // it copies node_subgraphs, so we must free our slice afterwards.
    const node_subgraphs = self.node_subgraphs.toOwnedSlice();
    defer self.allocator.free(node_subgraphs);
    return Graph.init(
        self.allocator,
        self.node_specs.toOwnedSlice(),
        node_subgraphs,
        self.subgraph_parents.toOwnedSlice(),
    );
}
};

/// Part of a running dataflow.
/// In a single-threaded dataflow there will be only one shard.
/// In a multi-threaded dataflow (TODO) there will be one shard per thread.
pub const Shard = struct {
    allocator: u.Allocator,
    /// Borrowed from caller of init.
    graph: *const Graph,
    /// For each node, the internal state of that node.
    node_states: []NodeState,
    /// For each node, the frontier for the nodes output.
    /// Invariant: any change emitted from a node has a timestamp that is not earlier than the frontier:
    ///   node_frontiers[node.id].frontier.causalOrder(change.timestamp).isLessThanOrEqual()
    node_frontiers: []SupportedFrontier,
    /// An unordered list of change batches that have not yet been processed by some node.
    unprocessed_change_batches: u.ArrayList(ChangeBatchAtNodeInput),
    /// Frontier updates that have not yet been applied to some node's input frontier.
    /// (The input frontier is never materialized, so when these changes are processed they will be immediately transformed to apply to the output frontier).
unprocessed_frontier_updates: u.DeepHashMap(Pointstamp, isize),

/// A change batch queued for processing at a particular node input.
pub const ChangeBatchAtNodeInput = struct {
    change_batch: ChangeBatch,
    /// Owned clone of the emitting node's input frontier at emission time.
    /// Only set when the emitting node has an index - see emitChangeBatch.
    input_frontier: ?Frontier,
    node_input: NodeInput,
};

/// A location in the dataflow at which changes may still appear:
/// a node input, the subgraphs that input belongs to, and a timestamp.
pub const Pointstamp = struct {
    node_input: NodeInput,
    /// Borrowed from self.graph
    subgraphs: []const Subgraph,
    timestamp: Timestamp,

    pub fn deinit(self: *Pointstamp, allocator: u.Allocator) void {
        // node_input is plain data and subgraphs is borrowed, so only the timestamp is owned.
        self.timestamp.deinit(allocator);
        self.* = undefined;
    }
};

/// Creates a shard for `graph`, seeds the frontier of every Input node with the least timestamp,
/// and runs progress tracking until quiescent.
/// `graph` is borrowed and must outlive the shard.
pub fn init(allocator: u.Allocator, graph: *const Graph) !Shard {
    const num_nodes = graph.node_specs.len;
    var node_states = try allocator.alloc(NodeState, num_nodes);
    for (node_states) |*node_state, node_id|
        node_state.* = NodeState.init(allocator, graph.node_specs[node_id]);
    var node_frontiers = try allocator.alloc(SupportedFrontier, num_nodes);
    for (node_frontiers) |*node_frontier|
        node_frontier.* = try SupportedFrontier.init(allocator);
    var unprocessed_frontier_updates = u.DeepHashMap(Pointstamp, isize).init(allocator);
    var self = Shard{
        .allocator = allocator,
        .graph = graph,
        .node_states = node_states,
        .node_frontiers = node_frontiers,
        .unprocessed_change_batches = u.ArrayList(ChangeBatchAtNodeInput).init(allocator),
        .unprocessed_frontier_updates = unprocessed_frontier_updates,
    };

    // Init input frontiers
    for (graph.node_specs) |node_spec, node_id| {
        if (node_spec == .Input) {
            // One timestamp coordinate per enclosing subgraph.
            var timestamp = try Timestamp.initLeast(allocator, graph.node_subgraphs[node_id].len);
            _ = try self.applyFrontierSupportChange(.{ .id = node_id }, timestamp, 1);
            try self.node_states[node_id].Input.frontier.timestamps.put(timestamp, {});
        }
    }

    // Propagate the initial frontiers through the graph before accepting any input.
    while (self.hasWork()) try self.doWork();

    return self;
}

/// Frees everything owned by the shard (but not the borrowed graph).
pub fn deinit(self: *Shard) void {
    {
        var iter = self.unprocessed_frontier_updates.iterator();
        while (iter.next()) |entry| {
            entry.key_ptr.deinit(self.allocator);
        }
    }
    self.unprocessed_frontier_updates.deinit();
    for (self.unprocessed_change_batches.items) |*change_batch_at_node_input| {
        // NOTE(review): the owned `input_frontier` clone (see emitChangeBatch) is not deinited here,
        // so shutting down with unprocessed batches may leak it - confirm.
        change_batch_at_node_input.change_batch.deinit(self.allocator);
    }
    self.unprocessed_change_batches.deinit();
    for (self.node_frontiers) |*node_frontier| node_frontier.deinit();
    self.allocator.free(self.node_frontiers);
    for (self.node_states) |*node_state| node_state.deinit(self.allocator);
    self.allocator.free(self.node_states);
    // self.graph is borrowed
    self.* = undefined;
}

/// Add a new change to an input node.
/// These changes will not be processed by `hasWork`/`doWork` until `flushInput` is called.
/// Takes ownership of `change`.
pub fn pushInput(self: *Shard, node: Node, change: Change) !void {
    dida.debug.emitDebugEvent(self, .{ .PushInput = .{ .node = node, .change = change } });

    u.assert(
        self.node_states[node.id].Input.frontier.causalOrder(change.timestamp).isLessThanOrEqual(),
        "May not push inputs that are less than the Input node frontier set by Shard.advanceInput",
        .{},
    );
    try self.node_states[node.id].Input.unflushed_changes.changes.append(change);
}

/// Flush all of the changes at an input node into a change batch.
pub fn flushInput(self: *Shard, node: Node) !void {
    dida.debug.emitDebugEvent(self, .{ .FlushInput = .{ .node = node } });

    var unflushed_changes = &self.node_states[node.id].Input.unflushed_changes;
    // May be null when there is nothing to flush.
    if (try unflushed_changes.finishAndReset()) |change_batch| {
        try self.emitChangeBatch(node, change_batch);
    }
}

/// Promise that you will never call `pushInput` on `node` with a change whose timestamp is earlier than `timestamp`.
/// Doing this allows operations which need to see all the input at a given timestamp to progress.
/// (This also implicitly flushes `node`.)
// TODO Is advance the best verb? Would prefer to stay consistent with Earlier/Later used elsewhere.
pub fn advanceInput(self: *Shard, node: Node, timestamp: Timestamp) !void {
    dida.debug.emitDebugEvent(self, .{ .AdvanceInput = .{ .node = node, .timestamp = timestamp } });

    // Have to flush input so that there aren't any pending changes with timestamps less than the new frontier
    try self.flushInput(node);

    var changes = u.ArrayList(FrontierChange).init(self.allocator);
    defer changes.deinit();
    try self.node_states[node.id].Input.frontier.move(.Later, timestamp, &changes);
    // Propagate each resulting frontier change into progress tracking.
    for (changes.items) |*change| {
        _ = try self.applyFrontierSupportChange(node, change.timestamp, change.diff);
        change.deinit(self.allocator);
    }
}

/// Report that `from_node` produced `change_batch` as an output.
/// Takes ownership of `change_batch`.
fn emitChangeBatch(self: *Shard, from_node: Node, change_batch: ChangeBatch) !void {
    // For nodes with indexes, snapshot the frontier of their single input so that
    // downstream joins can tell which changes are already reflected in the index.
    var input_frontier: ?Frontier = null;
    {
        const node_spec = self.graph.node_specs[from_node.id];
        if (NodeSpecTag.hasIndex(node_spec)) {
            u.assert(
                node_spec.getInputs().len == 1,
                "At present all nodes with indexes have only one input. If this changed for {}, need to rethink this code.",
                .{std.meta.activeTag(node_spec)},
            );
            input_frontier = self.node_frontiers[node_spec.getInputs()[0].id].frontier;
        }
    }

    dida.debug.emitDebugEvent(self, .{ .EmitChangeBatch = .{
        .from_node = from_node,
        .change_batch = change_batch,
        .input_frontier = input_frontier,
    } });

    // Check that this emission is legal
    {
        const output_frontier = self.node_frontiers[from_node.id];
        var iter = change_batch.lower_bound.timestamps.iterator();
        while (iter.next()) |entry| {
            u.assert(
                output_frontier.frontier.causalOrder(entry.key_ptr.*).isLessThanOrEqual(),
                "Emitted a change at a timestamp that is behind the output frontier. Node {}, timestamp {}.",
                .{ from_node, entry.key_ptr.* },
            );
        }
    }

    // Queue a copy of the batch (and frontier snapshot) at every downstream input.
    var cloned_change_batch = change_batch;
    var cloned_input_frontier = input_frontier;
    for (self.graph.downstream_node_inputs[from_node.id]) |to_node_input, i| {
        if (i != 0)
            // We take ownership of change_batch so we don't have to clone it the first time we add it to the queue
            cloned_change_batch = try u.deepClone(cloned_change_batch, self.allocator);
        cloned_input_frontier = try u.deepClone(cloned_input_frontier, self.allocator);
        // Register support for the batch's timestamps at the receiving input.
        var iter = cloned_change_batch.lower_bound.timestamps.iterator();
        while (iter.next()) |entry| {
            try self.queueFrontierSupportChange(to_node_input, entry.key_ptr.*, 1);
        }
        try self.unprocessed_change_batches.append(.{
            .change_batch = cloned_change_batch,
            .input_frontier = cloned_input_frontier,
            .node_input = to_node_input,
        });
    }
}

/// Process one unprocessed change batch from the queue.
fn processChangeBatch(self: *Shard) !void {
    const change_batch_at_node_input = self.unprocessed_change_batches.popOrNull() orelse return;
    var input_frontier = change_batch_at_node_input.input_frontier;
    defer if (input_frontier) |*_input_frontier| _input_frontier.deinit();
    var change_batch = change_batch_at_node_input.change_batch;
    defer change_batch.deinit(self.allocator);
    const node_input = change_batch_at_node_input.node_input;
    const node = node_input.node;
    const node_spec = self.graph.node_specs[node.id];
    const node_state = &self.node_states[node.id];

    dida.debug.emitDebugEvent(self, .{ .ProcessChangeBatch = .{ .node_input = node_input, .change_batch = change_batch } });

    // Remove change_batch from progress tracking
    {
        var iter = change_batch.lower_bound.timestamps.iterator();
        while (iter.next()) |entry| {
            try self.queueFrontierSupportChange(node_input, entry.key_ptr.*, -1);
        }
    }

    switch (node_spec) {
        .Input => u.panic("Input nodes should not have work pending on their input", .{}),
        .Map => |map| {
            // Apply the user-supplied mapper to every row and emit the results.
            var output_change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer output_change_batch_builder.deinit();
            for (change_batch.changes) |change| {
                const output_row = try map.mapper.map_fn(map.mapper, change.row);
                try output_change_batch_builder.changes.append(.{
                    .row = output_row,
                    .timestamp = try u.deepClone(change.timestamp, self.allocator),
                    .diff = change.diff,
                });
            }
            if (try output_change_batch_builder.finishAndReset()) |output_change_batch| {
                try self.emitChangeBatch(node_input.node, output_change_batch);
            }
        },
        .Index => {
            // These won't be emitted until the frontier passes them
            // TODO this is a lot of timestamps - is there a cheaper way to maintain the support for the index frontier?
            for (change_batch.changes) |change| {
                u.assert(
                    self.node_frontiers[node.id].frontier.causalOrder(change.timestamp).isLessThanOrEqual(),
                    "Index received a change that was behind its output frontier. Node {}, timestamp {}.",
                    .{ node, change.timestamp },
                );
                _ = try self.applyFrontierSupportChange(node, change.timestamp, 1);
            }
            try node_state.Index.pending_changes.appendSlice(change_batch.changes);
            // Took ownership of rows in changes, so don't deinit them
            self.allocator.free(change_batch.changes);
            change_batch.changes = &[0]Change{};
        },
        .Join => |join| {
            // Join this batch against the index on the *other* input.
            const index = self.node_states[join.inputs[1 - node_input.input_ix].id].getIndex().?;
            var output_change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer output_change_batch_builder.deinit();
            try index.mergeJoin(
                node_state.Join.index_input_frontiers[1 - node_input.input_ix],
                change_batch,
                join.key_columns,
                // Output column order depends on which side the batch arrived on.
                switch (node_input.input_ix) {
                    0 => .RightThenLeft,
                    1 => .LeftThenRight,
                    else => u.panic("Bad input_ix for join: {}", .{node_input.input_ix}),
                },
                &output_change_batch_builder,
            );
            if (try output_change_batch_builder.finishAndReset()) |output_change_batch| {
                try self.emitChangeBatch(node_input.node, output_change_batch);
            }
            // Remember how far the upstream index had advanced when this batch was emitted.
            node_state.Join.index_input_frontiers[node_input.input_ix].deinit();
            node_state.Join.index_input_frontiers[node_input.input_ix] = input_frontier.?;
            // Took ownership of input_frontier, so don't deinit it
            input_frontier = null;
        },
        .Output => {
            try node_state.Output.unpopped_change_batches.push(change_batch);
            // Took ownership of change_batch so don't deinit it
            change_batch = ChangeBatch.empty(self.allocator);
        },
        .TimestampPush => {
            // Entering a subgraph: add a new innermost timestamp coordinate to every change.
            var output_change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer output_change_batch_builder.deinit();
            for (change_batch.changes) |change| {
                const output_timestamp = try change.timestamp.pushCoord(self.allocator);
                try output_change_batch_builder.changes.append(.{
                    .row = try u.deepClone(change.row, self.allocator),
                    .timestamp = output_timestamp,
                    .diff = change.diff,
                });
            }
            try self.emitChangeBatch(node_input.node, (try output_change_batch_builder.finishAndReset()).?);
        },
        .TimestampIncrement => {
            // Looping: increment the innermost timestamp coordinate of every change.
            var output_change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer output_change_batch_builder.deinit();
            for (change_batch.changes) |change| {
                const output_timestamp = try change.timestamp.incrementCoord(self.allocator);
                try output_change_batch_builder.changes.append(.{
                    .row = try u.deepClone(change.row, self.allocator),
                    .timestamp = output_timestamp,
                    .diff = change.diff,
                });
            }
            try self.emitChangeBatch(node_input.node, (try output_change_batch_builder.finishAndReset()).?);
        },
        .TimestampPop => {
            // Leaving a subgraph: drop the innermost timestamp coordinate of every change.
            var output_change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer output_change_batch_builder.deinit();
            for (change_batch.changes) |change| {
                const output_timestamp = try change.timestamp.popCoord(self.allocator);
                try output_change_batch_builder.changes.append(.{
                    .row = try u.deepClone(change.row, self.allocator),
                    .timestamp = output_timestamp,
                    .diff = change.diff,
                });
            }
            if (try output_change_batch_builder.finishAndReset()) |output_change_batch| {
                try self.emitChangeBatch(node_input.node, output_change_batch);
            }
        },
        .Union => {
            // Pass straight through
            try self.emitChangeBatch(node_input.node, change_batch);
            // Took ownership of change_batch so don't deinit it
            change_batch = ChangeBatch.empty(self.allocator);
        },
        .Distinct, .Reduce => {
            // Figure out which new rows/timestamps might need later corrections
            const pending_corrections = switch (node_state.*) {
                .Distinct => |*state| &state.pending_corrections,
                .Reduce => |*state| &state.pending_corrections,
                else => unreachable,
            };
            for (change_batch.changes) |change| {
                // Distinct is keyed by the whole row, Reduce by the leading key columns.
                const key = switch (node_spec) {
                    .Distinct => change.row,
                    .Reduce => |spec| Row{ .values = change.row.values[0..spec.key_columns] },
                    else => unreachable,
                };
                const timestamps_entry = try pending_corrections.getOrPut(key);
                if (!timestamps_entry.found_existing) {
                    timestamps_entry.key_ptr.* = try u.deepClone(timestamps_entry.key_ptr.*, self.allocator);
                    timestamps_entry.value_ptr.* = u.DeepHashSet(Timestamp).init(self.allocator);
                }
                const timestamps = timestamps_entry.value_ptr;
                {
                    // change.timestamp is pending
                    const old_entry = try timestamps.getOrPut(change.timestamp);
                    // if was already pending, nothing more to do
                    if (old_entry.found_existing) continue;
                    old_entry.key_ptr.* = try u.deepClone(old_entry.key_ptr.*, self.allocator);
                    // otherwise, update frontier
                    _ = try self.applyFrontierSupportChange(node, change.timestamp, 1);
                }
                // for any other pending timestamp on this row, leastUpperBound(change.timestamp, other_timestamp) is pending
                var buffer = u.ArrayList(Timestamp).init(self.allocator);
                defer buffer.deinit();
                var iter = timestamps.iterator();
                while (iter.next()) |entry| {
                    const timestamp = try Timestamp.leastUpperBound(
                        self.allocator,
                        change.timestamp,
                        entry.key_ptr.*,
                    );
                    try buffer.append(timestamp);
                }
                for (buffer.items) |*timestamp| {
                    const old_entry = try timestamps.getOrPut(timestamp.*);
                    if (old_entry.found_existing) {
                        timestamp.deinit(self.allocator);
                    } else {
                        _ = try self.applyFrontierSupportChange(node, timestamp.*, 1);
                    }
                }
            }
        },
    }
}

/// Report that the input frontier at `node_input` has changed, so the output frontier might need updating.
fn queueFrontierSupportChange(self: *Shard, node_input: NodeInput, timestamp: Timestamp, diff: isize) !void {
    dida.debug.emitDebugEvent(self, .{ .QueueFrontierUpdate = .{ .node_input = node_input, .timestamp = timestamp, .diff = diff } });

    const node_spec = self.graph.node_specs[node_input.node.id];
    const input_node = node_spec.getInputs()[node_input.input_ix];
    var entry = try self.unprocessed_frontier_updates.getOrPut(.{
        .node_input = node_input,
        // Borrowed from the graph - see Pointstamp.
        .subgraphs = self.graph.node_subgraphs[input_node.id],
        .timestamp = timestamp,
    });
    if (!entry.found_existing) {
        // The key was inserted with a borrowed timestamp; clone it so the map owns it.
        entry.key_ptr.timestamp = try u.deepClone(entry.key_ptr.timestamp, self.allocator);
        entry.value_ptr.* = 0;
    }
    entry.value_ptr.* += diff;
    // Updates that cancel out exactly can be dropped without ever being processed.
    if (entry.value_ptr.* == 0) {
        var removed = self.unprocessed_frontier_updates.fetchRemove(entry.key_ptr.*).?;
        removed.key.deinit(self.allocator);
    }
}

/// Change the output frontier at `node` and report the change to any downstream nodes.
fn applyFrontierSupportChange(self: *Shard, node: Node, timestamp: Timestamp, diff: isize) !enum { Updated, NotUpdated } {
    dida.debug.emitDebugEvent(self, .{ .ApplyFrontierUpdate = .{ .node = node, .timestamp = timestamp, .diff = diff } });

    var frontier_changes = u.ArrayList(FrontierChange).init(self.allocator);
    defer frontier_changes.deinit();
    try self.node_frontiers[node.id].update(timestamp, diff, &frontier_changes);
    // Any actual movement of the frontier must be reported to every downstream input.
    for (frontier_changes.items) |*frontier_change| {
        for (self.graph.downstream_node_inputs[node.id]) |downstream_node_input| {
            try self.queueFrontierSupportChange(downstream_node_input, frontier_change.timestamp, frontier_change.diff);
        }
        frontier_change.deinit(self.allocator);
    }
    return if (frontier_changes.items.len > 0) .Updated else .NotUpdated;
}

// An ordering on Pointstamp that is compatible with causality.
// IE if the existence of a change at `this` causes a change to later be produced at `that`, then we need to have `orderPointstamps(this, that) == .lt`.
// The invariants enforced for the graph structure guarantee that this is possible.
fn orderPointstamps(this: Pointstamp, that: Pointstamp) std.math.Order {
    const min_len = u.min(this.subgraphs.len, that.subgraphs.len);
    var i: usize = 0;
    while (i < min_len) : (i += 1) {
        // If `this` and `that` are in different subgraphs then there is no way for a change to travel from a later node to an earlier node without incrementing the timestamp coord at `i-1`.
        if (this.subgraphs[i].id != that.subgraphs[i].id)
            return u.deepOrder(this.node_input, that.node_input);

        // If `this` and `that` are in the same subgraph but one has a higher timestamp coord at `i` than the other then there is no way the higher timestamp could be decremented to produce the lower timestamp.
        const timestamp_order = std.math.order(this.timestamp.coords[i], that.timestamp.coords[i]);
        if (timestamp_order != .eq) return timestamp_order;
    }
    // If we get this far, either `this` and `that` are in the same subgraph or one is in a subgraph that is nested inside the other.
    // Either way there is no way for a change to travel from a later node to an earlier node without incrementing the timestamp coord at `min_len-1`.
    return u.deepOrder(this.node_input, that.node_input);
}

/// Process all unprocessed frontier updates.
fn processFrontierUpdates(self: *Shard) !void {
    dida.debug.emitDebugEvent(self, .ProcessFrontierUpdates);

    // Nodes whose input frontiers have changed
    // TODO is it worth tracking the actual changes? might catch cases where the total diff is zero
    var updated_nodes = u.DeepHashSet(Node).init(self.allocator);
    defer updated_nodes.deinit();

    // Process frontier updates
    // NOTE We have to process all of these before doing anything else - the intermediate states can be invalid
    while (self.unprocessed_frontier_updates.count() > 0) {

        // Find min pointstamp
        // (We have to process pointstamps in causal order to ensure that this algorithm terminates. See [/docs/why.md](/docs/why.md) for more detail.)
        // TODO use a sorted data structure for unprocessed_frontier_updates
        var iter = self.unprocessed_frontier_updates.iterator();
        var min_entry = iter.next().?;
        while (iter.next()) |entry| {
            if (orderPointstamps(entry.key_ptr.*, min_entry.key_ptr.*) == .lt)
                min_entry = entry;
        }
        const node = min_entry.key_ptr.node_input.node;
        var input_timestamp = min_entry.key_ptr.timestamp;
        const diff = min_entry.value_ptr.*;
        _ = self.unprocessed_frontier_updates.remove(min_entry.key_ptr.*);

        dida.debug.emitDebugEvent(self, .{ .ProcessFrontierUpdate = .{ .node = node, .input_timestamp = input_timestamp, .diff = diff } });

        // An input frontier for this node changed, so we may need to take some action on it later
        try updated_nodes.put(node, {});

        // Work out how this node changes the timestamp
        var output_timestamp = switch (self.graph.node_specs[node.id]) {
            .TimestampPush => try input_timestamp.pushCoord(self.allocator),
            .TimestampIncrement => try input_timestamp.incrementCoord(self.allocator),
            .TimestampPop => try input_timestamp.popCoord(self.allocator),
            else => input_timestamp,
        };
        // The coord functions allocate a new timestamp, so the input one is no longer needed;
        // in the `else` case output_timestamp aliases input_timestamp and must not be double-freed.
        switch (self.graph.node_specs[node.id]) {
            .TimestampPush, .TimestampIncrement, .TimestampPop => input_timestamp.deinit(self.allocator),
            else => {},
        }
        defer output_timestamp.deinit(self.allocator);

        // Apply change to frontier
        _ = try self.applyFrontierSupportChange(node, output_timestamp, diff);
    }

    // Trigger special actions at nodes whose frontier has changed.
    // TODO Probably should pop these one at a time to avoid doWork being unbounded
    var updated_nodes_iter = updated_nodes.iterator();
    while (updated_nodes_iter.next()) |updated_nodes_entry| {
        const node = updated_nodes_entry.key_ptr.*;
        const node_spec = self.graph.node_specs[node.id];
        const node_state = &self.node_states[node.id];

        dida.debug.emitDebugEvent(self, .{ .ProcessFrontierUpdateReaction = .{ .node = node } });

        // Index-specific stuff
        if (node_spec == .Index) {
            // Might be able to produce an output batch now that the frontier has moved later
            var timestamps_to_remove = u.ArrayList(Timestamp).init(self.allocator);
            defer {
                for (timestamps_to_remove.items) |*timestamp| timestamp.deinit(self.allocator);
                timestamps_to_remove.deinit();
            }
            var change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer change_batch_builder.deinit();
            var pending_changes = u.ArrayList(Change).init(self.allocator);
            defer pending_changes.deinit();
            const input_frontier = self.node_frontiers[node_spec.Index.input.id];
            // Partition pending changes into those now behind the input frontier (emit) and the rest (keep).
            for (node_state.Index.pending_changes.items) |change| {
                if (input_frontier.frontier.causalOrder(change.timestamp) == .gt) {
                    // Have to store the timestamps separately, because we need access to all of them after the change_batch may have coalesced and freed some of them
                    try timestamps_to_remove.append(try u.deepClone(change.timestamp, self.allocator));
                    try change_batch_builder.changes.append(change);
                } else {
                    try pending_changes.append(change);
                }
            }
            std.mem.swap(u.ArrayList(Change), &node_state.Index.pending_changes, &pending_changes);
            if (try change_batch_builder.finishAndReset()) |change_batch| {
                try node_state.Index.index.addChangeBatch(try u.deepClone(change_batch, self.allocator));
                try self.emitChangeBatch(node, change_batch);
            }
            // Withdraw the frontier support that was added when these changes arrived (see processChangeBatch).
            for (timestamps_to_remove.items) |timestamp| {
                _ = try self.applyFrontierSupportChange(node, timestamp, -1);
            }
        }

        // Distinct/Reduce-specific stuff
        // TODO this is somewhat inefficient
        // TODO I think this should be looking at index input frontier
        if (node_spec == .Distinct or node_spec == .Reduce) {
            const input_node = node_spec.getInputs()[0];
            const input_frontier = self.node_frontiers[input_node.id];
            const input_index = self.node_states[input_node.id].getIndex().?;
            const output_index = node_state.getIndex().?;

            var change_batch_builder = ChangeBatchBuilder.init(self.allocator);
            defer change_batch_builder.deinit();

            var frontier_support_changes = u.ArrayList(FrontierChange).init(self.allocator);
            defer {
                for (frontier_support_changes.items) |*frontier_support_change| frontier_support_change.deinit(self.allocator);
                frontier_support_changes.deinit();
            }

            const pending_corrections = switch (node_state.*) {
                .Distinct => |*state| &state.pending_corrections,
                .Reduce => |*state| &state.pending_corrections,
                else => unreachable,
            };
            var key_iter = pending_corrections.iterator();
            while (key_iter.next()) |key_entry| {
                const key = key_entry.key_ptr.*;
                const timestamps = key_entry.value_ptr;

                // Going to check any pending timestamp that is before the new input frontier
                var timestamps_to_check = u.ArrayList(Timestamp).init(self.allocator);
                defer {
                    for (timestamps_to_check.items) |*timestamp_to_check| timestamp_to_check.deinit(self.allocator);
                    timestamps_to_check.deinit();
                }
                {
                    var timestamp_iter = timestamps.iterator();
                    while (timestamp_iter.next()) |timestamp_entry| {
                        const timestamp = timestamp_entry.key_ptr.*;
                        if (input_frontier.frontier.causalOrder(timestamp) == .gt) {
                            try timestamps_to_check.append(try u.deepClone(timestamp, self.allocator));
                            try frontier_support_changes.append(.{ .timestamp = timestamp, .diff = -1 });
                        }
                    }
                }
                // These timestamps are being resolved now, so they are no longer pending.
                for (timestamps_to_check.items) |timestamp_to_check| {
                    _ = timestamps.remove(timestamp_to_check);
                }

                // Sort timestamps so that when we reach each one we've already taken into account previous corrections
                std.sort.sort(Timestamp, timestamps_to_check.items, {}, struct {
                    fn lessThan(_: void, a: Timestamp, b: Timestamp) bool {
                        return a.lexicalOrder(b) == .lt;
                    }
                }.lessThan);

                // Get past inputs for
this key // TODO a sorted iterator would be nicer for this var input_changes = u.ArrayList(Change).init(self.allocator); defer input_changes.deinit(); try input_index.getChangesForKey(key, key.values.len, &input_changes); // Figure out correction for each timestamp var new_output_changes = ChangeBatchBuilder.init(self.allocator); defer new_output_changes.deinit(); for (timestamps_to_check.items) |timestamp_to_check| { switch (node_spec) { .Distinct => { // Calculate the correct count for this row var input_count: isize = 0; for (input_changes.items) |input_change| { if (input_change.timestamp.causalOrder(timestamp_to_check).isLessThanOrEqual()) input_count += input_change.diff; } // Calculate what we're currently saying the count is for this row var output_count = output_index.getCountForRowAsOf(key, timestamp_to_check); // If needed, issue a correction const correct_output_count: isize = if (input_count == 0) 0 else 1; const diff = correct_output_count - output_count; if (diff != 0) { try new_output_changes.changes.append(.{ .row = try u.deepClone(key, self.allocator), .timestamp = try u.deepClone(timestamp_to_check, self.allocator), .diff = diff, }); } }, .Reduce => |spec| { // Coalesce inputs so reduce_fn only has to deal with positive diffs var input_bag = Bag.init(self.allocator); defer input_bag.deinit(); for (input_changes.items) |input_change| { if (input_change.timestamp.causalOrder(timestamp_to_check).isLessThanOrEqual()) try input_bag.update(input_change.row, input_change.diff); } // Reduce fn might not be commutative, so it has to process changes in some well-defined order const RowAndCount = struct { row: Row, count: usize }; var sorted_inputs = u.ArrayList(RowAndCount).init(self.allocator); defer sorted_inputs.deinit(); var input_bag_iter = input_bag.rows.iterator(); while (input_bag_iter.next()) |input_bag_entry| try sorted_inputs.append(.{ .row = input_bag_entry.key_ptr.*, .count = @intCast(usize, input_bag_entry.value_ptr.*), }); 
std.sort.sort(RowAndCount, sorted_inputs.items, {}, (struct { fn lessThan(_: void, a: RowAndCount, b: RowAndCount) bool { return u.deepOrder(a, b) == .lt; } }).lessThan); // Calculate the correct reduced value var input_value = try u.deepClone(spec.init_value, self.allocator); for (sorted_inputs.items) |input| { const new_input_value = try spec.reducer.reduce_fn(spec.reducer, input_value, input.row, input.count); input_value.deinit(self.allocator); input_value = new_input_value; } // Cancel all previous outputs for this key var output_changes = u.ArrayList(Change).init(self.allocator); defer output_changes.deinit(); // TODO query index as of timestamp_to_check try output_index.getChangesForKey(key, key.values.len, &output_changes); for (output_changes.items) |candidate_output_change| { if (candidate_output_change.timestamp.causalOrder(timestamp_to_check).isLessThanOrEqual()) try new_output_changes.changes.append(.{ .row = try u.deepClone(candidate_output_change.row, self.allocator), .timestamp = try u.deepClone(timestamp_to_check, self.allocator), .diff = -candidate_output_change.diff, }); } // Add the new output var values = try std.mem.concat(self.allocator, Value, &[_][]const Value{ key.values, &[_]Value{input_value}, }); for (values[0..key.values.len]) |*value| value.* = try u.deepClone(value.*, self.allocator); try new_output_changes.changes.append(.{ .row = Row{ .values = values }, .timestamp = try u.deepClone(timestamp_to_check, self.allocator), .diff = 1, }); }, else => unreachable, } if (try new_output_changes.finishAndReset()) |change_batch| { try change_batch_builder.changes.appendSlice(change_batch.changes); for (change_batch.changes) |*change| change.* = try u.deepClone(change.*, self.allocator); try output_index.addChangeBatch(change_batch); } } } // TODO if timestamps now empty for a row, remove entry // Emit changes if (try change_batch_builder.finishAndReset()) |change_batch| { try self.emitChangeBatch(node, change_batch); } // Remove frontier 
support for (frontier_support_changes.items) |frontier_support_change| _ = try self.applyFrontierSupportChange(node, frontier_support_change.timestamp, frontier_support_change.diff); } } } /// Check whether the shard has any work that it could do. pub fn hasWork(self: *const Shard) bool { return (self.unprocessed_change_batches.items.len > 0) or (self.unprocessed_frontier_updates.count() > 0); } /// Do some work. // TODO ideally the runtime of this function would be roughly bounded, so that dida can run cooperatively inside other event loops. pub fn doWork(self: *Shard) !void { dida.debug.emitDebugEvent(self, .DoWork); if (self.unprocessed_change_batches.items.len > 0) { try self.processChangeBatch(); } else if (self.unprocessed_frontier_updates.count() > 0) { try self.processFrontierUpdates(); } } /// Pop a change batch from an output node. /// Caller takes ownership of the result. pub fn popOutput(self: *Shard, node: Node) ?ChangeBatch { const change_batch = self.node_states[node.id].Output.unpopped_change_batches.popOrNull(); dida.debug.emitDebugEvent(self, .{ .PopOutput = .{ .node = node, .change_batch = change_batch } }); return change_batch; } }; // TODO It's currently possible to remove from HashMap without invalidating iterator which would simplify some of the code in this file. But might not be true forever. // TODO Need to decide which types store allocators vs taking them as args, and be careful to allocate/free from the correct allocator ================================================ FILE: lib/dida/debug.zig ================================================ //! Tools for debugging dida. 
const std = @import("std");
const dida = @import("../dida.zig");
const u = dida.util;

/// Things that dida does internally
pub const DebugEvent = union(enum) {
    PushInput: struct {
        node: dida.core.Node,
        change: dida.core.Change,
    },
    FlushInput: struct {
        node: dida.core.Node,
    },
    AdvanceInput: struct {
        node: dida.core.Node,
        timestamp: dida.core.Timestamp,
    },
    EmitChangeBatch: struct {
        from_node: dida.core.Node,
        change_batch: dida.core.ChangeBatch,
        input_frontier: ?dida.core.Frontier,
    },
    ProcessChangeBatch: struct {
        node_input: dida.core.NodeInput,
        change_batch: dida.core.ChangeBatch,
    },
    QueueFrontierUpdate: struct {
        node_input: dida.core.NodeInput,
        timestamp: dida.core.Timestamp,
        diff: isize,
    },
    ApplyFrontierUpdate: struct {
        node: dida.core.Node,
        timestamp: dida.core.Timestamp,
        diff: isize,
    },
    ProcessFrontierUpdates,
    ProcessFrontierUpdate: struct {
        node: dida.core.Node,
        input_timestamp: dida.core.Timestamp,
        diff: isize,
    },
    ProcessFrontierUpdateReaction: struct {
        node: dida.core.Node,
    },
    PopOutput: struct {
        node: dida.core.Node,
        change_batch: ?dida.core.ChangeBatch,
    },
    DoWork,
};

/// Dispatch a debug event to the root file's `emitDebugEvent` handler, if one
/// is declared. The @hasDecl check happens at comptime, so there is zero cost
/// when no handler is declared.
pub fn emitDebugEvent(shard: *const dida.core.Shard, debug_event: DebugEvent) void {
    const root = @import("root");
    if (@import("builtin").is_test) {
        // Uncomment this for debugging tests
        // dumpDebugEvent(shard, debug_event);
    } else if (@hasDecl(root, "emitDebugEvent"))
        // You can add a handler to your root file eg
        // pub const emitDebugEvent = dida.debug.dumpDebugEvent;
        root.emitDebugEvent(shard, debug_event);
}

/// Handler that dumps the whole shard state plus the event to stderr.
// NOTE(review): `dida.common` does not appear to be defined anywhere in this
// repo (this file aliases `u = dida.util`, and util.zig is what defines
// `dump`). Zig compiles lazily, so this only fails if the function is actually
// referenced — this probably should be `u.dump`; confirm.
pub fn dumpDebugEvent(shard: *const dida.core.Shard, debug_event: DebugEvent) void {
    dida.common.dump(shard);
    dida.common.dump(debug_event);
}

/// Pretty-print `thing` to `writer`, recursing through dida's core types,
/// ArrayLists, HashMaps, and generic structs/unions/slices via reflection.
/// `indent` is the current indentation depth in spaces.
pub fn dumpInto(writer: anytype, indent: u32, thing: anytype) anyerror!void {
    const T = @TypeOf(thing);
    if (T == std.mem.Allocator) {
        try writer.writeAll("Allocator{}");
    } else if (comptime std.mem.startsWith(u8, @typeName(T), "std.array_list.ArrayList")) {
        try dumpInto(writer, indent, thing.items);
    } else if (comptime std.mem.startsWith(u8, @typeName(T), "std.hash_map.HashMap")) {
        var iter = thing.iterator();
        // A HashMap with void values is printed as a set.
        const is_set = @TypeOf(iter.next().?.value_ptr.*) == void;
        try writer.writeAll(if (is_set) "HashSet(\n" else "HashMap(\n");
        while (iter.next()) |entry| {
            try writer.writeByteNTimes(' ', indent + 4);
            try dumpInto(writer, indent + 4, entry.key_ptr.*);
            if (!is_set) {
                try writer.writeAll(" => ");
                try dumpInto(writer, indent + 4, entry.value_ptr.*);
            }
            try writer.writeAll(",\n");
        }
        try writer.writeByteNTimes(' ', indent);
        try writer.writeAll(")");
    } else {
        switch (T) {
            dida.core.Value => {
                switch (thing) {
                    .Number => |number| try dumpInto(writer, indent + 4, number),
                    .String => |string| try dumpInto(writer, indent + 4, string),
                }
            },
            dida.core.Row => {
                try writer.writeAll("Row[");
                for (thing.values) |value, i| {
                    try std.fmt.format(writer, "{}", .{value});
                    if (i != thing.values.len - 1)
                        try writer.writeAll(", ");
                }
                try writer.writeAll("]");
            },
            dida.core.Timestamp => {
                try writer.writeAll("T[");
                for (thing.coords) |coord, i| {
                    try std.fmt.format(writer, "{}", .{coord});
                    if (i != thing.coords.len - 1)
                        try writer.writeAll(", ");
                }
                try writer.writeAll("]");
            },
            dida.core.Frontier => {
                try dumpInto(writer, indent, thing.timestamps);
            },
            dida.core.NodeState.DistinctState => {
                try writer.writeAll("DistinctState{\n");
                try writer.writeByteNTimes(' ', indent + 4);
                try writer.writeAll("index: ");
                try dumpInto(writer, indent + 4, thing.index);
                try writer.writeAll(",\n");
                try writer.writeByteNTimes(' ', indent + 4);
                try writer.writeAll("pending_corrections: ");
                try dumpInto(writer, indent + 4, thing.pending_corrections);
                try writer.writeAll("\n");
                try writer.writeByteNTimes(' ', indent);
                try writer.writeAll("}");
            },
            dida.core.NodeState.ReduceState => {
                try writer.writeAll("ReduceState{\n");
                try writer.writeByteNTimes(' ', indent + 4);
                try writer.writeAll("index: ");
                try dumpInto(writer, indent + 4, thing.index);
                try writer.writeAll(",\n");
                try writer.writeByteNTimes(' ', indent + 4);
                try writer.writeAll("pending_corrections: ");
                try dumpInto(writer, indent + 4, thing.pending_corrections);
                try writer.writeAll("\n");
                try writer.writeByteNTimes(' ', indent);
                try writer.writeAll("}");
            },
            dida.core.Shard => {
                // Print per-node spec/state/frontier plus any change batches
                // queued at that node's inputs.
                try writer.writeAll("Shard{\n");
                for (thing.graph.node_specs) |node_spec, node_id| {
                    try writer.writeByteNTimes(' ', indent + 4);
                    try std.fmt.format(writer, "{}: {{\n", .{node_id});
                    try writer.writeByteNTimes(' ', indent + 8);
                    try writer.writeAll("spec: ");
                    try dumpInto(writer, indent + 8, node_spec);
                    try writer.writeAll(",\n");
                    try writer.writeByteNTimes(' ', indent + 8);
                    try writer.writeAll("state: ");
                    try dumpInto(writer, indent + 8, thing.node_states[node_id]);
                    try writer.writeAll(",\n");
                    try writer.writeByteNTimes(' ', indent + 8);
                    try writer.writeAll("frontier: ");
                    try dumpInto(writer, indent + 8, thing.node_frontiers[node_id]);
                    try writer.writeAll(",\n");
                    try writer.writeByteNTimes(' ', indent + 8);
                    try writer.writeAll("unprocessed_change_batches: [\n");
                    {
                        for (thing.unprocessed_change_batches.items) |change_batch_at_node_input| {
                            if (change_batch_at_node_input.node_input.node.id == node_id) {
                                try writer.writeByteNTimes(' ', indent + 12);
                                try dumpInto(writer, indent + 12, change_batch_at_node_input.change_batch);
                                try writer.writeAll(",\n");
                            }
                        }
                    }
                    try writer.writeByteNTimes(' ', indent + 8);
                    try writer.writeAll("],\n");
                    try writer.writeByteNTimes(' ', indent + 4);
                    try writer.writeAll("},\n");
                }
                try writer.writeByteNTimes(' ', indent);
                try writer.writeAll("}\n");
            },
            else => {
                // Fall back to generic reflection over the type structure.
                switch (@typeInfo(T)) {
                    .Pointer => |pti| {
                        switch (pti.size) {
                            .One => {
                                try writer.writeAll("&");
                                try dumpInto(writer, indent, thing.*);
                            },
                            .Many => {
                                // bail
                                try std.fmt.format(writer, "{}", .{thing});
                            },
                            .Slice => {
                                if (pti.child == u8) {
                                    try std.fmt.format(writer, "\"{s}\"", .{thing});
                                } else {
                                    try std.fmt.format(writer, "[]{s}[\n", .{pti.child});
                                    for (thing) |elem| {
                                        try writer.writeByteNTimes(' ', indent + 4);
                                        try dumpInto(writer, indent + 4, elem);
                                        try writer.writeAll(",\n");
                                    }
                                    try writer.writeByteNTimes(' ', indent);
                                    try writer.writeAll("]");
                                }
                            },
                            .C => {
                                // bail
                                try std.fmt.format(writer, "{}", .{thing});
                            },
                        }
                    },
                    .Array => |ati| {
                        if (ati.child == u8) {
                            try std.fmt.format(writer, "\"{s}\"", .{thing});
                        } else {
                            try std.fmt.format(writer, "[{}]{s}[\n", .{ ati.len, ati.child });
                            for (thing) |elem| {
                                try writer.writeByteNTimes(' ', indent + 4);
                                try dumpInto(writer, indent + 4, elem);
                                try writer.writeAll(",\n");
                            }
                            try writer.writeByteNTimes(' ', indent);
                            try writer.writeAll("]");
                        }
                    },
                    .Struct => |sti| {
                        try writer.writeAll(@typeName(@TypeOf(thing)));
                        try writer.writeAll("{\n");
                        inline for (sti.fields) |field| {
                            try writer.writeByteNTimes(' ', indent + 4);
                            try std.fmt.format(writer, ".{s} = ", .{field.name});
                            try dumpInto(writer, indent + 4, @field(thing, field.name));
                            try writer.writeAll(",\n");
                        }
                        try writer.writeByteNTimes(' ', indent);
                        try writer.writeAll("}");
                    },
                    .Union => |uti| {
                        if (uti.tag_type) |tag_type| {
                            try writer.writeAll(@typeName(@TypeOf(thing)));
                            try writer.writeAll("{\n");
                            inline for (@typeInfo(tag_type).Enum.fields) |fti| {
                                if (@enumToInt(std.meta.activeTag(thing)) == fti.value) {
                                    try writer.writeByteNTimes(' ', indent + 4);
                                    try std.fmt.format(writer, ".{s} = ", .{fti.name});
                                    try dumpInto(writer, indent + 4, @field(thing, fti.name));
                                    try writer.writeAll("\n");
                                    try writer.writeByteNTimes(' ', indent);
                                    try writer.writeAll("}");
                                }
                            }
                        } else {
                            // bail
                            try std.fmt.format(writer, "{}", .{thing});
                        }
                    },
                    .Optional => {
                        if (thing == null) {
                            try writer.writeAll("null");
                        } else {
                            try dumpInto(writer, indent, thing.?);
                        }
                    },
                    .Opaque => {
                        try writer.writeAll("opaque");
                    },
                    else => {
                        try std.fmt.format(writer, "{any}", .{thing});
                    },
                }
            },
        }
    }
}

// A path of field names / indexes leading from the shard root to a value.
const ValidationPath = []const []const u8;

pub const ValidationError = union(enum) {
    // Two distinct paths reached the same address at the same type.
    Aliasing: [2]ValidationPath,
};

const ValidationState = struct {
    allocator: u.Allocator,
    // Maps every visited (address, type) pair to the first path that reached it.
    pointers: u.DeepHashMap(PointerKey, ValidationPath),
    errors: u.ArrayList(ValidationError),

    const PointerKey = struct {
        address: usize,
        typeName: []const u8,
    };
};

/// Validate the shard's ownership invariants and panic (after dumping the
/// errors) if any are violated. All validation allocations live in a
/// temporary arena.
pub fn validateOrPanic(allocator: u.Allocator, shard: *const dida.core.Shard) void {
    var arena = u.ArenaAllocator.init(allocator);
    defer arena.deinit();
    const errs = validate(&arena.allocator, shard);
    if (errs.len > 0) {
        u.dump(errs);
        u.panic("Found invalid shard state. See errors above.", .{});
    }
}

/// Walk the entire shard looking for aliasing (two paths owning the same
/// value), which would indicate a double-free hazard. Returns a slice of
/// errors allocated from `allocator`; caller owns it.
pub fn validate(allocator: u.Allocator, shard: *const dida.core.Shard) []const ValidationError {
    var state = ValidationState{
        .allocator = allocator,
        .pointers = u.DeepHashMap(ValidationState.PointerKey, ValidationPath).init(allocator),
        .errors = u.ArrayList(ValidationError).init(allocator),
    };
    validateInto(&state, &.{}, shard) catch |err| {
        switch (err) {
            error.OutOfMemory => u.panic("Out of memory", .{}),
        }
    };
    return state.errors.toOwnedSlice();
}

// TODO this is a separate function to work around compiler bugs when using anonymous slices
fn appendPath(allocator: u.Allocator, a: ValidationPath, b: []const u8) !ValidationPath {
    const bb: []const []const u8 = &.{b};
    return std.mem.concat(allocator, []const u8, &.{ a, bb });
}

/// Recursively visit `thing` (a single-item pointer), recording each visited
/// (address, type) pair and reporting an Aliasing error when a pair is seen
/// twice via different paths.
pub fn validateInto(state: *ValidationState, path: ValidationPath, thing: anytype) !void {
    {
        const info = @typeInfo(@TypeOf(thing));
        u.comptimeAssert(info == .Pointer and info.Pointer.size == .One, "Expected pointer, found {s}", .{@typeName(@TypeOf(thing))});
    }
    const T = @TypeOf(thing.*);
    // These types are intentionally shared/borrowed, so aliasing is fine.
    switch (T) {
        u.Allocator,
        *dida.core.NodeSpec.MapSpec.Mapper,
        dida.core.NodeSpec.ReduceSpec.Reducer,
        dida.core.Graph,
        => return,
        else => {},
    }
    switch (@typeInfo(T)) {
        .Fn => return,
        else => {},
    }
    // Zero-sized types all share one address, so skip them to avoid false positives.
    if (@sizeOf(T) != 0) {
        const key = .{
            .address = @ptrToInt(thing),
            .typeName = @typeName(T),
        };
        const entry = try state.pointers.getOrPut(key);
        if (entry.found_existing) {
            try state.errors.append(.{ .Aliasing = .{
                path,
                entry.value_ptr.*,
            } });
        } else entry.value_ptr.* = path;
    }
    switch (@typeInfo(T)) {
        .Struct => |info| {
            if (comptime std.mem.startsWith(u8, @typeName(T), "std.array_list.ArrayList")) {
                for (thing.items) |*elem, i| {
                    try validateInto(
                        state,
                        try appendPath(state.allocator, path, try u.format(state.allocator, "{}", .{i})),
                        elem,
                    );
                }
            } else if (comptime std.mem.startsWith(u8, @typeName(T), "std.hash_map.HashMap")) {
                var iter = thing.iterator();
                var i: usize = 0;
                while (iter.next()) |entry| {
                    const new_path = try appendPath(state.allocator, path, try u.format(state.allocator, "{}", .{i}));
                    try validateInto(
                        state,
                        try appendPath(state.allocator, new_path, "key"),
                        entry.key_ptr,
                    );
                    try validateInto(
                        state,
                        try appendPath(state.allocator, new_path, "value"),
                        entry.value_ptr,
                    );
                    i += 1;
                }
            } else inline for (info.fields) |field_info| {
                // Pointstamp subgraphs are borrowed from the graph
                if (comptime T == dida.core.Shard.Pointstamp and u.deepEqual(field_info.name, "subgraphs")) continue;
                try validateInto(
                    state,
                    try appendPath(state.allocator, path, field_info.name),
                    &@field(thing.*, field_info.name),
                );
            }
        },
        .Union => |info| {
            if (info.tag_type) |tag_type| {
                inline for (@typeInfo(tag_type).Enum.fields) |field_info| {
                    if (@enumToInt(std.meta.activeTag(thing.*)) == field_info.value) {
                        // TODO putting this in the call below causes a compiler crash
                        const new_path = try appendPath(state.allocator, path, field_info.name);
                        try validateInto(
                            state,
                            new_path,
                            &@field(thing.*, field_info.name),
                        );
                        // TODO this shouldn't be necessary, but codegen for this `inline for` seems to be broken
                        return;
                    }
                }
                unreachable;
            }
        },
        .Array => {
            for (thing.*) |*elem, i| {
                try validateInto(
                    state,
                    try appendPath(state.allocator, path, try u.format(state.allocator, "{}", .{i})),
                    elem,
                );
            }
        },
        .Pointer => |info| {
            switch (info.size) {
                .One => {
                    try validateInto(
                        state,
                        try appendPath(state.allocator, path, "*"),
                        &thing.*.*,
                    );
                },
                .Many => @compileError("Don't know how to validate " ++ @typeName(T)),
                .Slice => {
                    for (thing.*) |*elem, i| {
                        try validateInto(
                            state,
                            try appendPath(state.allocator, path, try u.format(state.allocator, "{}", .{i})),
                            elem,
                        );
                    }
                },
                .C => @compileError("Don't know how to validate " ++ @typeName(T)),
            }
        },
        .Optional => {
            if (thing.*) |*child|
                try validateInto(
                    state,
                    try appendPath(state.allocator, path, "?"),
                    child,
                );
        },
        .Int, .Float, .Void, .Fn => {},
        else => @compileError("Don't know how to validate " ++ @typeName(T)),
    }
}

================================================ FILE: lib/dida/sugar.zig ================================================

// TODO this is just a proof of concept, api might change a lot

const std = @import("std");
const dida = @import("../dida.zig");
const u = dida.util;

// Unwrap an error union, panicking on error. Used throughout the sugar api so
// graph-building code doesn't have to thread `try` everywhere.
fn assert_ok(result: anytype) @typeInfo(@TypeOf(result)).ErrorUnion.payload {
    return result catch |err| u.panic("{}", .{err});
}

// TODO this needs a better name
/// High-level wrapper over GraphBuilder/Shard. Starts in the Building state;
/// call `build` to switch to Running. Methods access `state.Building` or
/// `state.Running` directly and will panic if called in the wrong state.
pub const Sugar = struct {
    allocator: u.Allocator,
    state: union(enum) {
        Building: dida.core.GraphBuilder,
        Running: dida.core.Shard,
    },

    pub fn init(allocator: u.Allocator) Sugar {
        return .{
            .allocator = allocator,
            .state = .{ .Building = dida.core.GraphBuilder.init(allocator) },
        };
    }

    /// Add an Input node to the root subgraph (id 0).
    pub fn input(self: *Sugar) Node(.Input) {
        const inner = assert_ok(self.state.Building.addNode(.{ .id = 0 }, .Input));
        return .{
            .sugar = self,
            .inner = inner,
        };
    }

    /// Finish building and switch to the Running state.
    pub fn build(self: *Sugar) void {
        const graph = assert_ok(self.allocator.create(dida.core.Graph));
        graph.* = assert_ok(self.state.Building.finishAndReset());
        self.state = .{ .Running = assert_ok(dida.core.Shard.init(self.allocator, graph)) };
    }

    /// Run the shard until it has no more work.
    pub fn doAllWork(self: *Sugar) !void {
        const shard = &self.state.Running;
        while (shard.hasWork()) try shard.doWork();
    }

    /// Open a new subgraph nested directly inside the root subgraph (id 0).
    pub fn loop(self: *Sugar) Subgraph {
        const builder = &self.state.Building;
        const new_inner = assert_ok(builder.addSubgraph(.{ .id = 0 }));
        return Subgraph{
            .sugar = self,
            .inner = new_inner,
        };
    }
};

pub const Subgraph = struct {
    sugar: *Sugar,
    inner: dida.core.Subgraph,

    /// Open a further subgraph nested inside this one.
    pub fn loop(self: Subgraph) Subgraph {
        const builder = &self.sugar.state.Building;
        const new_inner = assert_ok(builder.addSubgraph(self.inner));
        return Subgraph{
            .sugar = self.sugar,
            .inner = new_inner,
        };
    }

    /// Add the TimestampIncrement node that will drive this loop. Its input is
    /// left null until `fixpoint` connects it.
    pub fn loopNode(self: Subgraph) Node(.TimestampIncrement) {
        const builder = &self.sugar.state.Building;
        const node_inner = assert_ok(builder.addNode(
            self.inner,
            .{ .TimestampIncrement = .{ .input = null } },
        ));
        return Node(.TimestampIncrement){
            .sugar = self.sugar,
            .inner = node_inner,
        };
    }

    // TODO would be nice to automatically add TimestampPush/Pop in node methods as needed instead of needing import/export
    // TODO would also be nice to cache repeated calls

    /// Bring a node from the parent subgraph into this subgraph via TimestampPush.
    pub fn importNode(self: Subgraph, node: anytype) Node(.TimestampPush) {
        const builder = &self.sugar.state.Building;
        u.assert(
            builder.node_subgraphs.items[node.inner.id].id == builder.subgraph_parents.items[self.inner.id - 1].id,
            "Can only import from parent subgraph into child subgraph",
            .{},
        );
        const node_inner = assert_ok(builder.addNode(
            self.inner,
            .{ .TimestampPush = .{ .input = node.inner } },
        ));
        return Node(.TimestampPush){
            .sugar = self.sugar,
            .inner = node_inner,
        };
    }

    /// Send a node from this subgraph back out to the parent subgraph via TimestampPop.
    pub fn exportNode(self: Subgraph, node: anytype) Node(.TimestampPop) {
        const builder = &self.sugar.state.Building;
        u.assert(
            builder.node_subgraphs.items[node.inner.id].id == self.inner.id,
            "Can only export from child subgraph into parent subgraph",
            .{},
        );
        const node_inner = assert_ok(builder.addNode(
            builder.subgraph_parents.items[self.inner.id - 1],
            .{ .TimestampPop = .{ .input = node.inner } },
        ));
        return Node(.TimestampPop){
            .sugar = self.sugar,
            .inner = node_inner,
        };
    }
};

/// A typed handle to a dataflow node. The comptime tag selects which methods
/// exist (eg only Input nodes get push/flush/advance, only indexed nodes get
/// distinct/join) via the `usingnamespace` blocks below.
pub fn Node(comptime tag_: std.meta.TagType(dida.core.NodeSpec)) type {
    return struct {
        sugar: *Sugar,
        inner: dida.core.Node,

        pub const tag = tag_;
        const Self = @This();

        pub fn index(self: Self) Node(.Index) {
            const builder = &self.sugar.state.Building;
            const subgraph = builder.node_subgraphs.items[self.inner.id];
            const new_inner = assert_ok(self.sugar.state.Building.addNode(
                subgraph,
                .{ .Index = .{ .input = self.inner } },
            ));
            return .{
                .sugar = self.sugar,
                .inner = new_inner,
            };
        }

        // Methods only available on nodes that carry an index (Index, Distinct).
        pub usingnamespace if (tag.hasIndex()) struct {
            pub fn distinct(self: Self) Node(.Distinct) {
                const builder = &self.sugar.state.Building;
                const subgraph = builder.node_subgraphs.items[self.inner.id];
                const new_inner = assert_ok(self.sugar.state.Building.addNode(
                    subgraph,
                    .{ .Distinct = .{ .input = self.inner } },
                ));
                return .{
                    .sugar = self.sugar,
                    .inner = new_inner,
                };
            }

            pub fn join(self: Self, other: anytype, key_columns: usize) Node(.Join) {
                u.comptimeAssert(
                    comptime @TypeOf(other).tag.hasIndex(),
                    "Can only call join on nodes which have indexes (Index, Distinct), not {}",
                    .{@TypeOf(other).tag},
                );
                const builder = &self.sugar.state.Building;
                const subgraph = builder.node_subgraphs.items[self.inner.id];
                const new_inner = assert_ok(self.sugar.state.Building.addNode(
                    subgraph,
                    .{ .Join = .{
                        .inputs = .{ self.inner, other.inner },
                        .key_columns = key_columns,
                    } },
                ));
                return .{
                    .sugar = self.sugar,
                    .inner = new_inner,
                };
            }
        } else struct {};

        pub fn map(self: Self, mapper: *dida.core.NodeSpec.MapSpec.Mapper) Node(.Map) {
            const builder = &self.sugar.state.Building;
            const subgraph = builder.node_subgraphs.items[self.inner.id];
            const new_inner = assert_ok(self.sugar.state.Building.addNode(
                subgraph,
                .{ .Map = .{
                    .input = self.inner,
                    .mapper = mapper,
                } },
            ));
            return .{
                .sugar = self.sugar,
                .inner = new_inner,
            };
        }

        pub fn reduce(self: Self, key_columns: usize, init_value: anytype, reducer: *dida.core.NodeSpec.ReduceSpec.Reducer) Node(.Reduce) {
            const builder = &self.sugar.state.Building;
            const subgraph = builder.node_subgraphs.items[self.inner.id];
            const new_inner = assert_ok(self.sugar.state.Building.addNode(
                subgraph,
                .{ .Reduce = .{
                    .input = self.inner,
                    .key_columns = key_columns,
                    .init_value = coerceAnonTo(self.sugar.allocator, dida.core.Value, init_value),
                    .reducer = reducer,
                } },
            ));
            return .{
                .sugar = self.sugar,
                .inner = new_inner,
            };
        }

        /// Keep only the given columns, in the given order (implemented as a Map).
        pub fn project(self: Self, columns: anytype) Node(.Map) {
            return self.projectInner(coerceAnonTo(self.sugar.allocator, []usize, columns));
        }

        fn projectInner(self: Self, columns: []usize) Node(.Map) {
            const project_mapper = assert_ok(self.sugar.allocator.create(ProjectMapper));
            project_mapper.* = ProjectMapper{
                .allocator = self.sugar.allocator,
                .columns = columns,
                .mapper = .{ .map_fn = ProjectMapper.map },
            };
            return self.map(&project_mapper.mapper);
        }

        // TODO shame this is a reserved name, need to think of a different name
        pub fn union_(self: Self, other: anytype) Node(.Union) {
            const builder = &self.sugar.state.Building;
            const subgraph = builder.node_subgraphs.items[self.inner.id];
            const new_inner = assert_ok(builder.addNode(
                subgraph,
                .{ .Union = .{ .inputs = .{ self.inner, other.inner } } },
            ));
            return .{
                .sugar = self.sugar,
                .inner = new_inner,
            };
        }

        // Only the loop-driving TimestampIncrement node can close its loop.
        pub usingnamespace if (tag == .TimestampIncrement) struct {
            /// Connect `future` as the back-edge input of this loop node.
            pub fn fixpoint(self: Self, future: anytype) void {
                const builder = &self.sugar.state.Building;
                builder.connectLoop(future.inner, self.inner);
            }
        } else struct {};

        pub fn output(self: Self) Node(.Output) {
            const builder = &self.sugar.state.Building;
            const subgraph = builder.node_subgraphs.items[self.inner.id];
            const new_inner = assert_ok(self.sugar.state.Building.addNode(
                subgraph,
                .{ .Output = .{ .input = self.inner } },
            ));
            return .{
                .sugar = self.sugar,
                .inner = new_inner,
            };
        }

        // Runtime methods only available on Input nodes (require Running state).
        pub usingnamespace if (tag == .Input) struct {
            pub fn push(self: Self, change: anytype) !void {
                // TODO self.pushInner won't compile here - circular reference?
                try pushInner(self, coerceAnonTo(self.sugar.allocator, dida.core.Change, change));
            }

            fn pushInner(self: Self, change: dida.core.Change) !void {
                const shard = &self.sugar.state.Running;
                try shard.pushInput(self.inner, change);
            }

            pub fn flush(self: Self) !void {
                const shard = &self.sugar.state.Running;
                try shard.flushInput(self.inner);
            }

            pub fn advance(self: Self, timestamp: anytype) !void {
                try self.advanceInner(coerceAnonTo(self.sugar.allocator, dida.core.Timestamp, timestamp));
            }

            pub fn advanceInner(self: Self, timestamp: dida.core.Timestamp) !void {
                const shard = &self.sugar.state.Running;
                try shard.advanceInput(self.inner, timestamp);
            }
        } else struct {};

        // Runtime methods only available on Output nodes (require Running state).
        pub usingnamespace if (tag == .Output) struct {
            pub fn pop(self: Self) ?dida.core.ChangeBatch {
                const shard = &self.sugar.state.Running;
                return shard.popOutput(self.inner);
            }
        } else struct {};
    };
}

/// Convert comptime-known anonymous literals (tuples, ints, strings) into
/// dida.core types, allocating as needed. Eg `.{.{"a", 1}, .{.{0}}, 1}`
/// becomes a Change. Panics (via assert_ok) on allocation failure.
pub fn coerceAnonTo(allocator: u.Allocator, comptime T: type, anon: anytype) T {
    const ti = @typeInfo(T);
    if (ti == .Pointer and ti.Pointer.size == .Slice) {
        const slice = assert_ok(allocator.alloc(ti.Pointer.child, anon.len));
        // `anon` is a tuple, so this loop must be unrolled at comptime.
        comptime var i: usize = 0;
        inline while (i < anon.len) : (i += 1) {
            slice[i] = coerceAnonTo(allocator, ti.Pointer.child, anon[i]);
        }
        return slice;
    } else {
        switch (T) {
            u8 => return anon,
            dida.core.Timestamp => {
                return .{ .coords = coerceAnonTo(allocator, []usize, anon) };
            },
            dida.core.Change => {
                return .{
                    .row = coerceAnonTo(allocator, dida.core.Row, anon[0]),
                    .timestamp = coerceAnonTo(allocator, dida.core.Timestamp, anon[1]),
                    .diff = anon[2],
                };
            },
            ?dida.core.ChangeBatch => {
                const changes = coerceAnonTo(allocator, []dida.core.Change, anon);
                defer allocator.free(changes);
                var builder = dida.core.ChangeBatchBuilder.init(allocator);
                assert_ok(builder.changes.appendSlice(changes));
                return assert_ok(builder.finishAndReset());
            },
            dida.core.ChangeBatch => {
                // NOTE(review): `.?` panics if the changes coalesce to an empty batch.
                return coerceAnonTo(allocator, ?dida.core.ChangeBatch, anon).?;
            },
            dida.core.Row => {
                return .{ .values = coerceAnonTo(allocator, []dida.core.Value, anon) };
            },
            dida.core.Value => {
                switch (@typeInfo(@TypeOf(anon))) {
                    .Int, .ComptimeInt => return .{ .Number = @intCast(u64, anon) },
                    .Pointer => return .{ .String = coerceAnonTo(allocator, []const u8, anon) },
                    else => u.compileError("Don't know how to coerce {} to Value", .{@TypeOf(anon)}),
                }
            },
            dida.core.FrontierChange => {
                return .{
                    .timestamp = coerceAnonTo(allocator, dida.core.Timestamp, anon[0]),
                    .diff = anon[1],
                };
            },
            dida.core.Frontier => {
                const timestamps = coerceAnonTo(allocator, []dida.core.Timestamp, anon);
                defer {
                    for (timestamps) |*timestamp| timestamp.deinit(allocator);
                    allocator.free(timestamps);
                }
                var frontier = dida.core.Frontier.init(allocator);
                var changes_into = std.ArrayList(dida.core.FrontierChange).init(allocator);
                defer changes_into.deinit();
                for (timestamps) |timestamp| {
                    assert_ok(frontier.move(.Later, timestamp, &changes_into));
                    for (changes_into.items) |*change| change.deinit(allocator);
                    assert_ok(changes_into.resize(0));
                }
                return frontier;
            },
            usize => return anon,
            else => u.compileError("Don't know how to coerce anon to {}", .{T}),
        }
    }
}

// Mapper backing `project`: picks out `columns` from each input row.
const ProjectMapper = struct {
    allocator: u.Allocator,
    columns: []usize,
    mapper: dida.core.NodeSpec.MapSpec.Mapper,

    fn map(self: *dida.core.NodeSpec.MapSpec.Mapper, input: dida.core.Row) error{OutOfMemory}!dida.core.Row {
        const project_mapper = @fieldParentPtr(ProjectMapper, "mapper", self);
        var output_values = assert_ok(project_mapper.allocator.alloc(dida.core.Value, project_mapper.columns.len));
        for (output_values) |*output_value, i| {
            // NOTE(review): this copies the Value struct without deepClone, so
            // String values share their bytes with the input row — confirm the
            // caller's ownership expectations for mapper outputs.
            output_value.* = input.values[project_mapper.columns[i]];
        }
        return dida.core.Row{ .values = output_values };
    }
};

================================================ FILE: lib/dida/util.zig ================================================

//!
Things that we use often const std = @import("std"); const dida = @import("../dida.zig"); pub const warn = std.debug.warn; pub const debug_assert = std.debug.assert; pub const max = std.math.max; pub const min = std.math.min; pub const Allocator = std.mem.Allocator; pub const ArenaAllocator = std.heap.ArenaAllocator; pub const ArrayList = std.ArrayList; pub const HashMap = std.HashMap; pub const AutoHashMap = std.AutoHashMap; pub fn panic(comptime message: []const u8, args: anytype) noreturn { // TODO should we preallocate memory for panics? var buf = ArrayList(u8).init(std.heap.page_allocator); var writer = buf.writer(); std.fmt.format(writer, message, args) catch std.mem.copy(u8, buf.items[buf.items.len - 3 .. buf.items.len], "OOM"); @panic(buf.toOwnedSlice()); } pub fn assert(condition: bool, comptime message: []const u8, args: anytype) void { if (!condition) panic(message, args); } pub fn comptimeAssert(comptime condition: bool, comptime message: []const u8, comptime args: anytype) void { if (!condition) compileError(message, args); } pub fn compileError(comptime message: []const u8, comptime args: anytype) void { @compileError(comptime std.fmt.comptimePrint(message, args)); } pub fn DeepHashMap(comptime K: type, comptime V: type) type { return std.HashMap(K, V, DeepHashContext(K), std.hash_map.default_max_load_percentage); } pub fn DeepHashSet(comptime K: type) type { return DeepHashMap(K, void); } pub fn format(allocator: Allocator, comptime fmt: []const u8, args: anytype) ![]const u8 { var buf = ArrayList(u8).init(allocator); var writer = buf.writer(); try std.fmt.format(writer, fmt, args); return buf.items; } // Chain casts from *T to *[1]T to []T pub fn ptrToSlice(comptime T: type, input: *const T) []const T { const one_input: *const [1]T = input; return one_input; } pub fn last(comptime T: type, slice: []const T) T { assert(slice.len > 0, "Tried to take last item of a 0-length slice", .{}); return slice[slice.len - 1]; } pub fn Queue(comptime T: type) 
type {
    // A FIFO queue built from two stacks: push onto `in`, pop from `out`,
    // refilling `out` (reversed) from `in` whenever it runs dry.
    return struct {
        in: ArrayList(T),
        out: ArrayList(T),

        const Self = @This();

        pub fn init(allocator: Allocator) Self {
            return .{
                .in = ArrayList(T).init(allocator),
                .out = ArrayList(T).init(allocator),
            };
        }

        pub fn deinit(self: *Self) void {
            self.in.deinit();
            self.out.deinit();
            self.* = undefined;
        }

        pub fn push(self: *Self, item: T) !void {
            try self.in.append(item);
        }

        /// Pop the oldest item, or null if the queue is empty.
        pub fn popOrNull(self: *Self) ?T {
            if (self.out.popOrNull()) |item| return item;
            // `out` is empty - move everything over from `in` and reverse it
            // so the oldest item ends up on top.
            std.mem.swap(ArrayList(T), &self.in, &self.out);
            std.mem.reverse(T, self.out.items);
            return self.out.popOrNull();
        }
    };
}

pub fn TODO() noreturn {
    panic("TODO", .{});
}

// This is only for debugging
pub fn dump(thing: anytype) void {
    const stderr_mutex = std.debug.getStderrMutex();
    stderr_mutex.lock();
    defer stderr_mutex.unlock();
    const my_stderr = std.io.getStdErr().writer();
    dida.debug.dumpInto(my_stderr, 0, thing) catch return;
    my_stderr.writeAll("\n") catch return;
}

pub fn deepEqual(a: anytype, b: @TypeOf(a)) bool {
    return deepOrder(a, b) == .eq;
}

/// Structural comparison yielding a total order (std.math.Order).
/// Structs/enums/unions may override by declaring their own `deepOrder`.
pub fn deepOrder(a: anytype, b: @TypeOf(a)) std.math.Order {
    const T = @TypeOf(a);
    const ti = @typeInfo(T);
    switch (ti) {
        .Struct, .Enum, .Union => {
            if (@hasDecl(T, "deepOrder")) {
                return T.deepOrder(a, b);
            }
        },
        else => {},
    }
    switch (ti) {
        .Bool => {
            // Fixed: previously returned `.Equal` / `.GreaterThan`, which are
            // not members of std.math.Order (its tags are .lt/.eq/.gt, as
            // every other branch here uses), so this branch failed to compile
            // whenever it was instantiated for a bool.
            if (a == b) return .eq;
            if (a) return .gt;
            return .lt;
        },
        .Int, .Float => {
            if (a < b) {
                return .lt;
            }
            if (a > b) {
                return .gt;
            }
            return .eq;
        },
        .Enum => {
            return deepOrder(@enumToInt(a), @enumToInt(b));
        },
        .Pointer => |pti| {
            switch (pti.size) {
                .One => {
                    return deepOrder(a.*, b.*);
                },
                .Slice => {
                    // Shortlex: shorter slices order first, then elementwise.
                    if (a.len < b.len) {
                        return .lt;
                    }
                    if (a.len > b.len) {
                        return .gt;
                    }
                    for (a) |a_elem, a_ix| {
                        const ordering = deepOrder(a_elem, b[a_ix]);
                        if (ordering != .eq) {
                            return ordering;
                        }
                    }
                    return .eq;
                },
                .Many, .C => compileError("Cannot deepOrder {}", .{T}),
            }
        },
        .Optional => {
            // null orders before any non-null value.
            if (a) |a_val| {
                if (b) |b_val| {
                    return deepOrder(a_val, b_val);
                } else {
                    return .gt;
                }
            } else {
                if (b) |_| {
                    return .lt;
                } else {
                    return .eq;
                }
            }
        },
        .Array => {
            for (a) |a_elem, a_ix| {
                const ordering = deepOrder(a_elem, b[a_ix]);
                if (ordering != .eq) {
                    return ordering;
                }
            }
            return .eq;
        },
        .Struct => |sti| {
            // Lexicographic by field declaration order.
            inline for (sti.fields) |fti| {
                const ordering = deepOrder(@field(a, fti.name), @field(b, fti.name));
                if (ordering != .eq) {
                    return ordering;
                }
            }
            return .eq;
        },
        .Union => |uti| {
            if (uti.tag_type) |tag_type| {
                // Order by tag first, then payload.
                const a_tag = @enumToInt(@as(tag_type, a));
                const b_tag = @enumToInt(@as(tag_type, b));
                if (a_tag < b_tag) {
                    return .lt;
                }
                if (a_tag > b_tag) {
                    return .gt;
                }
                inline for (@typeInfo(tag_type).Enum.fields) |fti| {
                    if (a_tag == fti.value) {
                        return deepOrder(
                            @field(a, fti.name),
                            @field(b, fti.name),
                        );
                    }
                }
                unreachable;
            } else {
                compileError("Cannot deepOrder {}", .{T});
            }
        },
        .Void => return .eq,
        .ErrorUnion => {
            // Payloads order before errors.
            if (a) |a_ok| {
                if (b) |b_ok| {
                    return deepOrder(a_ok, b_ok);
                } else |_| {
                    return .lt;
                }
            } else |a_err| {
                if (b) |_| {
                    return .gt;
                } else |b_err| {
                    return deepOrder(a_err, b_err);
                }
            }
        },
        .ErrorSet => return deepOrder(@errorToInt(a), @errorToInt(b)),
        else => compileError("Cannot deepOrder {}", .{T}),
    }
}

/// Structural hash, consistent with deepEqual/deepOrder.
pub fn deepHash(key: anytype) u64 {
    var hasher = std.hash.Wyhash.init(0);
    deepHashInto(&hasher, key);
    return hasher.final();
}

/// Fold `key` into `hasher` structurally.
/// Structs/enums/unions may override by declaring their own `deepHashInto`.
pub fn deepHashInto(hasher: anytype, key: anytype) void {
    const T = @TypeOf(key);
    const ti = @typeInfo(T);
    switch (ti) {
        .Struct, .Enum, .Union => {
            if (@hasDecl(T, "deepHashInto")) {
                return T.deepHashInto(hasher, key);
            }
        },
        else => {},
    }
    switch (ti) {
        .Int => @call(.{ .modifier = .always_inline }, hasher.update, .{std.mem.asBytes(&key)}),
        .Float => |info| deepHashInto(hasher, @bitCast(std.meta.Int(.unsigned, info.bits), key)),
        .Bool => deepHashInto(hasher, @boolToInt(key)),
        .Enum => deepHashInto(hasher, @enumToInt(key)),
        .Pointer => |pti| {
            switch (pti.size) {
                .One => deepHashInto(hasher, key.*),
                .Slice => {
                    for (key) |element| {
                        deepHashInto(hasher, element);
                    }
                },
                .Many, .C => compileError("Cannot deepHash {}", .{T}),
            }
        },
        .Optional => if (key) |k|
deepHashInto(hasher, k),
        .Array => {
            for (key) |element| {
                deepHashInto(hasher, element);
            }
        },
        .Struct => |info| {
            inline for (info.fields) |field| {
                deepHashInto(hasher, @field(key, field.name));
            }
        },
        .Union => |info| {
            if (info.tag_type) |tag_type| {
                // Hash the tag first so different variants with equal
                // payloads still hash differently.
                const tag = std.meta.activeTag(key);
                deepHashInto(hasher, tag);
                inline for (@typeInfo(tag_type).Enum.fields) |fti| {
                    if (@enumToInt(std.meta.activeTag(key)) == fti.value) {
                        deepHashInto(hasher, @field(key, fti.name));
                        return;
                    }
                }
                unreachable;
            } else compileError("cannot deepHash {}", .{T});
        },
        .Void => {},
        else => compileError("cannot deepHash {}", .{T}),
    }
}

// Hash/eql context that makes std.HashMap use structural (deep) equality.
pub fn DeepHashContext(comptime K: type) type {
    return struct {
        const Self = @This();
        pub fn hash(_: Self, pseudo_key: K) u64 {
            return deepHash(pseudo_key);
        }
        pub fn eql(_: Self, pseudo_key: K, key: K) bool {
            return deepEqual(pseudo_key, key);
        }
    };
}

// TODO this can be error-prone - maybe should explicitly list allowed types?
// Recursively copy `thing`, allocating every nested pointer/slice/container
// from `allocator`. Allocator fields themselves are replaced by `allocator`.
pub fn deepClone(thing: anytype, allocator: Allocator) error{OutOfMemory}!@TypeOf(thing) {
    const T = @TypeOf(thing);
    const ti = @typeInfo(T);
    if (T == std.mem.Allocator) return allocator;
    // NOTE(review): matching on @typeName prefixes is fragile across std
    // versions - presumably used because ArrayList/HashMap are generic
    // instantiations with no common marker; confirm when upgrading Zig.
    if (comptime std.mem.startsWith(u8, @typeName(T), "std.array_list.ArrayList")) {
        var cloned = try ArrayList(@TypeOf(thing.items[0])).initCapacity(allocator, thing.items.len);
        cloned.appendSliceAssumeCapacity(thing.items);
        for (cloned.items) |*item| item.* = try deepClone(item.*, allocator);
        return cloned;
    }
    if (comptime std.mem.startsWith(u8, @typeName(T), "std.hash_map.HashMap")) {
        var cloned = try thing.cloneWithAllocator(allocator);
        var iter = cloned.iterator();
        while (iter.next()) |entry| {
            entry.key_ptr.* = try deepClone(entry.key_ptr.*, allocator);
            entry.value_ptr.* = try deepClone(entry.value_ptr.*, allocator);
        }
        return cloned;
    }
    switch (ti) {
        // Plain values need no copying.
        .Bool, .Int, .Float, .Enum, .Void, .Fn => return thing,
        .Pointer => |pti| {
            switch (pti.size) {
                .One => {
                    const cloned = try allocator.create(pti.child);
                    cloned.* = try deepClone(thing.*, allocator);
                    return cloned;
                },
                .Slice => {
                    const cloned = try allocator.alloc(pti.child, thing.len);
                    for (thing) |item, i| cloned[i] = try deepClone(item, allocator);
                    return cloned;
                },
                .Many, .C => compileError("Cannot deepClone {}", .{T}),
            }
        },
        .Array => {
            // Arrays are value types - copy, then deep-clone each element in place.
            var cloned = thing;
            for (cloned) |*item| item.* = try deepClone(item.*, allocator);
            return cloned;
        },
        .Optional => {
            return if (thing == null) null else try deepClone(thing.?, allocator);
        },
        .Struct => |sti| {
            var cloned: T = thing;
            inline for (sti.fields) |fti| {
                @field(cloned, fti.name) = try deepClone(@field(thing, fti.name), allocator);
            }
            return cloned;
        },
        .Union => |uti| {
            if (uti.tag_type) |tag_type| {
                // Find the active variant and rebuild the union around a
                // deep-cloned payload.
                const tag = @enumToInt(std.meta.activeTag(thing));
                inline for (@typeInfo(tag_type).Enum.fields) |fti| {
                    if (tag == fti.value) {
                        return @unionInit(T, fti.name, try deepClone(@field(thing, fti.name), allocator));
                    }
                }
                unreachable;
            } else {
                compileError("Cannot deepClone {}", .{T});
            }
        },
        else => compileError("Cannot deepClone {}", .{T}),
    }
}

================================================ FILE: lib/dida.zig ================================================

pub const util = @import("./dida/util.zig");
pub const core = @import("./dida/core.zig");
pub const sugar = @import("./dida/sugar.zig");
pub const debug = @import("./dida/debug.zig");

================================================ FILE: shell.nix ================================================

with (import ./dependencies.nix);
pkgs.mkShell rec {
  buildInputs = [
    zig
  ];
}

================================================ FILE: test/core.zig ================================================

const std = @import("std");
const dida = @import("../lib/dida.zig");

//pub const allocator = std.heap.c_allocator;
pub const allocator = std.testing.allocator;

// Like std.testing's equality checks, but structural, and dumps both values
// to stderr on mismatch.
fn expectDeepEqual(expected: anytype, actual: anytype) !void {
    if (!dida.util.deepEqual(expected, actual)) {
        dida.util.dump(.{ .expected = expected, .actual = actual });
        return error.TestExpectedEqual;
    }
}

// Checks that causalOrder, lexicalOrder and leastUpperBound are mutually
// consistent for one pair of (anonymously-specified) timestamps.
fn testTimestampOrder(
    anon_a:
anytype,
    anon_b: anytype,
    order: dida.core.PartialOrder,
) !void {
    var a = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_a);
    defer a.deinit(allocator);
    var b = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_b);
    defer b.deinit(allocator);
    try std.testing.expectEqual(order, a.causalOrder(b));
    // The causal order must be antisymmetric: swapping the arguments must
    // flip lt/gt and preserve eq/none.
    const reverse_order: dida.core.PartialOrder = switch (order) {
        .none => .none,
        .lt => .gt,
        .eq => .eq,
        .gt => .lt,
    };
    try std.testing.expectEqual(reverse_order, b.causalOrder(a));
    // Where the causal (partial) order is defined, the lexical (total) order
    // must agree with it.
    const total_order: std.math.Order = switch (order) {
        .none => return,
        .lt => .lt,
        .eq => .eq,
        .gt => .gt,
    };
    // NOTE(review): this guard is redundant - the `.none => return` above
    // already exited for incomparable timestamps.
    if (order != .none) {
        try std.testing.expectEqual(total_order, a.lexicalOrder(b));
    }
    // The least upper bound must be causally >= both inputs.
    var lub = try dida.core.Timestamp.leastUpperBound(allocator, a, b);
    defer lub.deinit(allocator);
    try std.testing.expect(a.causalOrder(lub).isLessThanOrEqual());
    try std.testing.expect(b.causalOrder(lub).isLessThanOrEqual());
}

test "timestamp order" {
    try testTimestampOrder(.{}, .{}, .eq);
    try testTimestampOrder(.{0}, .{0}, .eq);
    try testTimestampOrder(.{0}, .{1}, .lt);
    try testTimestampOrder(.{1}, .{0}, .gt);
    try testTimestampOrder(.{ 0, 0 }, .{ 0, 0 }, .eq);
    try testTimestampOrder(.{ 0, 0 }, .{ 1, 0 }, .lt);
    try testTimestampOrder(.{ 0, 0 }, .{ 0, 1 }, .lt);
    try testTimestampOrder(.{ 0, 0 }, .{ 1, 1 }, .lt);
    try testTimestampOrder(.{ 1, 0 }, .{ 0, 1 }, .none);
}

// Checks leastUpperBound against an expected result for one pair of timestamps.
fn testTimestampLub(
    anon_a: anytype,
    anon_b: anytype,
    anon_lub: anytype,
) !void {
    var a = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_a);
    defer a.deinit(allocator);
    var b = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_b);
    defer b.deinit(allocator);
    var expected_lub = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_lub);
    defer expected_lub.deinit(allocator);
    var actual_lub = try dida.core.Timestamp.leastUpperBound(allocator, a, b);
    defer actual_lub.deinit(allocator);
    try std.testing.expectEqualSlices(usize, expected_lub.coords, actual_lub.coords);
}

test "timestamp lub" {
    try
testTimestampLub(.{}, .{}, .{}); try testTimestampLub(.{0}, .{0}, .{0}); try testTimestampLub(.{0}, .{1}, .{1}); try testTimestampLub(.{1}, .{0}, .{1}); try testTimestampLub(.{ 0, 0 }, .{ 0, 0 }, .{ 0, 0 }); try testTimestampLub(.{ 0, 0 }, .{ 1, 0 }, .{ 1, 0 }); try testTimestampLub(.{ 0, 0 }, .{ 0, 1 }, .{ 0, 1 }); try testTimestampLub(.{ 0, 0 }, .{ 1, 1 }, .{ 1, 1 }); try testTimestampLub(.{ 1, 0 }, .{ 0, 1 }, .{ 1, 1 }); } fn testChangeBatchBuilder( anon_input_changes: anytype, anon_expected_changes: anytype, ) !void { const input_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_input_changes); defer allocator.free(input_changes); const expected_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_expected_changes); defer { for (expected_changes) |*expected_change| expected_change.deinit(allocator); allocator.free(expected_changes); } var builder = dida.core.ChangeBatchBuilder.init(allocator); defer builder.deinit(); for (input_changes) |change| { try builder.changes.append(change); } if (try builder.finishAndReset()) |_batch| { var batch = _batch; defer batch.deinit(allocator); try expectDeepEqual(expected_changes, batch.changes); for (batch.changes) |change| { try std.testing.expect(batch.lower_bound.causalOrder(change.timestamp).isLessThanOrEqual()); } } else { const actual_changes: []dida.core.Change = &[0]dida.core.Change{}; try expectDeepEqual(expected_changes, actual_changes); } } test "change batch builder" { try testChangeBatchBuilder( .{}, .{}, ); try testChangeBatchBuilder( .{ .{ .{}, .{}, 0 }, }, .{}, ); try testChangeBatchBuilder( .{ .{ .{}, .{}, 1 }, }, .{ .{ .{}, .{}, 1 }, }, ); try testChangeBatchBuilder( .{ .{ .{"a"}, .{}, 1 }, .{ .{"b"}, .{}, 1 }, .{ .{"a"}, .{}, -1 }, }, .{ .{ .{"b"}, .{}, 1 }, }, ); try testChangeBatchBuilder( .{ .{ .{"a"}, .{}, 1 }, .{ .{"b"}, .{}, 1 }, .{ .{"a"}, .{}, -1 }, .{ .{"b"}, .{}, -1 }, }, .{}, ); try testChangeBatchBuilder( .{ .{ .{"a"}, .{}, 1 }, .{ .{"b"}, .{}, 1 }, .{ 
.{"a"}, .{}, 1 }, .{ .{"b"}, .{}, -1 }, }, .{ .{ .{"a"}, .{}, 2 }, }, ); try testChangeBatchBuilder( .{ .{ .{"a"}, .{}, 1 }, .{ .{"a"}, .{}, -1 }, .{ .{"a"}, .{}, 1 }, }, .{ .{ .{"a"}, .{}, 1 }, }, ); try testChangeBatchBuilder( .{ .{ .{"a"}, .{}, 0 }, .{ .{"a"}, .{}, 0 }, .{ .{"a"}, .{}, 0 }, }, .{}, ); try testChangeBatchBuilder( .{ .{ .{"a"}, .{}, 0 }, .{ .{"a"}, .{}, 0 }, .{ .{"a"}, .{}, 1 }, }, .{ .{ .{"a"}, .{}, 1 }, }, ); } fn testFrontierMove( anon_frontier: anytype, comptime direction: dida.core.Frontier.Direction, anon_timestamp: anytype, anon_expected_frontier: anytype, anon_expected_changes: anytype, ) !void { const frontier_timestamps = dida.sugar.coerceAnonTo(allocator, []dida.core.Timestamp, anon_frontier); defer { for (frontier_timestamps) |*frontier_timestamp| frontier_timestamp.deinit(allocator); allocator.free(frontier_timestamps); } var frontier = dida.core.Frontier.init(allocator); defer frontier.deinit(); var changes_into = std.ArrayList(dida.core.FrontierChange).init(allocator); defer changes_into.deinit(); for (frontier_timestamps) |frontier_timestamp| { try frontier.move(.Later, frontier_timestamp, &changes_into); for (changes_into.items) |*change| change.deinit(allocator); try changes_into.resize(0); } var timestamp = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_timestamp); defer timestamp.deinit(allocator); const expected_frontier_timestamps = dida.sugar.coerceAnonTo(allocator, []dida.core.Timestamp, anon_expected_frontier); defer { for (expected_frontier_timestamps) |*expected_frontier_timestamp| expected_frontier_timestamp.deinit(allocator); allocator.free(expected_frontier_timestamps); } const expected_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.FrontierChange, anon_expected_changes); defer { for (expected_changes) |*expected_change| expected_change.deinit(allocator); allocator.free(expected_changes); } var actual_changes_into = std.ArrayList(dida.core.FrontierChange).init(allocator); defer { for 
(actual_changes_into.items) |*change| change.deinit(allocator); actual_changes_into.deinit(); } try frontier.move(direction, timestamp, &actual_changes_into); var actual_frontier_timestamps = std.ArrayList(dida.core.Timestamp).init(allocator); defer actual_frontier_timestamps.deinit(); var iter = frontier.timestamps.iterator(); while (iter.next()) |entry| try actual_frontier_timestamps.append(entry.key_ptr.*); std.sort.sort(dida.core.Timestamp, actual_frontier_timestamps.items, {}, struct { fn lessThan(_: void, a: dida.core.Timestamp, b: dida.core.Timestamp) bool { return a.lexicalOrder(b) == .lt; } }.lessThan); std.sort.sort(dida.core.FrontierChange, actual_changes_into.items, {}, struct { fn lessThan(_: void, a: dida.core.FrontierChange, b: dida.core.FrontierChange) bool { return dida.util.deepOrder(a, b) == .lt; } }.lessThan); try expectDeepEqual(expected_frontier_timestamps, actual_frontier_timestamps.items); try expectDeepEqual(expected_changes, actual_changes_into.items); } test "test frontier move" { try testFrontierMove( .{}, .Later, .{ 0, 0 }, .{ .{ 0, 0 }, }, .{ .{ .{ 0, 0 }, 1 }, }, ); try testFrontierMove( .{ .{ 0, 0 }, }, .Later, .{ 0, 1 }, .{ .{ 0, 1 }, }, .{ .{ .{ 0, 0 }, -1 }, .{ .{ 0, 1 }, 1 }, }, ); try testFrontierMove( .{ .{ 0, 1 }, }, .Earlier, .{ 0, 0 }, .{ .{ 0, 0 }, }, .{ .{ .{ 0, 0 }, 1 }, .{ .{ 0, 1 }, -1 }, }, ); try testFrontierMove( .{ .{ 0, 1 }, .{ 1, 0 }, }, .Later, .{ 1, 1 }, .{ .{ 1, 1 }, }, .{ .{ .{ 0, 1 }, -1 }, .{ .{ 1, 0 }, -1 }, .{ .{ 1, 1 }, 1 }, }, ); try testFrontierMove( .{ .{ 0, 0, 1 }, .{ 0, 1, 0 }, }, .Later, .{ 1, 0, 0 }, .{ .{ 0, 0, 1 }, .{ 0, 1, 0 }, .{ 1, 0, 0 }, }, .{ .{ .{ 1, 0, 0 }, 1 }, }, ); } fn testFrontierOrder( anon_frontier: anytype, anon_timestamp: anytype, expected_order: dida.core.PartialOrder, ) !void { const frontier_timestamps = dida.sugar.coerceAnonTo(allocator, []dida.core.Timestamp, anon_frontier); defer { for (frontier_timestamps) |*frontier_timestamp| frontier_timestamp.deinit(allocator); 
allocator.free(frontier_timestamps); } var frontier = dida.core.Frontier.init(allocator); defer frontier.deinit(); var changes_into = std.ArrayList(dida.core.FrontierChange).init(allocator); defer changes_into.deinit(); for (frontier_timestamps) |frontier_timestamp| { try frontier.move(.Later, frontier_timestamp, &changes_into); for (changes_into.items) |*change| change.deinit(allocator); try changes_into.resize(0); } var timestamp = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_timestamp); defer timestamp.deinit(allocator); try std.testing.expectEqual(expected_order, frontier.causalOrder(timestamp)); } test "test frontier order" { try testFrontierOrder( .{}, .{}, .none, ); try testFrontierOrder( .{ .{ 0, 0 }, }, .{ 0, 0 }, .eq, ); try testFrontierOrder( .{ .{ 0, 0 }, }, .{ 0, 1 }, .lt, ); try testFrontierOrder( .{ .{ 0, 1 }, }, .{ 0, 0 }, .gt, ); try testFrontierOrder( .{ .{ 0, 1 }, }, .{ 1, 0 }, .none, ); try testFrontierOrder( .{ .{ 1, 0 }, .{ 0, 1 }, }, .{ 0, 2 }, .lt, ); try testFrontierOrder( .{ .{ 1, 0 }, .{ 0, 1 }, }, .{ 2, 0 }, .lt, ); try testFrontierOrder( .{ .{ 2, 0 }, .{ 0, 2 }, }, .{ 1, 1 }, .none, ); } fn testSupportFrontierUpdate( anon_support: anytype, anon_update: anytype, anon_expected_frontier: anytype, anon_expected_changes: anytype, ) !void { const support = dida.sugar.coerceAnonTo(allocator, []dida.core.FrontierChange, anon_support); defer { for (support) |*frontier_change| frontier_change.deinit(allocator); allocator.free(support); } var frontier = try dida.core.SupportedFrontier.init(allocator); defer frontier.deinit(); var changes_into = std.ArrayList(dida.core.FrontierChange).init(allocator); defer changes_into.deinit(); for (support) |change| { try frontier.update(change.timestamp, change.diff, &changes_into); for (changes_into.items) |*frontier_change| frontier_change.deinit(allocator); try changes_into.resize(0); } var update = dida.sugar.coerceAnonTo(allocator, dida.core.FrontierChange, anon_update); defer 
update.deinit(allocator); const expected_frontier_timestamps = dida.sugar.coerceAnonTo(allocator, []dida.core.Timestamp, anon_expected_frontier); defer { for (expected_frontier_timestamps) |*expected_frontier_timestamp| expected_frontier_timestamp.deinit(allocator); allocator.free(expected_frontier_timestamps); } const expected_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.FrontierChange, anon_expected_changes); defer { for (expected_changes) |*expected_change| expected_change.deinit(allocator); allocator.free(expected_changes); } var actual_changes_into = std.ArrayList(dida.core.FrontierChange).init(allocator); defer { for (actual_changes_into.items) |*change| change.deinit(allocator); actual_changes_into.deinit(); } try frontier.update(update.timestamp, update.diff, &actual_changes_into); var actual_frontier_timestamps = std.ArrayList(dida.core.Timestamp).init(allocator); defer actual_frontier_timestamps.deinit(); var iter = frontier.frontier.timestamps.iterator(); while (iter.next()) |entry| try actual_frontier_timestamps.append(entry.key_ptr.*); std.sort.sort(dida.core.Timestamp, actual_frontier_timestamps.items, {}, struct { fn lessThan(_: void, a: dida.core.Timestamp, b: dida.core.Timestamp) bool { return a.lexicalOrder(b) == .lt; } }.lessThan); std.sort.sort(dida.core.FrontierChange, actual_changes_into.items, {}, struct { fn lessThan(_: void, a: dida.core.FrontierChange, b: dida.core.FrontierChange) bool { return dida.util.deepOrder(a, b) == .lt; } }.lessThan); try expectDeepEqual(expected_frontier_timestamps, actual_frontier_timestamps.items); try expectDeepEqual(expected_changes, actual_changes_into.items); } test "test supported frontier update" { try testSupportFrontierUpdate( .{}, .{ .{ 0, 0 }, 1 }, .{ .{ 0, 0 }, }, .{ .{ .{ 0, 0 }, 1 }, }, ); try testSupportFrontierUpdate( .{ .{ .{ 0, 0 }, 1 }, }, .{ .{ 0, 0 }, 1 }, .{ .{ 0, 0 }, }, .{}, ); try testSupportFrontierUpdate( .{ .{ .{ 0, 0 }, 1 }, }, .{ .{ 0, 0 }, -1 }, .{}, .{ .{ .{ 0, 0 }, -1 
}, }, ); try testSupportFrontierUpdate( .{ .{ .{ 0, 0 }, 2 }, }, .{ .{ 0, 0 }, -1 }, .{ .{ 0, 0 }, }, .{}, ); try testSupportFrontierUpdate( .{ .{ .{ 0, 0 }, 1 }, .{ .{ 1, 1 }, 1 }, }, .{ .{ 0, 0 }, -1 }, .{ .{ 1, 1 }, }, .{ .{ .{ 0, 0 }, -1 }, .{ .{ 1, 1 }, 1 }, }, ); try testSupportFrontierUpdate( .{ .{ .{ 1, 1 }, 1 }, }, .{ .{ 0, 0 }, 1 }, .{ .{ 0, 0 }, }, .{ .{ .{ 0, 0 }, 1 }, .{ .{ 1, 1 }, -1 }, }, ); try testSupportFrontierUpdate( .{ .{ .{ 0, 0 }, 1 }, }, .{ .{ 1, 1 }, 1 }, .{ .{ 0, 0 }, }, .{}, ); try testSupportFrontierUpdate( .{ .{ .{ 2, 0 }, 1 }, .{ .{ 0, 1 }, 1 }, }, .{ .{ 1, 0 }, 1 }, .{ .{ 0, 1 }, .{ 1, 0 }, }, .{ .{ .{ 1, 0 }, 1 }, .{ .{ 2, 0 }, -1 }, }, ); } pub fn testIndexAdd( anon_change_batches: anytype, anon_expected_changess: anytype, ) !void { var index = dida.core.Index.init(allocator); defer index.deinit(); const change_batches = dida.sugar.coerceAnonTo(allocator, []dida.core.ChangeBatch, anon_change_batches); defer allocator.free(change_batches); const expected_changess = dida.sugar.coerceAnonTo(allocator, [][]dida.core.Change, anon_expected_changess); defer { for (expected_changess) |expected_changes| { for (expected_changes) |*expected_change| { expected_change.deinit(allocator); } allocator.free(expected_changes); } allocator.free(expected_changess); } for (change_batches) |change_batch| { try index.addChangeBatch(change_batch); } const actual_changess = try allocator.alloc([]dida.core.Change, index.change_batches.items.len); defer allocator.free(actual_changess); for (actual_changess) |*changes, i| changes.* = index.change_batches.items[i].changes; try expectDeepEqual(expected_changess, actual_changess); } test "test index add" { try testIndexAdd( .{}, .{}, ); try testIndexAdd( .{ .{ .{ .{"a"}, .{0}, 1 }, }, }, .{ .{ .{ .{"a"}, .{0}, 1 }, }, }, ); try testIndexAdd( .{ .{ .{ .{"a"}, .{0}, 1 }, }, .{ .{ .{"a"}, .{0}, 1 }, }, }, .{ .{ .{ .{"a"}, .{0}, 2 }, }, }, ); try testIndexAdd( .{ .{ .{ .{"a"}, .{0}, 1 }, .{ .{"b"}, .{0}, 1 }, }, .{ 
.{ .{"a"}, .{0}, 1 }, }, }, .{ .{ .{ .{"a"}, .{0}, 1 }, .{ .{"b"}, .{0}, 1 }, }, .{ .{ .{"a"}, .{0}, 1 }, }, }, ); try testIndexAdd( .{ .{ .{ .{"a"}, .{0}, 1 }, .{ .{"b"}, .{0}, 1 }, }, .{ .{ .{"a"}, .{0}, 1 }, .{ .{"b"}, .{0}, -1 }, }, }, .{ .{ .{ .{"a"}, .{0}, 2 }, }, }, ); try testIndexAdd( .{ .{ .{ .{"a"}, .{0}, 1 }, }, .{ .{ .{"b"}, .{0}, 1 }, }, .{ .{ .{"c"}, .{0}, 1 }, }, .{ .{ .{"d"}, .{0}, 1 }, }, .{ .{ .{"e"}, .{0}, 1 }, }, .{ .{ .{"f"}, .{0}, 1 }, }, .{ .{ .{"g"}, .{0}, 1 }, }, }, .{ .{ .{ .{"a"}, .{0}, 1 }, .{ .{"b"}, .{0}, 1 }, .{ .{"c"}, .{0}, 1 }, .{ .{"d"}, .{0}, 1 }, }, .{ .{ .{"e"}, .{0}, 1 }, .{ .{"f"}, .{0}, 1 }, }, .{ .{ .{"g"}, .{0}, 1 }, }, }, ); // TODO bulk adds break the leveling try testIndexAdd( .{ .{ .{ .{"a"}, .{0}, 1 }, }, .{ .{ .{"b"}, .{0}, 1 }, }, .{ .{ .{"c"}, .{0}, 1 }, }, .{ .{ .{"d"}, .{0}, 1 }, }, .{ .{ .{"e"}, .{0}, 1 }, }, .{ .{ .{"f"}, .{0}, 1 }, }, .{ .{ .{"g"}, .{0}, 1 }, }, .{ .{ .{"h"}, .{0}, 1 }, .{ .{"i"}, .{0}, 1 }, .{ .{"j"}, .{0}, 1 }, .{ .{"k"}, .{0}, 1 }, .{ .{"l"}, .{0}, 1 }, .{ .{"m"}, .{0}, 1 }, .{ .{"n"}, .{0}, 1 }, .{ .{"o"}, .{0}, 1 }, }, }, .{ .{ .{ .{"a"}, .{0}, 1 }, .{ .{"b"}, .{0}, 1 }, .{ .{"c"}, .{0}, 1 }, .{ .{"d"}, .{0}, 1 }, .{ .{"e"}, .{0}, 1 }, .{ .{"f"}, .{0}, 1 }, .{ .{"g"}, .{0}, 1 }, .{ .{"h"}, .{0}, 1 }, .{ .{"i"}, .{0}, 1 }, .{ .{"j"}, .{0}, 1 }, .{ .{"k"}, .{0}, 1 }, .{ .{"l"}, .{0}, 1 }, .{ .{"m"}, .{0}, 1 }, .{ .{"n"}, .{0}, 1 }, .{ .{"o"}, .{0}, 1 }, }, }, ); } fn testChangeBatchSeekRowStart( anon_changes: anytype, ix: usize, anon_row: anytype, key_columns: usize, expected_ix: usize, ) !void { const changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_changes); defer allocator.free(changes); var row = dida.sugar.coerceAnonTo(allocator, dida.core.Row, anon_row); defer row.deinit(allocator); var builder = dida.core.ChangeBatchBuilder.init(allocator); defer builder.deinit(); for (changes) |change| { try builder.changes.append(change); } var change_batch = (try 
builder.finishAndReset()).?; defer change_batch.deinit(allocator); const actual_ix = change_batch.seekRowStart(ix, row, key_columns); try std.testing.expectEqual(expected_ix, actual_ix); } test "test change batch seek row start" { const changes = .{ .{ .{ "a", "x" }, .{0}, 1 }, .{ .{ "a", "y" }, .{1}, 1 }, .{ .{ "a", "z" }, .{2}, 1 }, .{ .{ "c", "c" }, .{0}, 1 }, .{ .{ "e", "e" }, .{0}, 1 }, }; try testChangeBatchSeekRowStart(changes, 0, .{"a"}, 1, 0); try testChangeBatchSeekRowStart(changes, 1, .{"a"}, 1, 1); try testChangeBatchSeekRowStart(changes, 2, .{"a"}, 1, 2); try testChangeBatchSeekRowStart(changes, 3, .{"a"}, 1, 3); try testChangeBatchSeekRowStart(changes, 4, .{"a"}, 1, 4); try testChangeBatchSeekRowStart(changes, 5, .{"a"}, 1, 5); try testChangeBatchSeekRowStart(changes, 0, .{"b"}, 1, 3); try testChangeBatchSeekRowStart(changes, 1, .{"b"}, 1, 3); try testChangeBatchSeekRowStart(changes, 2, .{"b"}, 1, 3); try testChangeBatchSeekRowStart(changes, 3, .{"b"}, 1, 3); try testChangeBatchSeekRowStart(changes, 4, .{"b"}, 1, 4); try testChangeBatchSeekRowStart(changes, 5, .{"b"}, 1, 5); try testChangeBatchSeekRowStart(changes, 0, .{"c"}, 1, 3); try testChangeBatchSeekRowStart(changes, 1, .{"c"}, 1, 3); try testChangeBatchSeekRowStart(changes, 2, .{"c"}, 1, 3); try testChangeBatchSeekRowStart(changes, 3, .{"c"}, 1, 3); try testChangeBatchSeekRowStart(changes, 4, .{"c"}, 1, 4); try testChangeBatchSeekRowStart(changes, 5, .{"c"}, 1, 5); try testChangeBatchSeekRowStart(changes, 0, .{"d"}, 1, 4); try testChangeBatchSeekRowStart(changes, 1, .{"d"}, 1, 4); try testChangeBatchSeekRowStart(changes, 2, .{"d"}, 1, 4); try testChangeBatchSeekRowStart(changes, 3, .{"d"}, 1, 4); try testChangeBatchSeekRowStart(changes, 4, .{"d"}, 1, 4); try testChangeBatchSeekRowStart(changes, 5, .{"d"}, 1, 5); try testChangeBatchSeekRowStart(changes, 0, .{"e"}, 1, 4); try testChangeBatchSeekRowStart(changes, 1, .{"e"}, 1, 4); try testChangeBatchSeekRowStart(changes, 2, .{"e"}, 1, 4); try 
testChangeBatchSeekRowStart(changes, 3, .{"e"}, 1, 4);
    try testChangeBatchSeekRowStart(changes, 4, .{"e"}, 1, 4);
    try testChangeBatchSeekRowStart(changes, 5, .{"e"}, 1, 5);
    try testChangeBatchSeekRowStart(changes, 0, .{"f"}, 1, 5);
    try testChangeBatchSeekRowStart(changes, 1, .{"f"}, 1, 5);
    try testChangeBatchSeekRowStart(changes, 2, .{"f"}, 1, 5);
    try testChangeBatchSeekRowStart(changes, 3, .{"f"}, 1, 5);
    try testChangeBatchSeekRowStart(changes, 4, .{"f"}, 1, 5);
    try testChangeBatchSeekRowStart(changes, 5, .{"f"}, 1, 5);
}

// Builds a change batch from `anon_changes` and checks that seekRowEnd,
// starting at index `ix` with the given key prefix, lands on `expected_ix`.
fn testChangeBatchSeekRowEnd(
    anon_changes: anytype,
    ix: usize,
    anon_row: anytype,
    key_columns: usize,
    expected_ix: usize,
) !void {
    const changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_changes);
    defer allocator.free(changes);
    var row = dida.sugar.coerceAnonTo(allocator, dida.core.Row, anon_row);
    defer row.deinit(allocator);
    var builder = dida.core.ChangeBatchBuilder.init(allocator);
    defer builder.deinit();
    for (changes) |change| {
        try builder.changes.append(change);
    }
    var change_batch = (try builder.finishAndReset()).?;
    defer change_batch.deinit(allocator);
    const actual_ix = change_batch.seekRowEnd(ix, row, key_columns);
    try std.testing.expectEqual(expected_ix, actual_ix);
}

// Renamed from "test change batch seek current row end": that name was a
// copy-paste slip - it duplicated the name of the seekCurrentRowEnd test
// below, while this test actually exercises seekRowEnd.
test "test change batch seek row end" {
    const changes = .{
        .{ .{ "a", "x" }, .{0}, 1 },
        .{ .{ "a", "y" }, .{1}, 1 },
        .{ .{ "a", "z" }, .{2}, 1 },
        .{ .{ "c", "c" }, .{0}, 1 },
        .{ .{ "e", "e" }, .{0}, 1 },
    };
    try testChangeBatchSeekRowEnd(changes, 0, .{"a"}, 1, 3);
    try testChangeBatchSeekRowEnd(changes, 1, .{"a"}, 1, 3);
    try testChangeBatchSeekRowEnd(changes, 2, .{"a"}, 1, 3);
    try testChangeBatchSeekRowEnd(changes, 3, .{"a"}, 1, 3);
    try testChangeBatchSeekRowEnd(changes, 4, .{"a"}, 1, 4);
    try testChangeBatchSeekRowEnd(changes, 5, .{"a"}, 1, 5);
    try testChangeBatchSeekRowEnd(changes, 0, .{"b"}, 1, 3);
    try testChangeBatchSeekRowEnd(changes, 1, .{"b"}, 1, 3);
    try testChangeBatchSeekRowEnd(changes, 2, .{"b"}, 1, 3);
    try
testChangeBatchSeekRowEnd(changes, 3, .{"b"}, 1, 3); try testChangeBatchSeekRowEnd(changes, 4, .{"b"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 5, .{"b"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 0, .{"c"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 1, .{"c"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 2, .{"c"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 3, .{"c"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 4, .{"c"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 5, .{"c"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 0, .{"d"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 1, .{"d"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 2, .{"d"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 3, .{"d"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 4, .{"d"}, 1, 4); try testChangeBatchSeekRowEnd(changes, 5, .{"d"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 0, .{"e"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 1, .{"e"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 2, .{"e"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 3, .{"e"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 4, .{"e"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 5, .{"e"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 0, .{"f"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 1, .{"f"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 2, .{"f"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 3, .{"f"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 4, .{"f"}, 1, 5); try testChangeBatchSeekRowEnd(changes, 5, .{"f"}, 1, 5); } fn testChangeBatchSeekCurrentRowEnd( anon_changes: anytype, ix: usize, key_columns: usize, expected_ix: usize, ) !void { const changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_changes); defer allocator.free(changes); var builder = dida.core.ChangeBatchBuilder.init(allocator); defer builder.deinit(); for (changes) |change| { try builder.changes.append(change); } var change_batch = (try builder.finishAndReset()).?; defer 
change_batch.deinit(allocator);
    const actual_ix = change_batch.seekCurrentRowEnd(ix, key_columns);
    try std.testing.expectEqual(expected_ix, actual_ix);
}

test "test change batch seek current row end" {
    const changes = .{
        .{ .{ "a", "x" }, .{0}, 1 },
        .{ .{ "a", "y" }, .{1}, 1 },
        .{ .{ "a", "z" }, .{2}, 1 },
        .{ .{ "c", "c" }, .{0}, 1 },
        .{ .{ "e", "e" }, .{0}, 1 },
    };
    try testChangeBatchSeekCurrentRowEnd(changes, 0, 1, 3);
    try testChangeBatchSeekCurrentRowEnd(changes, 1, 1, 3);
    try testChangeBatchSeekCurrentRowEnd(changes, 2, 1, 3);
    try testChangeBatchSeekCurrentRowEnd(changes, 3, 1, 4);
    try testChangeBatchSeekCurrentRowEnd(changes, 4, 1, 5);
    try testChangeBatchSeekCurrentRowEnd(changes, 5, 1, 5);
}

// Merge-joins two change batches on the first `key_columns` columns and
// checks the output batch against `anon_expected_changes`.
fn testChangeBatchJoin(
    anon_left_changes: anytype,
    anon_left_frontier: anytype,
    anon_right_changes: anytype,
    key_columns: usize,
    concat_order: dida.core.ConcatOrder,
    anon_expected_changes: anytype,
) !void {
    const left_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_left_changes);
    defer allocator.free(left_changes);
    var left_frontier = dida.sugar.coerceAnonTo(allocator, dida.core.Frontier, anon_left_frontier);
    defer left_frontier.deinit();
    const right_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_right_changes);
    // Fixed: this slice was never freed (unlike `left_changes` above), which
    // leaks under std.testing.allocator. The individual changes are moved
    // into the builder below, but the slice itself is ours to free.
    defer allocator.free(right_changes);
    const expected_changes = dida.sugar.coerceAnonTo(allocator, []dida.core.Change, anon_expected_changes);
    defer {
        for (expected_changes) |*change| change.deinit(allocator);
        allocator.free(expected_changes);
    }
    var left_builder = dida.core.ChangeBatchBuilder.init(allocator);
    defer left_builder.deinit();
    for (left_changes) |change| {
        try left_builder.changes.append(change);
    }
    var left_change_batch = (try left_builder.finishAndReset()).?;
    defer left_change_batch.deinit(allocator);
    var right_builder = dida.core.ChangeBatchBuilder.init(allocator);
    defer right_builder.deinit();
    for (right_changes) |change| {
        try right_builder.changes.append(change);
    }
    var right_change_batch = (try
// --- tail of testChangeBatchJoin (signature and batch construction are earlier in the file) ---
// Runs mergeJoin on the two freshly built change batches and deep-compares the output
// against `expected_changes`; a null output batch is compared against an empty change slice.
right_builder.finishAndReset()).?;
defer right_change_batch.deinit(allocator);
var output_builder = dida.core.ChangeBatchBuilder.init(allocator);
defer output_builder.deinit();
try left_change_batch.mergeJoin(left_frontier, right_change_batch, key_columns, concat_order, &output_builder);
if (try output_builder.finishAndReset()) |_output_change_batch| {
    var output_change_batch = _output_change_batch;
    defer output_change_batch.deinit(allocator);
    try expectDeepEqual(expected_changes, output_change_batch.changes);
} else {
    const actual_changes: []dida.core.Change = &[0]dida.core.Change{};
    try expectDeepEqual(expected_changes, actual_changes);
}
}

// Unit tests for ChangeBatch.mergeJoin. Each case is
// (left changes, left frontier, right changes, key_columns, concat_order, expected changes).
// In the expected rows below, counts multiply (2 * 3 = 6) and the output timestamp is the
// least upper bound (element-wise max) of the two input timestamps ({0,1} lub {1,0} = {1,1}).
test "test change batch join" {
    // single matching key
    try testChangeBatchJoin(
        .{ .{ .{"a"}, .{ 0, 1 }, 2 }, },
        .{ .{ 100, 100 }, },
        .{ .{ .{"a"}, .{ 1, 0 }, 3 }, },
        1,
        .LeftThenRight,
        .{ .{ .{"a"}, .{ 1, 1 }, 6 }, },
    );
    // keys differ => empty result
    try testChangeBatchJoin(
        .{ .{ .{"a"}, .{ 0, 1 }, 2 }, },
        .{ .{ 100, 100 }, },
        .{ .{ .{"b"}, .{ 1, 0 }, 3 }, },
        1,
        .LeftThenRight,
        .{},
    );
    // same key twice on each side => cross product of matches
    try testChangeBatchJoin(
        .{ .{ .{"a"}, .{ 0, 1 }, 2 }, .{ .{"a"}, .{ 0, 2 }, 5 }, },
        .{ .{ 100, 100 }, },
        .{ .{ .{"a"}, .{ 1, 0 }, 3 }, .{ .{"a"}, .{ 2, 0 }, 7 }, },
        1,
        .LeftThenRight,
        .{ .{ .{"a"}, .{ 1, 1 }, 6 }, .{ .{"a"}, .{ 1, 2 }, 15 }, .{ .{"a"}, .{ 2, 1 }, 14 }, .{ .{"a"}, .{ 2, 2 }, 35 }, },
    );
    // two distinct keys join independently
    try testChangeBatchJoin(
        .{ .{ .{"a"}, .{ 0, 1 }, 2 }, .{ .{"b"}, .{ 0, 2 }, 5 }, },
        .{ .{ 100, 100 }, },
        .{ .{ .{"a"}, .{ 1, 0 }, 3 }, .{ .{"b"}, .{ 2, 0 }, 7 }, },
        1,
        .LeftThenRight,
        .{ .{ .{"a"}, .{ 1, 1 }, 6 }, .{ .{"b"}, .{ 2, 2 }, 35 }, },
    );
    // self-join on single-column rows
    {
        const changes = .{ .{ .{"a"}, .{0}, 1 }, .{ .{"a"}, .{1}, 1 }, .{ .{"a"}, .{2}, 1 }, .{ .{"c"}, .{0}, 1 }, .{ .{"e"}, .{0}, 1 }, };
        try testChangeBatchJoin(
            changes,
            .{ .{100}, },
            changes,
            1,
            .LeftThenRight,
            .{ .{ .{"a"}, .{0}, 1 }, .{ .{"a"}, .{1}, 3 }, .{ .{"a"}, .{2}, 5 }, .{ .{"c"}, .{0}, 1 }, .{ .{"e"}, .{0}, 1 }, },
        );
    }
    // self-join on two-column rows: exercises concat order, the left frontier, and key_columns
    {
        const changes = .{ .{ .{ "a", "x" }, .{0}, 1 }, .{ .{ "a", "y" }, .{1}, 1 }, .{ .{ "a", "z" }, .{2}, 1 }, .{ .{ "c", "c" }, .{0}, 1 }, .{ .{ "e", "e" }, .{0}, 1 }, };
        try testChangeBatchJoin(
            changes,
            .{ .{100}, },
            changes,
            1,
            .LeftThenRight,
            .{ .{ .{ "a", "x", "x" }, .{0}, 1 }, .{ .{ "a", "x", "y" }, .{1}, 1 }, .{ .{ "a", "x", "z" }, .{2}, 1 }, .{ .{ "a", "y", "x" }, .{1}, 1 }, .{ .{ "a", "y", "y" }, .{1}, 1 }, .{ .{ "a", "y", "z" }, .{2}, 1 }, .{ .{ "a", "z", "x" }, .{2}, 1 }, .{ .{ "a", "z", "y" }, .{2}, 1 }, .{ .{ "a", "z", "z" }, .{2}, 1 }, .{ .{ "c", "c", "c" }, .{0}, 1 }, .{ .{ "e", "e", "e" }, .{0}, 1 }, },
        );
        // .RightThenLeft on a self-join yields the same rows
        try testChangeBatchJoin(
            changes,
            .{ .{100}, },
            changes,
            1,
            .RightThenLeft,
            .{ .{ .{ "a", "x", "x" }, .{0}, 1 }, .{ .{ "a", "x", "y" }, .{1}, 1 }, .{ .{ "a", "x", "z" }, .{2}, 1 }, .{ .{ "a", "y", "x" }, .{1}, 1 }, .{ .{ "a", "y", "y" }, .{1}, 1 }, .{ .{ "a", "y", "z" }, .{2}, 1 }, .{ .{ "a", "z", "x" }, .{2}, 1 }, .{ .{ "a", "z", "y" }, .{2}, 1 }, .{ .{ "a", "z", "z" }, .{2}, 1 }, .{ .{ "c", "c", "c" }, .{0}, 1 }, .{ .{ "e", "e", "e" }, .{0}, 1 }, },
        );
        // smaller left frontier {2}: the left-side ("a","z") change at time 2 is dropped,
        // so the ("a","z",*) output rows vanish (right-side time-2 rows still match)
        try testChangeBatchJoin(
            changes,
            .{ .{2}, },
            changes,
            1,
            .LeftThenRight,
            .{ .{ .{ "a", "x", "x" }, .{0}, 1 }, .{ .{ "a", "x", "y" }, .{1}, 1 }, .{ .{ "a", "x", "z" }, .{2}, 1 }, .{ .{ "a", "y", "x" }, .{1}, 1 }, .{ .{ "a", "y", "y" }, .{1}, 1 }, .{ .{ "a", "y", "z" }, .{2}, 1 }, .{ .{ "c", "c", "c" }, .{0}, 1 }, .{ .{ "e", "e", "e" }, .{0}, 1 }, },
        );
        // key_columns = 2: the whole row is the key, so each row matches only itself
        try testChangeBatchJoin(
            changes,
            .{ .{100}, },
            changes,
            2,
            .LeftThenRight,
            .{ .{ .{ "a", "x" }, .{0}, 1 }, .{ .{ "a", "y" }, .{1}, 1 }, .{ .{ "a", "z" }, .{2}, 1 }, .{ .{ "c", "c" }, .{0}, 1 }, .{ .{ "e", "e" }, .{0}, 1 }, },
        );
        try testChangeBatchJoin(
            changes,
            .{ .{2}, },
            changes,
            2,
            .LeftThenRight,
            .{ .{ .{ "a", "x" }, .{0}, 1 }, .{ .{ "a", "y" }, .{1}, 1 }, .{ .{ "c", "c" }, .{0}, 1 }, .{ .{ "e", "e" }, .{0}, 1 }, },
        );
    }
}

// Adds the given change batches to a fresh Index and checks
// Index.getCountForRowAsOf(row, timestamp) against `expected_count`.
// Anonymous-literal args are coerced to dida types via dida.sugar.coerceAnonTo.
// (body continues on the next dump line)
pub fn testIndexGetCountForRowAsOf(
    anon_change_batches: anytype,
    anon_row: anytype,
    anon_timestamp: anytype,
    expected_count: isize,
) !void {
    var index = dida.core.Index.init(allocator);
    defer index.deinit();
    const change_batches =
dida.sugar.coerceAnonTo(allocator, []dida.core.ChangeBatch, anon_change_batches); defer allocator.free(change_batches); var row = dida.sugar.coerceAnonTo(allocator, dida.core.Row, anon_row); defer row.deinit(allocator); var timestamp = dida.sugar.coerceAnonTo(allocator, dida.core.Timestamp, anon_timestamp); defer timestamp.deinit(allocator); for (change_batches) |change_batch| { try index.addChangeBatch(change_batch); } const actual_count = index.getCountForRowAsOf(row, timestamp); try std.testing.expectEqual(expected_count, actual_count); } test "test index get count for row as of" { const changes = .{ .{ .{ .{"a"}, .{1}, 1 }, }, .{ .{ .{"b"}, .{0}, 1 }, }, .{ .{ .{"c"}, .{0}, 1 }, }, .{ .{ .{"a"}, .{2}, 2 }, }, .{ .{ .{"e"}, .{0}, 1 }, }, .{ .{ .{"f"}, .{0}, 1 }, }, .{ .{ .{"a"}, .{0}, 1 }, }, .{ .{ .{"h"}, .{3}, 1 }, .{ .{"i"}, .{0}, 1 }, .{ .{"c"}, .{0}, 2 }, .{ .{"k"}, .{0}, 1 }, .{ .{"l"}, .{0}, 1 }, .{ .{"c"}, .{1}, -3 }, .{ .{"n"}, .{0}, 1 }, .{ .{"o"}, .{0}, 1 }, }, }; try testIndexGetCountForRowAsOf(changes, .{"a"}, .{0}, 1); try testIndexGetCountForRowAsOf(changes, .{"a"}, .{1}, 2); try testIndexGetCountForRowAsOf(changes, .{"a"}, .{2}, 4); try testIndexGetCountForRowAsOf(changes, .{"a"}, .{3}, 4); try testIndexGetCountForRowAsOf(changes, .{"c"}, .{0}, 3); try testIndexGetCountForRowAsOf(changes, .{"c"}, .{1}, 0); try testIndexGetCountForRowAsOf(changes, .{"h"}, .{0}, 0); try testIndexGetCountForRowAsOf(changes, .{"h"}, .{3}, 1); try testIndexGetCountForRowAsOf(changes, .{"z"}, .{3}, 0); } pub fn testNodeOutput(shard: *dida.core.Shard, node: dida.core.Node, anon_expected_change_batches: anytype) !void { const expected_change_batches = dida.sugar.coerceAnonTo(allocator, []dida.core.ChangeBatch, anon_expected_change_batches); defer { for (expected_change_batches) |*expected_change_batch| expected_change_batch.deinit(allocator); allocator.free(expected_change_batches); } var actual_change_batches = std.ArrayList(dida.core.ChangeBatch).init(allocator); defer 
// --- tail of testNodeOutput (declaration starts on the previous dump line) ---
{
    for (actual_change_batches.items) |*actual_change_batch| actual_change_batch.deinit(allocator);
    actual_change_batches.deinit();
}
while (shard.popOutput(node)) |actual_change_batch| try actual_change_batches.append(actual_change_batch);
const expected_len = expected_change_batches.len;
const actual_len = actual_change_batches.items.len;
// walk past the longest matching prefix, then report everything after it
var i: usize = 0;
while (i < std.math.min(expected_len, actual_len) and dida.util.deepEqual(expected_change_batches[i].changes, actual_change_batches.items[i].changes)) i += 1;
if (i < std.math.max(expected_len, actual_len)) {
    dida.util.dump(.{ .expected = expected_change_batches[i..], .actual = actual_change_batches.items[i..] });
    return error.TestExpectedEqual;
}
}

// Iterative graph reachability inside a loop subgraph: edges are pushed into the loop,
// joined against the frontier of reached pairs, and Distinct drives the fixpoint.
// A Reduce concatenates each node's reachable set into a string for a second output.
test "test shard graph reach" {
    var graph_builder = dida.core.GraphBuilder.init(allocator);
    defer graph_builder.deinit();
    const subgraph_0 = dida.core.Subgraph{ .id = 0 };
    const subgraph_1 = try graph_builder.addSubgraph(subgraph_0);
    // edges enter the loop subgraph via TimestampPush
    const edges = try graph_builder.addNode(subgraph_0, .Input);
    const edges_1 = try graph_builder.addNode(subgraph_1, .{ .TimestampPush = .{ .input = edges } });
    // reach_future's input is null here and wired up later via connectLoop (the back-edge)
    const reach_future = try graph_builder.addNode(subgraph_1, .{ .TimestampIncrement = .{ .input = null } });
    const reach_index = try graph_builder.addNode(subgraph_1, .{ .Index = .{ .input = reach_future } });
    const distinct_reach_index = try graph_builder.addNode(subgraph_1, .{ .Distinct = .{ .input = reach_index } });
    // (from, to) -> (to, from), so edges can be joined keyed on their source
    var swapped_edges_mapper = dida.core.NodeSpec.MapSpec.Mapper{
        .map_fn = (struct {
            fn swap(_: *dida.core.NodeSpec.MapSpec.Mapper, input: dida.core.Row) error{OutOfMemory}!dida.core.Row {
                var output_values = try allocator.alloc(dida.core.Value, 2);
                output_values[0] = try dida.util.deepClone(input.values[1], allocator);
                output_values[1] = try dida.util.deepClone(input.values[0], allocator);
                return dida.core.Row{ .values = output_values };
            }
        }).swap,
    };
    const swapped_edges = try graph_builder.addNode(subgraph_1, .{ .Map = .{ .input = edges_1, .mapper = &swapped_edges_mapper, }, });
    const swapped_edges_index = try graph_builder.addNode(subgraph_1, .{ .Index = .{ .input = swapped_edges } });
    const joined = try graph_builder.addNode(subgraph_1, .{ .Join = .{ .inputs = .{ distinct_reach_index, swapped_edges_index, }, .key_columns = 1, }, });
    // joined rows are (key, reached, from); keep (from, reached)
    var without_middle_mapper = dida.core.NodeSpec.MapSpec.Mapper{
        .map_fn = (struct {
            fn drop_middle(_: *dida.core.NodeSpec.MapSpec.Mapper, input: dida.core.Row) error{OutOfMemory}!dida.core.Row {
                var output_values = try allocator.alloc(dida.core.Value, 2);
                output_values[0] = try dida.util.deepClone(input.values[2], allocator);
                output_values[1] = try dida.util.deepClone(input.values[1], allocator);
                return dida.core.Row{ .values = output_values };
            }
        }).drop_middle,
    };
    const without_middle = try graph_builder.addNode(subgraph_1, .{ .Map = .{ .input = joined, .mapper = &without_middle_mapper, }, });
    // reach = edges union (one more hop), fed back into reach_future
    const reach = try graph_builder.addNode(subgraph_1, .{ .Union = .{ .inputs = .{ edges_1, without_middle } } });
    graph_builder.connectLoop(reach, reach_future);
    const reach_pop = try graph_builder.addNode(subgraph_0, .{ .TimestampPop = .{ .input = distinct_reach_index } });
    const reach_out = try graph_builder.addNode(subgraph_0, .{ .Output = .{ .input = reach_pop } });
    // reducer: append the reached-node string `count` times per row
    var reducer = dida.core.NodeSpec.ReduceSpec.Reducer{
        .reduce_fn = (struct {
            fn concat(_: *dida.core.NodeSpec.ReduceSpec.Reducer, reduced_value: dida.core.Value, row: dida.core.Row, count: usize) !dida.core.Value {
                var string = std.ArrayList(u8).init(allocator);
                try string.appendSlice(reduced_value.String);
                var i: usize = 0;
                while (i < count) : (i += 1) {
                    try string.appendSlice(row.values[1].String);
                }
                return dida.core.Value{ .String = string.toOwnedSlice() };
            }
        }).concat,
    };
    const reach_summary = try graph_builder.addNode(subgraph_1, .{ .Reduce = .{
        .input = distinct_reach_index,
        .key_columns = 1,
        .init_value = .{ .String = "" },
        .reducer = &reducer,
    } });
    // note: this output stays inside subgraph_1 (no TimestampPop), so its
    // timestamps below keep the loop coordinate: { outer, iteration }
    const reach_summary_out = try graph_builder.addNode(subgraph_1, .{ .Output = .{ .input = reach_summary } });
    var graph = try graph_builder.finishAndReset();
    defer graph.deinit();
    var shard = try dida.core.Shard.init(allocator, &graph);
    defer shard.deinit();
    const timestamp0 = dida.core.Timestamp{ .coords = &[_]u64{0} };
    const timestamp1 = dida.core.Timestamp{ .coords = &[_]u64{1} };
    const timestamp2 = dida.core.Timestamp{ .coords = &[_]u64{2} };
    // edge rows: a->b, b->c, b->d, c->a; b->c is retracted at time 1
    const ab = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .String = "a" }, .{ .String = "b" } } };
    const bc = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .String = "b" }, .{ .String = "c" } } };
    const bd = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .String = "b" }, .{ .String = "d" } } };
    const ca = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .String = "c" }, .{ .String = "a" } } };
    try shard.pushInput(edges, .{ .row = try dida.util.deepClone(ab, allocator), .timestamp = try dida.util.deepClone(timestamp0, allocator), .diff = 1 });
    try shard.pushInput(edges, .{ .row = try dida.util.deepClone(bc, allocator), .timestamp = try dida.util.deepClone(timestamp0, allocator), .diff = 1 });
    try shard.pushInput(edges, .{ .row = try dida.util.deepClone(bd, allocator), .timestamp = try dida.util.deepClone(timestamp0, allocator), .diff = 1 });
    try shard.pushInput(edges, .{ .row = try dida.util.deepClone(ca, allocator), .timestamp = try dida.util.deepClone(timestamp0, allocator), .diff = 1 });
    try shard.pushInput(edges, .{ .row = try dida.util.deepClone(bc, allocator), .timestamp = try dida.util.deepClone(timestamp1, allocator), .diff = -1 });
    try shard.flushInput(edges);
    try shard.advanceInput(edges, timestamp1);
    while (shard.hasWork()) try shard.doWork();
    // transitive closure at time 0, one batch per round: base edges, then each round of new pairs
    try testNodeOutput(&shard, reach_out, .{
        .{ .{ .{ "a", "b" }, .{0}, 1 }, .{ .{ "b", "c" }, .{0}, 1 }, .{ .{ "b", "d" }, .{0}, 1 }, .{ .{ "c", "a" }, .{0}, 1 }, },
        .{ .{ .{ "a", "c" }, .{0}, 1 }, .{ .{ "a", "d" }, .{0}, 1 }, .{ .{ "b", "a" }, .{0}, 1 }, .{ .{ "c", "b" }, .{0}, 1 }, },
        .{ .{ .{ "a", "a" }, .{0}, 1 }, .{ .{ "b", "b" }, .{0}, 1 }, .{ .{ "c", "c" }, .{0}, 1 }, .{ .{ "c", "d" }, .{0}, 1 }, },
    });
    // each iteration retracts the previous iteration's summary and asserts the refined one
    try testNodeOutput(&shard, reach_summary_out, .{
        .{ .{ .{ "a", "b" }, .{ 0, 1 }, 1 }, .{ .{ "b", "cd" }, .{ 0, 1 }, 1 }, .{ .{ "c", "a" }, .{ 0, 1 }, 1 }, },
        .{ .{ .{ "a", "b" }, .{ 0, 2 }, -1 }, .{ .{ "b", "cd" }, .{ 0, 2 }, -1 }, .{ .{ "c", "a" }, .{ 0, 2 }, -1 }, .{ .{ "a", "bcd" }, .{ 0, 2 }, 1 }, .{ .{ "b", "acd" }, .{ 0, 2 }, 1 }, .{ .{ "c", "ab" }, .{ 0, 2 }, 1 }, },
        .{ .{ .{ "a", "bcd" }, .{ 0, 3 }, -1 }, .{ .{ "b", "acd" }, .{ 0, 3 }, -1 }, .{ .{ "c", "ab" }, .{ 0, 3 }, -1 }, .{ .{ "a", "abcd" }, .{ 0, 3 }, 1 }, .{ .{ "b", "abcd" }, .{ 0, 3 }, 1 }, .{ .{ "c", "abcd" }, .{ 0, 3 }, 1 }, },
    });
    // advancing past time 1 lets the b->c retraction propagate: only negative diffs below
    try shard.advanceInput(edges, timestamp2);
    while (shard.hasWork()) try shard.doWork();
    try testNodeOutput(&shard, reach_out, .{
        .{ .{ .{ "b", "c" }, .{1}, -1 }, },
        .{ .{ .{ "a", "c" }, .{1}, -1 }, .{ .{ "b", "a" }, .{1}, -1 }, },
        .{ .{ .{ "a", "a" }, .{1}, -1 }, .{ .{ "b", "b" }, .{1}, -1 }, .{ .{ "c", "c" }, .{1}, -1 }, },
    });
    try testNodeOutput(&shard, reach_summary_out, .{
        .{ .{ .{ "b", "cd" }, .{ 1, 1 }, -1 }, .{ .{ "b", "d" }, .{ 1, 1 }, 1 }, },
        .{ .{ .{ "a", "bcd" }, .{ 1, 2 }, -1 }, .{ .{ "b", "cd" }, .{ 1, 2 }, 1 }, .{ .{ "b", "acd" }, .{ 1, 2 }, -1 }, .{ .{ "a", "bd" }, .{ 1, 2 }, 1 }, },
        .{ .{ .{ "a", "bcd" }, .{ 1, 3 }, 1 }, .{ .{ "b", "acd" }, .{ 1, 3 }, 1 }, .{ .{ "a", "abcd" }, .{ 1, 3 }, -1 }, .{ .{ "b", "abcd" }, .{ 1, 3 }, -1 }, .{ .{ "c", "abcd" }, .{ 1, 3 }, -1 }, .{ .{ "c", "abd" }, .{ 1, 3 }, 1 }, },
    });
}

// Soak test: random transactions flow through a credit/debit dataflow; since every
// transfer credits and debits the same amount, the global total balance must stay 0.
// (body continues on the following dump lines)
pub fn testShardTotalBalance() !void {
    var graph_builder = dida.core.GraphBuilder.init(allocator);
    defer graph_builder.deinit();
    const subgraph_0 = dida.core.Subgraph{ .id = 0 };
    // transactions look like (from, to, amount)
    const transactions = try graph_builder.addNode(subgraph_0, .Input);
    var credits_mapper = dida.core.NodeSpec.MapSpec.Mapper{
        .map_fn = (struct {
            fn map(_: *dida.core.NodeSpec.MapSpec.Mapper, input: dida.core.Row) error{OutOfMemory}!dida.core.Row { //
// (to, amount)
                var output_values = try allocator.alloc(dida.core.Value, 2);
                output_values[0] = try dida.util.deepClone(input.values[1], allocator);
                output_values[1] = try dida.util.deepClone(input.values[2], allocator);
                return dida.core.Row{ .values = output_values };
            }
        }).map,
    };
    const account_credits = try graph_builder.addNode(subgraph_0, .{ .Map = .{ .input = transactions, .mapper = &credits_mapper, } });
    const account_credits_index = try graph_builder.addNode(subgraph_0, .{ .Index = .{ .input = account_credits } });
    var debits_mapper = dida.core.NodeSpec.MapSpec.Mapper{
        .map_fn = (struct {
            fn map(_: *dida.core.NodeSpec.MapSpec.Mapper, input: dida.core.Row) error{OutOfMemory}!dida.core.Row {
                // (from, amount)
                var output_values = try allocator.alloc(dida.core.Value, 2);
                output_values[0] = try dida.util.deepClone(input.values[0], allocator);
                output_values[1] = try dida.util.deepClone(input.values[2], allocator);
                return dida.core.Row{ .values = output_values };
            }
        }).map,
    };
    const account_debits = try graph_builder.addNode(subgraph_0, .{ .Map = .{ .input = transactions, .mapper = &debits_mapper, } });
    const account_debits_index = try graph_builder.addNode(subgraph_0, .{ .Index = .{ .input = account_debits } });
    // shared reducer: running sum of row.values[1], weighted by the row's count
    var summer = dida.core.NodeSpec.ReduceSpec.Reducer{
        .reduce_fn = (struct {
            fn sum(_: *dida.core.NodeSpec.ReduceSpec.Reducer, reduced_value: dida.core.Value, row: dida.core.Row, count: usize) !dida.core.Value {
                return dida.core.Value{ .Number = reduced_value.Number + (row.values[1].Number * @intToFloat(f64, count)) };
            }
        }).sum,
    };
    const account_credit = try graph_builder.addNode(subgraph_0, .{ .Reduce = .{ .input = account_credits_index, .key_columns = 1, .init_value = .{ .Number = 0 }, .reducer = &summer, } });
    const account_debit = try graph_builder.addNode(subgraph_0, .{ .Reduce = .{ .input = account_debits_index, .key_columns = 1, .init_value = .{ .Number = 0 }, .reducer = &summer, } });
    const credit_and_debit = try graph_builder.addNode(subgraph_0, .{ .Join = .{ .inputs = .{ account_credit, account_debit, }, .key_columns = 1, } });
    var balance_mapper = dida.core.NodeSpec.MapSpec.Mapper{
        .map_fn = (struct {
            fn map(_: *dida.core.NodeSpec.MapSpec.Mapper, input: dida.core.Row) error{OutOfMemory}!dida.core.Row {
                // (account, credit - debit)
                var output_values = try allocator.alloc(dida.core.Value, 2);
                output_values[0] = try dida.util.deepClone(input.values[0], allocator);
                output_values[1] = .{ .Number = input.values[1].Number - input.values[2].Number };
                return dida.core.Row{ .values = output_values };
            }
        }).map,
    };
    const balance = try graph_builder.addNode(subgraph_0, .{ .Map = .{ .input = credit_and_debit, .mapper = &balance_mapper, } });
    const balance_index = try graph_builder.addNode(subgraph_0, .{ .Index = .{ .input = balance } });
    // key_columns = 0: one global sum over every account's balance
    const total_balance = try graph_builder.addNode(subgraph_0, .{ .Reduce = .{ .input = balance_index, .key_columns = 0, .init_value = .{ .Number = 0 }, .reducer = &summer, } });
    const total_balance_out = try graph_builder.addNode(subgraph_0, .{ .Output = .{ .input = total_balance } });
    var graph = try graph_builder.finishAndReset();
    defer graph.deinit();
    var shard = try dida.core.Shard.init(allocator, &graph);
    defer shard.deinit();
    // TODO this is a hack to get around the fact that empty reduces don't return any results, which makes the join not work out
    // (seeds every possible u4 account with a zero-amount self-transfer at time 0)
    var account: usize = 0;
    while (account <= std.math.maxInt(u4)) : (account += 1) {
        const row = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .Number = @intToFloat(f64, account) }, .{ .Number = @intToFloat(f64, account) }, .{ .Number = @intToFloat(f64, 0) }, } };
        const timestamp = dida.core.Timestamp{ .coords = &[_]u64{0} };
        try shard.pushInput(transactions, .{ .row = try dida.util.deepClone(row, allocator), .timestamp = try dida.util.deepClone(timestamp, allocator), .diff = 1 });
    }
    try shard.advanceInput(transactions, .{ .coords = &[_]u64{1} });
    while (shard.hasWork()) try shard.doWork();
    // initial total balance: row {0} at time {0} with diff 1
    try testNodeOutput(&shard, total_balance_out, .{.{.{ .{0}, .{0}, 1 }}});
    var rng = std.rand.DefaultPrng.init(0);
    const random = rng.random();
    // phase 1: one random transaction per tick, doing work and checking after every advance;
    // the total never changes from 0, so no output batches are expected
    var time: usize = 1;
    while (time < 100) : (time += 1) {
        const from_account = random.int(u4);
        const to_account = random.int(u4);
        const amount = random.int(u8);
        const skew = random.int(u3);
        const row = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .Number = @intToFloat(f64, from_account) }, .{ .Number = @intToFloat(f64, to_account) }, .{ .Number = @intToFloat(f64, amount) }, } };
        // timestamps are skewed up to 7 ticks ahead of the input frontier
        const timestamp = dida.core.Timestamp{ .coords = &[_]u64{time + @as(usize, skew)} };
        try shard.pushInput(transactions, .{ .row = try dida.util.deepClone(row, allocator), .timestamp = try dida.util.deepClone(timestamp, allocator), .diff = 1 });
        try shard.advanceInput(transactions, .{ .coords = &[_]u64{time + 1} });
        while (shard.hasWork()) try shard.doWork();
        try testNodeOutput(&shard, total_balance_out, .{});
    }
    // this time, add all the inputs before doing work
    while (time < 200) : (time += 1) {
        const from_account = random.int(u4);
        const to_account = random.int(u4);
        const amount = random.int(u8);
        const skew = random.int(u3);
        const row = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .Number = @intToFloat(f64, from_account) }, .{ .Number = @intToFloat(f64, to_account) }, .{ .Number = @intToFloat(f64, amount) }, } };
        const timestamp = dida.core.Timestamp{ .coords = &[_]u64{time + @as(usize, skew)} };
        try shard.pushInput(transactions, .{ .row = try dida.util.deepClone(row, allocator), .timestamp = try dida.util.deepClone(timestamp, allocator), .diff = 1 });
        try shard.advanceInput(transactions, .{ .coords = &[_]u64{time + 1} });
    }
    while (shard.hasWork()) try shard.doWork();
    try testNodeOutput(&shard, total_balance_out, .{});
    // this time, do one big input batch
    while (time < 300) : (time += 1) {
        const from_account = random.int(u4);
        const to_account = random.int(u4);
        const amount = random.int(u8);
        const skew = random.int(u3);
        const row = dida.core.Row{ .values = &[_]dida.core.Value{ .{ .Number = @intToFloat(f64, from_account) }, .{ .Number = @intToFloat(f64, to_account) }, .{ .Number = @intToFloat(f64, amount) }, } };
        const timestamp = dida.core.Timestamp{ .coords = &[_]u64{time + @as(usize, skew)} };
        try shard.pushInput(transactions, .{ .row = try dida.util.deepClone(row, allocator), .timestamp = try dida.util.deepClone(timestamp, allocator), .diff = 1 });
    }
    try shard.advanceInput(transactions, .{ .coords = &[_]u64{time + 1} });
    while (shard.hasWork()) try shard.doWork();
    try testNodeOutput(&shard, total_balance_out, .{});
}

test "test shard total balance" {
    try testShardTotalBalance();
}

================================================
FILE: test/should_panic/reentry.zig
================================================
// NOTE(review): lives under test/should_panic — building this dataflow (a node
// re-imported into the loop it was exported from) is expected to panic.
const std = @import("std");
const dida = @import("../../lib/dida.zig");

var gpa = std.heap.GeneralPurposeAllocator(.{
    .safety = true,
    .never_unmap = true,
}){};
var arena = std.heap.ArenaAllocator.init(&gpa.allocator);
const allocator = &arena.allocator;

pub fn main() !void {
    var sugar = dida.sugar.Sugar.init(allocator);
    const edges = sugar.input();
    const loop = sugar.loop();
    _ = loop.importNode(loop.exportNode(loop.importNode(edges)));
    sugar.build();
}

================================================
FILE: test.sh
================================================
#! /usr/bin/env bash
set -ue

echo 'Running unit tests'
nix-shell ./shell.nix --run 'zig test test/core.zig --main-pkg-path ./'

echo 'Checking that node bindings build'
pushd bindings/node
nix-shell ./shell.nix --run 'zig build install && zig build run-codegen'
popd

echo 'Checking that wasm bindings build'
pushd bindings/wasm
nix-shell ./shell.nix --run 'zig build install && zig build run-codegen'
popd

echo 'Checking that debugger builds'
pushd debugger
nix-shell ./shell.nix --run 'zig build install'
popd