Repository: ocaml-multicore/domainslib Branch: main Commit: 2a884868ff69 Files: 45 Total size: 80.3 KB Directory structure: gitextract_mn8ugyhl/ ├── .github/ │ └── workflows/ │ └── main.yml ├── .gitignore ├── CHANGES.md ├── CODE_OF_CONDUCT.md ├── LICENSE.md ├── Makefile ├── README.md ├── domainslib.opam ├── dune-project ├── lib/ │ ├── chan.ml │ ├── chan.mli │ ├── domainslib.ml │ ├── dune │ ├── fun_queue.ml │ ├── fun_queue.mli │ ├── multi_channel.ml │ ├── task.ml │ └── task.mli └── test/ ├── LU_decomposition_multicore.ml ├── backtrace.ml ├── chan_stm_tests.ml ├── dune ├── enumerate_par.ml ├── fib.ml ├── fib_par.ml ├── game_of_life.ml ├── game_of_life_multicore.ml ├── kcas_integration.ml ├── off_by_one.ml ├── prefix_sum.ml ├── spectralnorm2.ml ├── spectralnorm2_multicore.ml ├── sum_par.ml ├── summed_area_table.ml ├── task_more_deps.ml ├── task_one_dep.ml ├── task_parallel.ml ├── task_throughput.ml ├── test_chan.ml ├── test_deadlock.ml ├── test_parallel_find.ml ├── test_parallel_scan.ml ├── test_task.ml ├── test_task_crash.ml └── test_task_empty.ml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/main.yml ================================================ name: main on: pull_request: push: schedule: # Prime the caches every Monday - cron: 0 1 * * MON jobs: windows: runs-on: windows-latest env: QCHECK_MSG_INTERVAL: '60' steps: - name: Checkout code uses: actions/checkout@v2 - name: Use OCaml ${{ matrix.ocaml-compiler }} uses: ocaml/setup-ocaml@v2 with: opam-pin: false opam-depext: false ocaml-compiler: ocaml.5.0.0,ocaml-option-mingw opam-repositories: | dra27: https://github.com/dra27/opam-repository.git#windows-5.0 default: https://github.com/ocaml-opam/opam-repository-mingw.git#sunset upstream: https://github.com/ocaml/opam-repository.git cache-prefix: ${{ steps.multicore_hash.outputs.commit }} - run: opam install . --deps-only --with-test - run: opam exec -- dune build - run: opam exec -- dune runtest ================================================ FILE: .gitignore ================================================ *~ _build .merlin ================================================ FILE: CHANGES.md ================================================ ## 0.5.2 * Upgrade to Saturn 1.0 (#129, @Sudha247) * Update README.md instruction to use OCaml 5.1.0 (#123, @punchagan) * Fix Saturn.Queue function (#121, @Sudha247) * Make parallel_scan work on noncommutative functions (#118, @aytao) * Test condition tweaks (#113, @jmid) * Adjust PBTs based on recommended_domain_count (#112, @jmid) ## 0.5.1 * Add parallel_find (#90, @gasche) * Update CI (#93, @Sudha247) * Optimisation to work-stealing (#96, @art-w) * Improve docs presentation (#99, @metanivek) * Property based tests (#100, jmid) * Task: avoid double handler installation (#101, @gasche & @clef-men) * Fix a benign data-race in Chan reported by ocaml-tsan (#103, @art-w) * Dune, opam, and GitHub Actions fixes (#105, @MisterDA) * domain local await support (#107, @polytypic) * Windows run on GitHub Actions (#110, @Sudha247) * Adjust PBTs based on recommended_domain_count (#112, @jmid) * Test condition tweaks (#113, @jmid) ## 0.5.0 This release includes: * Bug fix for `parallel_for_reduce` on empty loops. * Make Chan.t and Task.promise injective #69 * Add lockfree dependency #70 * CI fixes (#73, #76) * Breaking change: Rename `num_additional_domains` to `num_domains` for setup_pool * Documentation updates (#80, #81, #82) ## 0.4.2 Includes Effect.eff -> Effect.t change from OCaml trunk. (#65) ## 0.4.1 This release fixes compatibility with OCaml 5.00.0+trunk in #61. Breaks compatibility with older Multicore variants 4.12.0+domains and 4.12.0+domains+effects ## 0.4.0 This release includes: * Usage of effect handlers for task creation. This introduces a breaking change; all computations need to be enclosed in a Task.run function. See #51. * Multi_channel uses a per-channel domain-local key, removing the global key. #50 * Bug fixes in parallel_scan. #60 ## 0.3.2 Corresponding updates for breaking changes introduced in ocaml-multicore/ocaml-multicore#704 * Updated with the new interface Domain.cpu_relax * Domain.timer_ticks replaced with Mirage clock. ## 0.3.1 * #45 adds support for named pools. This is a breaking change with setup_pool taking an optional name parameter and an extra unit parameter. * A minor bug fix in parallel_for_reduce. ## 0.3.0 This release includes: * A breaking change for Task pools where the num_domains argument has been renamed num_additional_domains to clear up potential confusion; see #31. * A new work-stealing scheduler for Task pools using domain local Chase Lev deques #29; this can improve performance significantly for some workloads. * A removal of closure allocation in Chan #28. * A move to using the Mutex & Condition modules for the implementation of Chan #24. * Various documentation and packaging improvements (#21, #27, #30, #32). ## 0.2.2 Updates to: * parallel_for to use new task distribution algorithm and allow default chunk_size (#16) * parallel_for_reduce to use new task distribution algorithm and allow default chunk_size parameter (#18) ## 0.2.1 * `recv_poll` made non-allocating * Addition of parallel_scan #5 ## 0.2.0 * New Tasks library with support for async/await parallelism and parallel for loops. * Adds support for non-blocking Chan.send_poll and Chan.recv_poll. Thanks to @gasche for API design discussions. ## 0.1.0 Initial release ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Code of Conduct This project has adopted the [OCaml Code of Conduct](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md). # Enforcement This project follows the OCaml Code of Conduct [enforcement policy](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md#enforcement). To report any violations, please contact: * KC Sivaramakrishnan * Sudha Parimala * Vesa Karvonen ================================================ FILE: LICENSE.md ================================================ Copyright (c) 2016 KC Sivaramakrishnan Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ================================================ FILE: Makefile ================================================ all: dune build @install run_test: OCAMLRUNPARAM="b=1" dune runtest -f clean: dune clean ================================================ FILE: README.md ================================================ # Domainslib - Nested-parallel programming Domainslib provides support for nested-parallel programming. Domainslib provides async/await mechanism for spawning parallel tasks and awaiting their results. On top of this mechanism, domainslib provides parallel iteration functions. At its core, domainslib has an efficient implementation of work-stealing queue in order to efficiently share tasks with other domains. Here is a _sequential_ program that computes nth Fibonacci number using recursion: ```ocaml (* fib.ml *) let n = try int_of_string Sys.argv.(1) with _ -> 1 let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) let main () = let r = fib n in Printf.printf "fib(%d) = %d\n%!" n r let _ = main () ``` We can parallelise this program using Domainslib: ```ocaml (* fib_par.ml *) let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 let n = try int_of_string Sys.argv.(2) with _ -> 1 (* Sequential Fibonacci *) let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) module T = Domainslib.Task let rec fib_par pool n = if n > 20 then begin let a = T.async pool (fun _ -> fib_par pool (n-1)) in let b = T.async pool (fun _ -> fib_par pool (n-2)) in T.await pool a + T.await pool b end else (* Call sequential Fibonacci if the available work is small *) fib n let main () = let pool = T.setup_pool ~num_domains:(num_domains - 1) () in let res = T.run pool (fun _ -> fib_par pool n) in T.teardown_pool pool; Printf.printf "fib(%d) = %d\n" n res let _ = main () ``` The parallel program scales nicely compared to the sequential version. The results presented below were obtained on a 2.3 GHz Quad-Core Intel Core i7 MacBook Pro with 4 cores and 8 hardware threads. ```bash $ hyperfine './fib.exe 42' './fib_par.exe 2 42' \ './fib_par.exe 4 42' './fib_par.exe 8 42' Benchmark 1: ./fib.exe 42 Time (mean ± sd): 1.217 s ± 0.018 s [User: 1.203 s, System: 0.004 s] Range (min … max): 1.202 s … 1.261 s 10 runs Benchmark 2: ./fib_par.exe 2 42 Time (mean ± sd): 628.2 ms ± 2.9 ms [User: 1243.1 ms, System: 4.9 ms] Range (min … max): 625.7 ms … 634.5 ms 10 runs Benchmark 3: ./fib_par.exe 4 42 Time (mean ± sd): 337.6 ms ± 23.4 ms [User: 1321.8 ms, System: 8.4 ms] Range (min … max): 318.5 ms … 377.6 ms 10 runs Benchmark 4: ./fib_par.exe 8 42 Time (mean ± sd): 250.0 ms ± 9.4 ms [User: 1877.1 ms, System: 12.6 ms] Range (min … max): 242.5 ms … 277.3 ms 11 runs Summary './fib_par2.exe 8 42' ran 1.35 ± 0.11 times faster than './fib_par.exe 4 42' 2.51 ± 0.10 times faster than './fib_par.exe 2 42' 4.87 ± 0.20 times faster than './fib.exe 42' ``` More example programs are available [here](https://github.com/ocaml-multicore/domainslib/tree/master/test). ## Installation You can install this library using `OPAM`. ```bash $ opam switch create 5.3.0 $ opam install domainslib ``` ## Development If you are interested in hacking on the implementation, then `opam pin` this repository: ```bash $ opam switch create 5.0.0+trunk --repo=default,alpha=git+https://github.com/kit-ty-kate/opam-alpha-repository.git $ git clone https://github.com/ocaml-multicore/domainslib $ cd domainslib $ opam pin add domainslib file://`pwd` ``` ================================================ FILE: domainslib.opam ================================================ # This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "Parallel Structures over Domains for Multicore OCaml" maintainer: ["KC Sivaramakrishnan " "Sudha Parimala"] authors: ["KC Sivaramakrishnan "] license: "ISC" homepage: "https://github.com/ocaml-multicore/domainslib" doc: "https://ocaml-multicore.github.io/domainslib/doc" bug-reports: "https://github.com/ocaml-multicore/domainslib/issues" depends: [ "dune" {>= "3.0"} "ocaml" {>= "5.0"} "saturn" {>= "1.0.0"} "domain-local-await" {>= "0.1.0"} "kcas" {>= "0.3.0" & with-test} "mirage-clock-unix" {with-test & >= "4.2.0"} "qcheck-core" {with-test & >= "0.20"} "qcheck-multicoretests-util" {with-test & >= "0.1"} "qcheck-stm" {with-test & >= "0.1"} "odoc" {with-doc} ] build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test} "@doc" {with-doc} ] ] dev-repo: "git+https://github.com/ocaml-multicore/domainslib.git" ================================================ FILE: dune-project ================================================ (lang dune 3.0) (name domainslib) (formatting disabled) (generate_opam_files true) (source (github ocaml-multicore/domainslib)) (authors "KC Sivaramakrishnan ") (maintainers "KC Sivaramakrishnan " "Sudha Parimala") (documentation "https://ocaml-multicore.github.io/domainslib/doc") (license "ISC") (package (name domainslib) (synopsis "Parallel Structures over Domains for Multicore OCaml") (depends (ocaml (>= "5.0")) (saturn (>= "0.4.0")) (domain-local-await (>= 0.1.0)) (kcas (and (>= 0.3.0) :with-test)) (mirage-clock-unix (and :with-test (>= "4.2.0"))) (qcheck-core (and :with-test (>= "0.20"))) (qcheck-multicoretests-util (and :with-test (>= "0.1"))) (qcheck-stm (and :with-test (>= "0.1"))))) ================================================ FILE: lib/chan.ml ================================================ (* mutex_condvar will be used per domain; so multiple fibers or systhreads may share a mutex_condvar variable *) type mutex_condvar = { mutex: Mutex.t; condition: Condition.t } type waiting_notified = | Waiting | Notified type 'a contents = | Empty of {receivers: ('a option ref * mutex_condvar) Fun_queue.t} | NotEmpty of {senders: ('a * waiting_notified ref * mutex_condvar) Fun_queue.t; messages: 'a Fun_queue.t} type 'a t = { buffer_size: int option; contents: 'a contents Atomic.t } let mutex_condvar_key = Domain.DLS.new_key (fun () -> let m = Mutex.create () in let c = Condition.create () in {mutex=m; condition=c}) let make_bounded n = if n < 0 then invalid_arg "Chan.make_bounded" ; {buffer_size= Some n; contents = Atomic.make (Empty {receivers= Fun_queue.empty; })} let make_unbounded () = {buffer_size= None; contents = Atomic.make (Empty {receivers= Fun_queue.empty})} (* [send'] is shared by both the blocking and polling versions. Returns a * boolean indicating whether the send was successful. Hence, it always returns * [true] if [polling] is [false]. *) let rec send' {buffer_size; contents} v ~polling = let open Fun_queue in let old_contents = Atomic.get contents in match old_contents with | Empty {receivers} -> begin (* The channel is empty (no senders) *) match pop receivers with | None -> (* The channel is empty (no senders) and no waiting receivers *) if buffer_size = Some 0 then (* The channel is empty (no senders), no waiting receivers, and * buffer size is 0 *) begin if not polling then begin (* The channel is empty (no senders), no waiting receivers, * buffer size is 0 and we're not polling *) let mc = Domain.DLS.get mutex_condvar_key in let cond_slot = ref Waiting in let new_contents = NotEmpty {messages= empty; senders= push empty (v, cond_slot, mc)} in if Atomic.compare_and_set contents old_contents new_contents then begin Mutex.lock mc.mutex; while !cond_slot = Waiting do Condition.wait mc.condition mc.mutex done; Mutex.unlock mc.mutex; true end else send' {buffer_size; contents} v ~polling end else (* The channel is empty (no senders), no waiting receivers, * buffer size is 0 and we're polling *) false end else (* The channel is empty (no senders), no waiting receivers, and * the buffer size is non-zero *) let new_contents = NotEmpty {messages= push empty v; senders= empty} in if Atomic.compare_and_set contents old_contents new_contents then true else send' {buffer_size; contents} v ~polling | Some ((r, mc), receivers') -> (* The channel is empty (no senders) and there are waiting receivers * *) let new_contents = Empty {receivers= receivers'} in if Atomic.compare_and_set contents old_contents new_contents then begin Mutex.lock mc.mutex; r := Some v; Mutex.unlock mc.mutex; Condition.broadcast mc.condition; true end else send' {buffer_size; contents} v ~polling end | NotEmpty {senders; messages} -> (* The channel is not empty *) if buffer_size = Some (length messages) then (* The channel is not empty, and the buffer is full *) begin if not polling then (* The channel is not empty, the buffer is full and we're not * polling *) let cond_slot = ref Waiting in let mc = Domain.DLS.get mutex_condvar_key in let new_contents = NotEmpty {senders= push senders (v, cond_slot, mc); messages} in if Atomic.compare_and_set contents old_contents new_contents then begin Mutex.lock mc.mutex; while !cond_slot = Waiting do Condition.wait mc.condition mc.mutex; done; Mutex.unlock mc.mutex; true end else send' {buffer_size; contents} v ~polling else (* The channel is not empty, the buffer is full and we're * polling *) false end else (* The channel is not empty, and the buffer is not full *) let new_contents = NotEmpty {messages= push messages v; senders} in if Atomic.compare_and_set contents old_contents new_contents then true else send' {buffer_size; contents} v ~polling let send c v = let r = send' c v ~polling:false in assert r let send_poll c v = send' c v ~polling:true (* [recv'] is shared by both the blocking and polling versions. Returns a an * optional value indicating whether the receive was successful. Hence, it * always returns [Some v] if [polling] is [false]. *) let rec recv' {buffer_size; contents} ~polling = let open Fun_queue in let old_contents = Atomic.get contents in match old_contents with | Empty {receivers} -> (* The channel is empty (no senders) *) if not polling then begin (* The channel is empty (no senders), and we're not polling *) let msg_slot = ref None in let mc = Domain.DLS.get mutex_condvar_key in let new_contents = Empty {receivers= push receivers (msg_slot, mc)} in if Atomic.compare_and_set contents old_contents new_contents then begin Mutex.lock mc.mutex; while !msg_slot = None do Condition.wait mc.condition mc.mutex; done; Mutex.unlock mc.mutex; !msg_slot end else recv' {buffer_size; contents} ~polling end else (* The channel is empty (no senders), and we're polling *) None | NotEmpty {senders; messages} -> (* The channel is not empty *) match (pop messages, pop senders) with | None, None -> (* The channel is not empty, but no senders or messages *) failwith "Chan.recv: Impossible - channel state" | Some (m, messages'), None -> (* The channel is not empty, there is a message and no * waiting senders *) let new_contents = if length messages' = 0 then Empty {receivers = empty} else NotEmpty {messages= messages'; senders} in if Atomic.compare_and_set contents old_contents new_contents then Some m else recv' {buffer_size; contents} ~polling | None, Some ((m, c, mc), senders') -> (* The channel is not empty, there are no messages, and there * is a waiting sender. This is only possible is the buffer * size is 0. *) assert (buffer_size = Some 0) ; let new_contents = if length senders' = 0 then Empty {receivers = empty} else NotEmpty {messages; senders= senders'} in if Atomic.compare_and_set contents old_contents new_contents then begin Mutex.lock mc.mutex; c := Notified; Mutex.unlock mc.mutex; Condition.broadcast mc.condition; Some m end else recv' {buffer_size; contents} ~polling | Some (m, messages'), Some ((ms, sc, mc), senders') -> (* The channel is not empty, there is a message, and there is a * waiting sender. *) let new_contents = NotEmpty {messages= push messages' ms; senders= senders'} in if Atomic.compare_and_set contents old_contents new_contents then begin Mutex.lock mc.mutex; sc := Notified; Mutex.unlock mc.mutex; Condition.broadcast mc.condition; Some m end else recv' {buffer_size; contents} ~polling let recv c = match recv' c ~polling:false with | None -> failwith "Chan.recv: impossible - no message" | Some m -> m let recv_poll c = match Atomic.get c.contents with | Empty _ -> None | _ -> recv' c ~polling:true ================================================ FILE: lib/chan.mli ================================================ type !'a t (** The type of channels *) val make_bounded : int -> 'a t (** [make_bounded n] makes a bounded channel with a buffer of size [n]. Raises [Invalid_argument "Chan.make_bounded"] if the buffer size is less than 0. With a buffer size of 0, the send operation becomes synchronous. With a buffer size of 1, you get the familiar MVar structure. The channel may be shared between many sending and receiving domains. *) val make_unbounded : unit -> 'a t (** Returns an unbounded channel *) val send : 'a t -> 'a -> unit (** [send c v] sends the values [v] over the channel [c]. If the channel buffer is full then the sending domain blocks until space becomes available. *) val send_poll : 'a t -> 'a -> bool (** [send_poll c v] attempts to send the value [v] over the channel [c]. If the channel buffer is not full, the message is sent and returns [true]. Otherwise, returns [false]. *) val recv : 'a t -> 'a (** [recv c] returns a value [v] received over the channel. If the channel buffer is empty then the domain blocks until a message is sent on the channel. *) val recv_poll : 'a t -> 'a option (** [recv_poll c] attempts to receive a message on the channel [c]. If a message [v] is available on the channel then [Some v] is returned. Otherwise, returns [None]. *) ================================================ FILE: lib/domainslib.ml ================================================ module Chan = Chan module Task = Task ================================================ FILE: lib/dune ================================================ (library (name domainslib) (public_name domainslib) (libraries saturn domain-local-await)) ================================================ FILE: lib/fun_queue.ml ================================================ type 'a t = {length: int; front: 'a list; back: 'a list} let empty = {length= 0; front= []; back= []} let push {length; front; back} v = {length= length + 1; front; back= v :: back} let length {length; _} = length let pop {length; front; back} = match front with | [] -> ( match List.rev back with | [] -> None | x :: xs -> Some (x, {front= xs; length= length - 1; back= []}) ) | x :: xs -> Some (x, {front= xs; length= length - 1; back}) ================================================ FILE: lib/fun_queue.mli ================================================ type 'a t (** The type of functional queue *) val empty : 'a t (** Empty queue *) val length : 'a t -> int (** Returns the length of the queue *) val push : 'a t -> 'a -> 'a t (** [push q v] returns a new queue with [v] pushed to the back of [q] *) val pop : 'a t -> ('a * 'a t) option (** [pop q] returns [None] if the queue is empty. If the queue is non-empty, it returns [Some (v,q')] where [v] is the element popped from the head of [q] and [q'] is the rest of the queue. *) ================================================ FILE: lib/multi_channel.ml ================================================ (* * Copyright (c) 2021, Tom Kelly * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) module Ws_deque = Saturn.Work_stealing_deque type mutex_condvar = { mutex: Mutex.t; condition: Condition.t } type waiting_status = | Waiting | Released type dls_state = { mutable id: int; mutable steal_offsets: int array; rng_state: Random.State.t; mc: mutex_condvar; } module Foreign_queue = Saturn.Queue type 'a t = { channels: 'a Ws_deque.t array; (* Queue for enqueuing work from outside of the pool. *) foreign_queue: 'a Foreign_queue.t; waiters: (waiting_status ref * mutex_condvar ) Chan.t; next_domain_id: int Atomic.t; recv_block_spins: int; dls_key: dls_state Domain.DLS.key; } let dls_make_key () = Domain.DLS.new_key (fun () -> { id = -1; steal_offsets = Array.make 1 0; rng_state = Random.State.make_self_init (); mc = {mutex=Mutex.create (); condition=Condition.create ()}; }) let rec log2 n = if n <= 1 then 0 else 1 + (log2 (n asr 1)) let make ?(recv_block_spins = 2048) n = { channels = Array.init n (fun _ -> Ws_deque.create ()); foreign_queue = Foreign_queue.create (); waiters = Chan.make_unbounded (); next_domain_id = Atomic.make 0; recv_block_spins; dls_key = dls_make_key () } let register_domain mchan = let id = Atomic.fetch_and_add mchan.next_domain_id 1 in assert(id < Array.length mchan.channels); id let init_domain_state mchan dls_state = let id = register_domain mchan in let len = Array.length mchan.channels in dls_state.id <- id; dls_state.steal_offsets <- Array.init (len - 1) (fun i -> (id + i + 1) mod len); dls_state [@@inline never] let get_local_state mchan = let dls_state = Domain.DLS.get mchan.dls_key in if dls_state.id >= 0 then begin assert (dls_state.id < Array.length mchan.channels); dls_state end else (init_domain_state mchan dls_state) [@@inline] let clear_local_state mchan = let dls_state = Domain.DLS.get mchan.dls_key in dls_state.id <- (-1) let rec check_waiters mchan = match Chan.recv_poll mchan.waiters with | None -> () | Some (status, mc) -> (* avoid the lock if we possibly can *) if !status = Released then check_waiters mchan else begin Mutex.lock mc.mutex; match !status with | Waiting -> begin status := Released; Mutex.unlock mc.mutex; Condition.broadcast mc.condition end | Released -> begin (* this waiter is already released, it might have found something on a poll *) Mutex.unlock mc.mutex; check_waiters mchan end end let send_foreign mchan v = Foreign_queue.push mchan.foreign_queue v; check_waiters mchan let send mchan v = let id = (get_local_state mchan).id in Ws_deque.push (Array.unsafe_get mchan.channels id) v; check_waiters mchan let rec recv_poll_loop mchan dls cur_offset = let offsets = dls.steal_offsets in let k = (Array.length offsets) - cur_offset in if k = 0 then raise Exit else begin let idx = cur_offset + (Random.State.int dls.rng_state k) in let t = Array.unsafe_get offsets idx in let channel = Array.unsafe_get mchan.channels t in try Ws_deque.steal_exn channel with | Saturn.Work_stealing_deque.Empty -> begin Array.unsafe_set offsets idx (Array.unsafe_get offsets cur_offset); Array.unsafe_set offsets cur_offset t; recv_poll_loop mchan dls (cur_offset+1) end end let recv_poll_with_dls mchan dls = try Ws_deque.pop_exn (Array.unsafe_get mchan.channels dls.id) with | Saturn.Work_stealing_deque.Empty -> match Foreign_queue.pop_opt mchan.foreign_queue with | None -> recv_poll_loop mchan dls 0 | Some v -> v [@@inline] let recv_poll mchan = recv_poll_with_dls mchan (get_local_state mchan) let rec recv_poll_repeated mchan dls repeats = try recv_poll_with_dls mchan dls with | Exit -> if repeats = 1 then raise Exit else begin Domain.cpu_relax (); recv_poll_repeated mchan dls (repeats - 1) end let rec recv mchan = let dls = get_local_state mchan in try recv_poll_repeated mchan dls mchan.recv_block_spins with Exit -> begin (* Didn't find anything, prepare to block: * - enqueue our wait block in the waiter queue * - check the queue again * - go to sleep if our wait block has not been notified * - when notified retry the recieve *) let status = ref Waiting in let mc = dls.mc in Chan.send mchan.waiters (status, mc); try let v = recv_poll mchan in (* need to check the status as might take an item which is not the one an existing sender has woken us to take *) Mutex.lock mc.mutex; begin match !status with | Waiting -> (status := Released; Mutex.unlock mc.mutex) | Released -> (* we were simultaneously released from a sender; so need to release a waiter *) (Mutex.unlock mc.mutex; check_waiters mchan) end; v with | Exit -> if !status = Waiting then begin Mutex.lock mc.mutex; while !status = Waiting do Condition.wait mc.condition mc.mutex done; Mutex.unlock mc.mutex end; recv mchan end ================================================ FILE: lib/task.ml ================================================ open Effect open Effect.Deep type 'a task = unit -> 'a type message = | Work of (unit -> unit) (* Invariant: the Work function does not need to run under the 'step' handler, it installs its own handler or re-invokes a deep-handler continuation. *) | Quit type task_chan = message Multi_channel.t type pool_data = { domains : unit Domain.t array; task_chan : task_chan; name: string option } type pool = pool_data option Atomic.t type 'a promise_state = Returned of 'a | Raised of exn * Printexc.raw_backtrace | Pending of (('a, unit) continuation * task_chan) list type 'a promise = 'a promise_state Atomic.t type _ t += Wait : 'a promise * task_chan -> 'a t let get_pool_data p = match Atomic.get p with | None -> invalid_arg "pool already torn down" | Some p -> p let cont v (k, c) = Multi_channel.send c (Work (fun _ -> continue k v)) let discont e bt (k, c) = Multi_channel.send c (Work (fun _ -> discontinue_with_backtrace k e bt)) let do_task (type a) (f : unit -> a) (p : a promise) : unit = let action, result = try let v = f () in cont v, Returned v with e -> let bt = Printexc.get_raw_backtrace () in discont e bt, Raised (e, bt) in match Atomic.exchange p result with | Pending l -> List.iter action l | _ -> failwith "Task.do_task: impossible, can only set result of task once" let await pool promise = let pd = get_pool_data pool in match Atomic.get promise with | Returned v -> v | Raised (e, bt) -> Printexc.raise_with_backtrace e bt | Pending _ -> perform (Wait (promise, pd.task_chan)) let step (type a) (f : a -> unit) (v : a) : unit = try_with f v { effc = fun (type a) (e : a t) -> match e with | Wait (p,c) -> Some (fun (k : (a, _) continuation) -> let rec loop () = let old = Atomic.get p in match old with | Pending l -> if Atomic.compare_and_set p old (Pending ((k,c)::l)) then () else (Domain.cpu_relax (); loop ()) | Returned v -> continue k v | Raised (e,bt) -> discontinue_with_backtrace k e bt in loop ()) | _ -> None } let async pool f = let pd = get_pool_data pool in let p = Atomic.make (Pending []) in Multi_channel.send pd.task_chan (Work (fun _ -> step (do_task f) p)); p let prepare_for_await chan () = let promise = Atomic.make (Pending []) in let release () = match Atomic.get promise with | (Returned _ | Raised _) -> () | Pending _ -> match Atomic.exchange promise (Returned ()) with | Pending ks -> ks |> List.iter @@ fun (k, c) -> Multi_channel.send_foreign c (Work (fun _ -> continue k ())) | _ -> () and await () = match Atomic.get promise with | (Returned _ | Raised _) -> () | Pending _ -> perform (Wait (promise, chan)) in Domain_local_await.{ release; await } let rec worker task_chan = match Multi_channel.recv task_chan with | Quit -> Multi_channel.clear_local_state task_chan | Work f -> f (); worker task_chan let worker task_chan = Domain_local_await.using ~prepare_for_await:(prepare_for_await task_chan) ~while_running:(fun () -> worker task_chan) let run (type a) pool (f : unit -> a) : a = let pd = get_pool_data pool in let p = Atomic.make (Pending []) in step (fun _ -> do_task f p) (); let rec loop () : a = match Atomic.get p with | Pending _ -> begin try match Multi_channel.recv_poll pd.task_chan with | Work f -> f () | Quit -> failwith "Task.run: tasks are active on pool" with Exit -> Domain.cpu_relax () end; loop () | Returned v -> v | Raised (e, bt) -> Printexc.raise_with_backtrace e bt in loop () let run pool f = Domain_local_await.using ~prepare_for_await:(prepare_for_await (get_pool_data pool).task_chan) ~while_running:(fun () -> run pool f) let named_pools = Hashtbl.create 8 let named_pools_mutex = Mutex.create () let setup_pool ?name ~num_domains () = if num_domains < 0 then invalid_arg "Task.setup_pool: num_domains must be at least 0" else let task_chan = Multi_channel.make (num_domains+1) in let domains = Array.init num_domains (fun _ -> Domain.spawn (fun _ -> worker task_chan)) in let p = Atomic.make (Some {domains; task_chan; name}) in begin match name with | None -> () | Some x -> Mutex.lock named_pools_mutex; Hashtbl.add named_pools x p; Mutex.unlock named_pools_mutex end; p let teardown_pool pool = let pd = get_pool_data pool in for _i=1 to Array.length pd.domains do Multi_channel.send pd.task_chan Quit done; Multi_channel.clear_local_state pd.task_chan; Array.iter Domain.join pd.domains; (* Remove the pool from the table *) begin match pd.name with | None -> () | Some n -> Mutex.lock named_pools_mutex; Hashtbl.remove named_pools n; Mutex.unlock named_pools_mutex end; Atomic.set pool None let lookup_pool name = Mutex.lock named_pools_mutex; let p = Hashtbl.find_opt named_pools name in Mutex.unlock named_pools_mutex; p let get_num_domains pool = let pd = get_pool_data pool in Array.length pd.domains + 1 let parallel_for_reduce ?(chunk_size=0) ~start ~finish ~body pool reduce_fun init = let pd = get_pool_data pool in let chunk_size = if chunk_size > 0 then chunk_size else begin let n_domains = (Array.length pd.domains) + 1 in let n_tasks = finish - start + 1 in if n_domains = 1 then n_tasks else max 1 (n_tasks/(8*n_domains)) end in let rec work s e = if e - s < chunk_size then let rec loop i acc = if i > e then acc else loop (i+1) (reduce_fun acc (body i)) in loop (s+1) (body s) else begin let d = s + ((e - s) / 2) in let p = async pool (fun _ -> work s d) in let right = work (d+1) e in let left = await pool p in reduce_fun left right end in if finish < start then init else reduce_fun init (work start finish) let parallel_for ?(chunk_size=0) ~start ~finish ~body pool = let pd = get_pool_data pool in let chunk_size = if chunk_size > 0 then chunk_size else begin let n_domains = (Array.length pd.domains) + 1 in let n_tasks = finish - start + 1 in if n_domains = 1 then n_tasks else max 1 (n_tasks/(8*n_domains)) end in let rec work pool fn s e = if e - s < chunk_size then for i = s to e do fn i done else begin let d = s + ((e - s) / 2) in let left = async pool (fun _ -> work pool fn s d) in work pool fn (d+1) e; await pool left end in work pool body start finish let parallel_scan pool op elements = let pd = get_pool_data pool in let n = Array.length elements in let p = min (n - 1) ((Array.length pd.domains) + 1) in let prefix_s = Array.copy elements in let scan_part op elements prefix_sum start finish = assert (Array.length elements > (finish - start)); for i = (start + 1) to finish do prefix_sum.(i) <- op prefix_sum.(i - 1) elements.(i) done in if p < 2 then begin (* Do a sequential scan when number of domains or array's length is less than 2 *) scan_part op elements prefix_s 0 (n - 1); prefix_s end else begin let add_offset op prefix_sum offset start finish = assert (Array.length prefix_sum > (finish - start)); for i = start to finish do prefix_sum.(i) <- op offset prefix_sum.(i) done in parallel_for pool ~chunk_size:1 ~start:0 ~finish:(p - 1) ~body:(fun i -> let s = (i * n) / (p ) in let e = (i + 1) * n / (p ) - 1 in scan_part op elements prefix_s s e); let x = ref prefix_s.(n/p - 1) in for i = 2 to p do let ind = i * n / p - 1 in x := op !x prefix_s.(ind); prefix_s.(ind) <- !x done; parallel_for pool ~chunk_size:1 ~start:1 ~finish:(p - 1) ~body:( fun i -> let s = i * n / (p) in let e = (i + 1) * n / (p) - 2 in let offset = prefix_s.(s - 1) in add_offset op prefix_s offset s e ); prefix_s end let parallel_find (type a) ?(chunk_size=0) ~start ~finish ~body pool = let pd = get_pool_data pool in let found : a option Atomic.t = Atomic.make None in let chunk_size = if chunk_size > 0 then chunk_size else begin let n_domains = (Array.length pd.domains) + 1 in let n_tasks = finish - start + 1 in if n_domains = 1 then n_tasks else max 1 (n_tasks/(8*n_domains)) end in let rec work pool fn s e = if e - s < chunk_size then let i = ref s in while !i <= e && Option.is_none (Atomic.get found) do begin match fn !i with | None -> () | Some _ as some -> Atomic.set found some end; incr i; done else if Option.is_some (Atomic.get found) then () else begin let d = s + ((e - s) / 2) in let left = async pool (fun _ -> work pool fn s d) in work pool fn (d+1) e; await pool left end in work pool body start finish; Atomic.get found ================================================ FILE: lib/task.mli ================================================ type 'a task = unit -> 'a (** Type of task *) type !'a promise (** Type of promises *) type pool (** Type of task pool *) val setup_pool : ?name:string -> num_domains:int -> unit -> pool (** Sets up a task execution pool with [num_domains] new domains. If [name] is provided, the pool is mapped to [name] which can be looked up later with [lookup_pool name]. When [num_domains] is 0, the new pool will be empty, and when an empty pool is in use, every function in this module will run effectively sequentially, using the calling domain as the only available domain. Raises {!Invalid_argument} when [num_domains] is less than 0. *) val teardown_pool : pool -> unit (** Tears down the task execution pool. *) val lookup_pool : string -> pool option (** [lookup_pool name] returns [Some pool] if [pool] is associated to [name] or returns [None] if no value is associated to it. *) val get_num_domains : pool -> int (** [get_num_domains pool] returns the total number of domains in [pool] including the parent domain. *) val run : pool -> 'a task -> 'a (** [run p t] runs the task [t] synchronously with the calling domain and the domains in the pool [p]. If the task [t] blocks on a promise, then tasks from the pool [p] are executed until the promise blocking [t] is resolved. This function should be used at the top level to enclose the calls to other functions that may await on promises. This includes {!await}, {!parallel_for} and its variants. Otherwise, those functions will raise [Unhandled] exception. *) val async : pool -> 'a task -> 'a promise (** [async p t] runs the task [t] asynchronously in the pool [p]. The function returns a promise [r] in which the result of the task [t] will be stored. *) val await : pool -> 'a promise -> 'a (** [await p r] waits for the promise [r] to be resolved. During the resolution, other tasks in the pool [p] might be run using the calling domain and/or the domains in the pool [p]. If the task associated with the promise have completed successfully, then the result of the task will be returned. If the task have raised an exception, then [await] raises the same exception. Must be called with a call to {!run} in the dynamic scope to handle the internal algebraic effects for task synchronization. *) val parallel_for : ?chunk_size:int -> start:int -> finish:int -> body:(int -> unit) -> pool -> unit (** [parallel_for c s f b p] behaves similar to [for i=s to f do b i done], but runs the for loop in parallel with the calling domain and/or the domains in the pool [p]. The chunk size [c] determines the number of body applications done in one task; this will default to [max(1, (finish-start + 1) / (8 * num_domains))]. Individual iterations may be run in any order. Tasks are distributed to the participating domains using a divide-and-conquer scheme. Must be called with a call to {!run} in the dynamic scope to handle the internal algebraic effects for task synchronization. *) val parallel_for_reduce : ?chunk_size:int -> start:int -> finish:int -> body:(int -> 'a) -> pool -> ('a -> 'a -> 'a) -> 'a -> 'a (** [parallel_for_reduce c s f b p r i] is similar to [parallel_for] except that the result returned by each iteration is reduced with [r] with initial value [i]. The reduce operations are performed in an arbitrary order and the reduce function needs to be associative in order to obtain a deterministic result. Must be called with a call to {!run} in the dynamic scope to handle the internal algebraic effects for task synchronization. *) val parallel_scan : pool -> ('a -> 'a -> 'a) -> 'a array -> 'a array (** [parallel_scan p op a] computes the scan of the array [a] in parallel with binary operator [op] and returns the result array, using the calling domain and/or the domains in the pool [p]. Scan is similar to [Array.fold_left] but returns an array of reduced intermediate values. The reduce operations are performed in an arbitrary order and the reduce function needs to be associative in order to obtain a deterministic result. Must be called with a call to {!run} in the dynamic scope to handle the internal algebraic effects for task synchronization. *) val parallel_find : ?chunk_size:int -> start:int -> finish:int -> body:(int -> 'a option) -> pool -> 'a option (** [parallel_find ~start ~finish ~body pool] calls [body] in parallel on the indices from [start] to [finish], in any order, until at least one of them returns [Some v]. Search stops when a value is found, but there is no guarantee that it stops as early as possible, other calls to [body] may happen in parallel or afterwards. See {!parallel_for} for the description of the [chunk_size] parameter and the scheduling strategy. Must be called with a call to {!run} in the dynamic scope to handle the internal algebraic effects for task synchronization. *) ================================================ FILE: test/LU_decomposition_multicore.ml ================================================ module T = Domainslib.Task let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 let mat_size = try int_of_string Sys.argv.(2) with _ -> 1200 let k = Domain.DLS.new_key Random.State.make_self_init module SquareMatrix = struct let parallel_create pool f : float array = let fa = Array.create_float (mat_size * mat_size) in T.parallel_for pool ~start:0 ~finish:( mat_size * mat_size - 1) ~body:(fun i -> fa.(i) <- f (i / mat_size) (i mod mat_size)); fa let get (m : float array) r c = m.(r * mat_size + c) let set (m : float array) r c v = m.(r * mat_size + c) <- v let parallel_copy pool a = let n = Array.length a in let copy_part a b i = let s = (i * n / num_domains) in let e = (i+1) * n / num_domains - 1 in Array.blit a s b s (e - s + 1) in let b = Array.create_float n in let rec aux acc num_domains i = if (i = num_domains) then (List.iter (fun e -> T.await pool e) acc) else begin aux ((T.async pool (fun _ -> copy_part a b i))::acc) num_domains (i+1) end in aux [] num_domains 0; b end open SquareMatrix let lup pool (a0 : float array) = let a = parallel_copy pool a0 in for k = 0 to (mat_size - 2) do T.parallel_for pool ~start:(k + 1) ~finish:(mat_size -1) ~body:(fun row -> let factor = get a row k /. get a k k in for col = k + 1 to mat_size-1 do set a row col (get a row col -. factor *. (get a k col)) done; set a row k factor ) done ; a let () = let pool = T.setup_pool ~num_domains:(num_domains - 1) () in T.run pool (fun _ -> let a = parallel_create pool (fun _ _ -> (Random.State.float (Domain.DLS.get k) 100.0) +. 1.0 ) in let lu = lup pool a in let _l = parallel_create pool (fun i j -> if i > j then get lu i j else if i = j then 1.0 else 0.0) in let _u = parallel_create pool (fun i j -> if i <= j then get lu i j else 0.0) in ()); T.teardown_pool pool ================================================ FILE: test/backtrace.ml ================================================ module T = Domainslib.Task let rec foo i = if i = 0 then () else begin ignore (failwith "exn"); foo i end [@@inline never] let rec bar i = if i = 0 then () else begin foo i; bar i end [@@inline never] let main () = let pool = T.setup_pool ~num_domains:0 () in T.run pool (fun _ -> let p = T.async pool (fun _ -> bar 42) in T.await pool p; Printf.printf "should not reach here\n%!"); T.teardown_pool pool let _ = Printexc.record_backtrace true; try main () with _ -> let open Printexc in let bt = get_raw_backtrace () in let bt_slot_arr = Option.get (backtrace_slots bt) in let name = Option.get (Slot.name bt_slot_arr.(1)) in assert (name = "Backtrace.foo" || name = "Dune__exe__Backtrace.foo"); let s = raw_backtrace_to_string bt in print_string s ================================================ FILE: test/chan_stm_tests.ml ================================================ open QCheck open Domainslib open STM (** This contains sequential and parallel model-based tests of [Domainslib.Chan] *) module ChConf = struct type state = int list type sut = int Domainslib.Chan.t type cmd = | Send of int | Send_poll of int | Recv | Recv_poll let show_cmd c = match c with | Send i -> "Send" ^ (string_of_int i) | Send_poll i -> "Send_poll" ^ (string_of_int i) | Recv -> "Recv" | Recv_poll -> "Recv_poll" let capacity = 8 let arb_cmd s = let int_gen = Gen.nat in QCheck.make ~print:show_cmd (if s=[] then Gen.oneof [Gen.map (fun i -> Send i) int_gen; Gen.map (fun i -> Send_poll i) int_gen; Gen.return Recv_poll] (* don't generate blocking Recv cmds on an empty channel *) else if List.length s >= capacity then Gen.oneof [Gen.map (fun i -> Send_poll i) int_gen; Gen.return Recv; Gen.return Recv_poll] (* don't generate blocking Send cmds on a full channel *) else Gen.oneof [Gen.map (fun i -> Send i) int_gen; Gen.map (fun i -> Send_poll i) int_gen; Gen.return Recv; Gen.return Recv_poll]) let init_state = [] let init_sut () = Chan.make_bounded capacity let cleanup _ = () let next_state c s = match c with | Send i -> if List.length s < capacity then s@[i] else s | Send_poll i -> if List.length s < capacity then s@[i] else s | Recv -> begin match s with [] -> [] | _::s' -> s' end | Recv_poll -> begin match s with [] -> [] | _::s' -> s' end let precond c s = match c,s with | Recv, [] -> false | Send _, _ -> List.length s < capacity | _, _ -> true let run c chan = match c with | Send i -> Res (unit, Chan.send chan i) | Send_poll i -> Res (bool, Chan.send_poll chan i) | Recv -> Res (int, Chan.recv chan) | Recv_poll -> Res (option int, Chan.recv_poll chan) let postcond c s res = match c,res with | Send _, Res ((Unit,_),_) -> (List.length s < capacity) | Send_poll _, Res ((Bool,_),res) -> res = (List.length s < capacity) | Recv, Res ((Int,_),res) -> (match s with [] -> false | res'::_ -> Int.equal res res') | Recv_poll, Res ((Option Int,_),opt) -> (match s with [] -> None | res'::_ -> Some res') = opt | _,_ -> false end module ChT_seq = STM_sequential.Make(ChConf) module ChT_dom = STM_domain.Make(ChConf) let () = let count = 500 in QCheck_base_runner.run_tests_main [ ChT_seq.agree_test ~count ~name:"STM Domainslib.Chan test sequential"; ChT_dom.agree_test_par ~count ~name:"STM Domainslib.Chan test parallel"; ] ================================================ FILE: test/dune ================================================ (test (name test_chan) (libraries domainslib) (modules test_chan)) (test (name fib) (modules fib)) (test (name fib_par) (libraries domainslib) (modules fib_par)) (test (name kcas_integration) (libraries domainslib kcas) (modules kcas_integration)) (test (name enumerate_par) (libraries domainslib) (modules enumerate_par)) (test (name game_of_life) (modules game_of_life)) (test (name game_of_life_multicore) (libraries domainslib) (modules game_of_life_multicore)) (test (name LU_decomposition_multicore) (libraries domainslib) (flags (:standard -runtime-variant d)) (modules LU_decomposition_multicore) (enabled_if (or (= %{arch_sixtyfour} true) (<> %{architecture} arm)))) ;; disabled temporarily on arm32 due to failure: ocaml/ocaml#12267 (test (name spectralnorm2) (modules spectralnorm2)) (test (name sum_par) (libraries domainslib) (modules sum_par)) (test (name task_throughput) (libraries domainslib mirage-clock-unix) (modules task_throughput)) (test (name spectralnorm2_multicore) (libraries domainslib) (modules spectralnorm2_multicore)) (test (name summed_area_table) (libraries domainslib) (modules summed_area_table)) (test (name prefix_sum) (libraries domainslib unix) (modules prefix_sum)) (test (name test_task) (libraries domainslib) (modules test_task)) (test (name test_parallel_find) (libraries domainslib) (modules test_parallel_find)) (test (name test_parallel_scan) (libraries domainslib) (modules test_parallel_scan)) (test (name test_deadlock) (libraries domainslib) (modules test_deadlock)) (test (name test_task_crash) (libraries domainslib) (modules test_task_crash)) (test (name test_task_empty) (libraries domainslib) (modules test_task_empty)) (test (name backtrace) (libraries domainslib) (modules backtrace) (enabled_if (<> %{system} mingw64)) ;; triggers a known issue on mingw https://github.com/ocaml/ocaml/pull/12231 (modes byte native)) ;; byte_complete .exes don't include debug+trace info https://github.com/ocaml/dune/issues/7845 ;; so on a bytecode switch/platform we build a plain bytecode version w/trace info ;; and rename it to .exe (rule (target backtrace.exe) (action (copy backtrace.bc backtrace.exe)) (enabled_if (and (= %{bin-available:ocamlopt} false) (<> %{system} mingw64)))) (test (name off_by_one) (libraries domainslib) (modules off_by_one)) ;; Custom property-based tests using QCheck (test (name task_one_dep) (modules task_one_dep) (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) (enabled_if %{bin-available:ocamlopt}) ;; takes forever on bytecode (action (run %{test} --verbose))) (test (name task_more_deps) (modules task_more_deps) (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) (enabled_if %{bin-available:ocamlopt}) ;; takes forever on bytecode (action (run %{test} --verbose))) (test (name task_parallel) (modules task_parallel) (libraries qcheck-multicoretests-util qcheck-core qcheck-core.runner domainslib) (action (run %{test} --verbose))) ;; STM_sequential and STM_domain test of Domainslib.Chan (test (name chan_stm_tests) (modules chan_stm_tests) (libraries qcheck-stm.sequential qcheck-stm.domain domainslib) (action (run %{test} --verbose))) ================================================ FILE: test/enumerate_par.ml ================================================ let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 let n = try int_of_string Sys.argv.(2) with _ -> 100 module T = Domainslib.Task let _ = let p = T.setup_pool ~num_domains:(num_domains - 1) () in T.run p (fun _ -> T.parallel_for p ~start:0 ~finish:(n-1) ~chunk_size:16 ~body:(fun i -> print_string @@ Printf.sprintf "[%d] %d\n%!" (Domain.self () :> int) i)); T.teardown_pool p ================================================ FILE: test/fib.ml ================================================ let n = try int_of_string Sys.argv.(1) with _ -> 43 let rec fib n = if n < 2 then 1 else fib (n-1) + fib (n-2) let _ = Printf.printf "fib(%d) = %d\n" n (fib n) ================================================ FILE: test/fib_par.ml ================================================ let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 let n = try int_of_string Sys.argv.(2) with _ -> 43 module T = Domainslib.Task let rec fib n = if n < 2 then 1 else fib (n-1) + fib (n-2) let rec fib_par pool n = if n <= 40 then fib n else let a = T.async pool (fun _ -> fib_par pool (n-1)) in let b = T.async pool (fun _ -> fib_par pool (n-2)) in T.await pool a + T.await pool b let main = let pool = T.setup_pool ~num_domains:(num_domains - 1) () in let res = T.run pool (fun _ -> fib_par pool n) in T.teardown_pool pool; Printf.printf "fib(%d) = %d\n" n res let () = main ================================================ FILE: test/game_of_life.ml ================================================ let n_times = try int_of_string Sys.argv.(1) with _ -> 20 let board_size = try int_of_string Sys.argv.(2) with _ -> 16 let rg = ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) let rg' = ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) let buf = Bytes.create board_size let get g x y = try g.(x).(y) with _ -> 0 let neighbourhood g x y = (get g (x-1) (y-1)) + (get g (x-1) (y )) + (get g (x-1) (y+1)) + (get g (x ) (y-1)) + (get g (x ) (y+1)) + (get g (x+1) (y-1)) + (get g (x+1) (y )) + (get g (x+1) (y+1)) let next_cell g x y = let n = neighbourhood g x y in match g.(x).(y), n with | 1, 0 | 1, 1 -> 0 (* lonely *) | 1, 4 | 1, 5 | 1, 6 | 1, 7 | 1, 8 -> 0 (* overcrowded *) | 1, 2 | 1, 3 -> 1 (* lives *) | 0, 3 -> 1 (* get birth *) | _ (* 0, (0|1|2|4|5|6|7|8) *) -> 0 (* barren *) let print g = for x = 0 to board_size - 1 do for y = 0 to board_size - 1 do if g.(x).(y) = 0 then Bytes.set buf y '.' else Bytes.set buf y 'o' done; print_endline (Bytes.unsafe_to_string buf) done; print_endline "" let next () = let g = !rg in let new_g = !rg' in for x = 0 to board_size - 1 do for y = 0 to board_size - 1 do new_g.(x).(y) <- next_cell g x y done done; rg := new_g; rg' := g let rec repeat n = match n with | 0 -> () | _ -> next (); repeat (n-1) let ()= print !rg; repeat n_times; print !rg ================================================ FILE: test/game_of_life_multicore.ml ================================================ let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 let n_times = try int_of_string Sys.argv.(2) with _ -> 20 let board_size = try int_of_string Sys.argv.(3) with _ -> 16 module T = Domainslib.Task let rg = ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) let rg' = ref (Array.init board_size (fun _ -> Array.init board_size (fun _ -> Random.int 2))) let buf = Bytes.create board_size let get g x y = try g.(x).(y) with _ -> 0 let neighbourhood g x y = (get g (x-1) (y-1)) + (get g (x-1) (y )) + (get g (x-1) (y+1)) + (get g (x ) (y-1)) + (get g (x ) (y+1)) + (get g (x+1) (y-1)) + (get g (x+1) (y )) + (get g (x+1) (y+1)) let next_cell g x y = let n = neighbourhood g x y in match g.(x).(y), n with | 1, 0 | 1, 1 -> 0 (* lonely *) | 1, 4 | 1, 5 | 1, 6 | 1, 7 | 1, 8 -> 0 (* overcrowded *) | 1, 2 | 1, 3 -> 1 (* lives *) | 0, 3 -> 1 (* get birth *) | _ (* 0, (0|1|2|4|5|6|7|8) *) -> 0 (* barren *) let print g = for x = 0 to board_size - 1 do for y = 0 to board_size - 1 do if g.(x).(y) = 0 then Bytes.set buf y '.' else Bytes.set buf y 'o' done; print_endline (Bytes.unsafe_to_string buf) done; print_endline "" let next pool = let g = !rg in let new_g = !rg' in T.parallel_for pool ~start:0 ~finish:(board_size - 1) ~body:(fun x -> for y = 0 to board_size - 1 do new_g.(x).(y) <- next_cell g x y done); rg := new_g; rg' := g let rec repeat pool n = match n with | 0-> () | _-> next pool; repeat pool (n-1) let ()= let pool = T.setup_pool ~num_domains:(num_domains - 1) () in print !rg; T.run pool (fun _ -> repeat pool n_times); print !rg; T.teardown_pool pool ================================================ FILE: test/kcas_integration.ml ================================================ open Kcas module T = Domainslib.Task let var = Loc.make None let () = let n = 100 in let pool_domain = Domain.spawn @@ fun () -> let pool = T.setup_pool ~num_domains:(Domain.recommended_domain_count () - 2) () in T.run pool (fun () -> T.parallel_for ~start:1 ~finish:n ~body:(fun i -> ignore @@ Loc.update var @@ function None -> Some i | _ -> Retry.later ()) pool); T.teardown_pool pool; Printf.printf "Done\n%!" in for _ = 1 to n do match Loc.update var @@ function None -> Retry.later () | Some _ -> None with | None -> failwith "impossible" | Some i -> Printf.printf "Got %d\n%!" i done; Domain.join pool_domain ================================================ FILE: test/off_by_one.ml ================================================ open Domainslib let print_array a = let b = Buffer.create 25 in Buffer.add_string b "[|"; Array.iter (fun elem -> Buffer.add_string b (string_of_int elem ^ "; ")) a; Buffer.add_string b "|]"; Buffer.contents b let r = Array.init 20 (fun i -> i + 1) let scan_task num_doms = try let pool = Task.setup_pool ~num_domains:num_doms () in let a = Task.run pool (fun () -> Task.parallel_scan pool (+) (Array.make 20 1)) in Task.teardown_pool pool; Printf.printf "%i: %s\n%!" num_doms (print_array a); assert (a = r) with Failure msg -> begin assert (msg = "failed to allocate domain"); Printf.printf "Failed to allocate %i domains, recommended_domain_count: %i\n%!" num_doms (Domain.recommended_domain_count ()); end ;; for num_dom=0 to 21 do scan_task num_dom; done ================================================ FILE: test/prefix_sum.ml ================================================ module T = Domainslib.Task let num_domains = try int_of_string Sys.argv.(1) with _ -> 4 let n = try int_of_string Sys.argv.(2) with _ -> 100000 let gen n = Array.make n 1 (*(fun _ -> Random.int n)*) let prefix_sum pool = T.parallel_scan pool (+) let _ = let pool = T.setup_pool ~num_domains:(num_domains - 1) () in let arr = gen n in let t = Unix.gettimeofday() in ignore (T.run pool (fun _ -> prefix_sum pool arr)); Printf.printf "Execution time: %fs\n" (Unix.gettimeofday() -. t); T.teardown_pool pool ================================================ FILE: test/spectralnorm2.ml ================================================ (* The Computer Language Benchmarks Game * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ * * Contributed by Sebastien Loisel * Cleanup by Troestler Christophe * Modified by Mauricio Fernandez *) let n = try int_of_string Sys.argv.(1) with _ -> 2000 let eval_A i j = 1. /. float((i+j)*(i+j+1)/2+i+1) let eval_A_times_u u v = let n = Array.length v - 1 in for i = 0 to n do let vi = ref 0. in for j = 0 to n do vi := !vi +. eval_A i j *. u.(j) done; v.(i) <- !vi done let eval_At_times_u u v = let n = Array.length v -1 in for i = 0 to n do let vi = ref 0. in for j = 0 to n do vi := !vi +. eval_A j i *. u.(j) done; v.(i) <- !vi done let eval_AtA_times_u u v = let w = Array.make (Array.length u) 0.0 in eval_A_times_u u w; eval_At_times_u w v let () = let u = Array.make n 1.0 and v = Array.make n 0.0 in for _i = 0 to 9 do eval_AtA_times_u u v; eval_AtA_times_u v u done; let vv = ref 0.0 and vBv = ref 0.0 in for i=0 to n-1 do vv := !vv +. v.(i) *. v.(i); vBv := !vBv +. u.(i) *. v.(i) done; Printf.printf "%0.9f\n" (sqrt(!vBv /. !vv)) ================================================ FILE: test/spectralnorm2_multicore.ml ================================================ (* The Computer Language Benchmarks Game * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ * * Contributed by Sebastien Loisel * Cleanup by Troestler Christophe * Modified by Mauricio Fernandez *) let num_domains = try int_of_string Sys.argv.(1) with _ -> 1 let n = try int_of_string Sys.argv.(2) with _ -> 2000 module T = Domainslib.Task let eval_A i j = 1. /. float((i+j)*(i+j+1)/2+i+1) let eval_A_times_u pool u v = let n = Array.length v - 1 in T.parallel_for pool ~start:0 ~finish:n ~chunk_size:(n/num_domains) ~body:(fun i -> let vi = ref 0. in for j = 0 to n do vi := !vi +. eval_A i j *. u.(j) done; v.(i) <- !vi) let eval_At_times_u pool u v = let n = Array.length v -1 in T.parallel_for pool ~start:0 ~finish:n ~chunk_size:(n/num_domains) ~body:(fun i -> let vi = ref 0. in for j = 0 to n do vi := !vi +. eval_A j i *. u.(j) done; v.(i) <- !vi) let eval_AtA_times_u pool u v = let w = Array.make (Array.length u) 0.0 in eval_A_times_u pool u w; eval_At_times_u pool w v let () = let pool = T.setup_pool ~num_domains:(num_domains - 1) () in let u = Array.make n 1.0 and v = Array.make n 0.0 in T.run pool (fun _ -> for _i = 0 to 9 do eval_AtA_times_u pool u v; eval_AtA_times_u pool v u done); T.teardown_pool pool; let vv = ref 0.0 and vBv = ref 0.0 in for i=0 to n-1 do vv := !vv +. v.(i) *. v.(i); vBv := !vBv +. u.(i) *. v.(i) done; Printf.printf "%0.9f\n" (sqrt(!vBv /. !vv)) ================================================ FILE: test/sum_par.ml ================================================ let num_domains = try int_of_string Sys.argv.(1) with _ -> 2 let n = try int_of_string Sys.argv.(2) with _ -> 100 module T = Domainslib.Task let _ = (* use parallel_for_reduce *) let p = T.setup_pool ~num_domains:(num_domains - 1) () in let sum = T.run p (fun _ -> T.parallel_for_reduce p (+) 0 ~chunk_size:(n/(4*num_domains)) ~start:0 ~finish:(n-1) ~body:(fun _i -> 1)) in T.teardown_pool p; Printf.printf "Sum is %d\n" sum; assert (sum = n) let _ = (* explictly use empty pool and default chunk_size *) let p = T.setup_pool ~num_domains:0 () in let sum = Atomic.make 0 in T.run p (fun _ -> T.parallel_for p ~start:0 ~finish:(n-1) ~body:(fun _i -> ignore (Atomic.fetch_and_add sum 1))); let sum = Atomic.get sum in T.teardown_pool p; Printf.printf "Sum is %d\n" sum; assert (sum = n) let _ = (* configured num_domains and default chunk_size *) let p = T.setup_pool ~num_domains:(num_domains - 1) () in let sum = Atomic.make 0 in T.run p (fun _ -> T.parallel_for p ~start:0 ~finish:(n-1) ~body:(fun _i -> ignore (Atomic.fetch_and_add sum 1))); let sum = Atomic.get sum in T.teardown_pool p; Printf.printf "Sum is %d\n" sum; assert (sum = n) ================================================ FILE: test/summed_area_table.ml ================================================ module T = Domainslib.Task let num_domains = try int_of_string Sys.argv.(1) with _ -> 4 let size = try int_of_string Sys.argv.(2) with _ -> 100 let transpose a = let r = Array.length a in let c = Array.length a.(0) in let b = Array.copy a in for i = 0 to (pred r) do for j = 0 to (pred c) do b.(j).(i) <- a.(i).(j) done done; b let calc_table pool mat = let l = Array.length mat in let res = Array.copy mat in for i = 0 to (l - 1) do res.(i) <- T.parallel_scan pool (fun x y -> x + y) mat.(i) done; let k = transpose res in for i = 0 to (l - 1) do res.(i) <- T.parallel_scan pool (fun x y -> x + y) k.(i) done; (transpose res) let _ = let m = Array.make_matrix size size 1 (*Array.init size (fun _ -> Array.init size (fun _ -> Random.int size))*) in let pool = T.setup_pool ~num_domains:(num_domains - 1) () in let _ = T.run pool (fun _ -> calc_table pool m) in (* for i = 0 to size-1 do for j = 0 to size-1 do print_int a.(i).(j); print_string " " done; print_newline() done; *) T.teardown_pool pool ================================================ FILE: test/task_more_deps.ml ================================================ (** Generate tests of async+await from Domainslib.Task. It does so by generating a random, acyclic dependency graph of [async] tasks, each [await]ing on its dependency. *) open QCheck open Domainslib (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) let rec tak x y z = if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) else z let work () = for _ = 1 to 200 do assert (7 = tak 18 12 6); done (* Generates a DAG of dependencies *) (* Each task is represented by an array index w/a deps.list *) (* This example DAG A/0 <--- B/1 < ^. \ \ \ `- C/2 <--- D/3 is represented as: [| []; [0]; [0]; [1;2] |] *) let gen_dag n st = Array.init n (fun i -> let deps = ref [] in for dep = 0 to i-1 do if Gen.bool st then deps := dep :: !deps done; List.rev !deps) type test_input = { num_domains : int; length : int; dependencies : int list array } let show_test_input t = Printf.sprintf "{ num_domains : %i\n length : %i\n dependencies : %s }" t.num_domains t.length Print.(array (list int) t.dependencies) let shrink_deps test_input = let ls = Array.to_list test_input.dependencies in let is = Shrink.list ~shrink:Shrink.list ls in Iter.map (fun deps -> let len = List.length deps in let arr = Array.of_list deps in let deps = Array.mapi (fun i i_deps -> match i,i_deps with | 0, _ | _,[] -> [] | _,[0] -> [0] | _, _ -> List.map (fun j -> if j<0 || j>=len || j>=i (* ensure reduced dep is valid *) then ((j + i) mod i) else j) i_deps) arr in { test_input with length=len; dependencies=deps }) is let arb_deps domain_bound promise_bound = let gen_deps = Gen.(pair (int_bound (domain_bound-1)) (int_bound promise_bound) >>= fun (num_domains,length) -> let num_domains = succ num_domains in let length = succ length in gen_dag length >>= fun dependencies -> return { num_domains; length; dependencies }) in make ~print:show_test_input ~shrink:(shrink_deps) gen_deps let build_dep_graph pool test_input = let len = test_input.length in let deps = test_input.dependencies in let rec build i promise_acc = if i=len then promise_acc else let p = (match deps.(i) with | [] -> Task.async pool work | deps -> Task.async pool (fun () -> work (); List.iter (fun dep -> Task.await pool (List.nth promise_acc (i-1-dep))) deps)) in build (i+1) (p::promise_acc) in build 0 [] let test_one_pool ~domain_bound ~promise_bound = Test.make ~name:"Domainslib.Task.async/await, more deps, 1 work pool" ~count:100 (arb_deps domain_bound promise_bound) (Util.repeat 10 (fun test_input -> let pool = Task.setup_pool ~num_domains:test_input.num_domains () in Task.run pool (fun () -> let ps = build_dep_graph pool test_input in List.iter (fun p -> Task.await pool p) ps); Task.teardown_pool pool; true)) let () = QCheck_base_runner.run_tests_main [test_one_pool ~domain_bound:8 ~promise_bound:10] ================================================ FILE: test/task_one_dep.ml ================================================ (** Generate tests of async+await from Domainslib.Task. It does so by generating a random, acyclic dependency graph of [async] tasks, each [await]ing on its dependency. *) open QCheck open Domainslib (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) let rec tak x y z = if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) else z let work () = for _ = 1 to 200 do assert (7 = tak 18 12 6); done (* Generates a sparse DAG of dependencies *) (* Each task is represented by an array index w/at most 1 dep. each *) (* This example DAG A/0 <--- B/1 ^. \ `- C/2 <--- D/3 is represented as: [| None; Some 0; Some 0; Some 2 |] *) let gen_deps n st = let a = Array.make n None in for i=1 to n-1 do if Gen.bool st then a.(i) <- Some (Gen.int_bound (i-1) st) done; a type test_input = { num_domains : int; length : int; dependencies : int option array } let show_test_input t = Printf.sprintf "{ num_domains : %i\n length : %i\n dependencies : %s }" t.num_domains t.length Print.(array (option int) t.dependencies) let shrink_deps test_input = let ls = Array.to_list test_input.dependencies in let is = Shrink.list ~shrink:Shrink.(option nil) ls in Iter.map (fun deps -> let len = List.length deps in let arr = Array.of_list deps in let deps = Array.mapi (fun i j_opt -> match i,j_opt with | 0, _ | _,None -> None | _,Some 0 -> Some 0 | _, Some j -> if j<0 || j>=len || j>=i (* ensure reduced dep is valid *) then Some ((j + i) mod i) else Some j) arr in { test_input with length=len; dependencies=deps }) is let arb_deps domain_bound promise_bound = let gen_deps = Gen.(pair (int_bound (domain_bound-1)) (int_bound promise_bound) >>= fun (num_domains,length) -> let num_domains = succ num_domains in let length = succ length in gen_deps length >>= fun dependencies -> return { num_domains; length; dependencies }) in let shrink_input input = Iter.append (Iter.map (fun doms' -> { input with num_domains = doms' }) (Shrink.int input.num_domains)) (shrink_deps input) in make ~print:show_test_input ~shrink:shrink_input gen_deps let build_dep_graph pool test_input = let len = test_input.length in let deps = test_input.dependencies in let rec build i promise_acc = if i=len then promise_acc else let p = (match deps.(i) with | None -> Task.async pool work | Some dep -> Task.async pool (fun () -> work(); Task.await pool (List.nth promise_acc (i-1-dep)))) in build (i+1) (p::promise_acc) in build 0 [] let test_one_pool ~domain_bound ~promise_bound = Test.make ~name:"Domainslib.Task.async/await, one dep, 1 work pool" ~count:100 (arb_deps domain_bound promise_bound) (Util.repeat 10 @@ fun input -> let pool = Task.setup_pool ~num_domains:input.num_domains () in Task.run pool (fun () -> let ps = build_dep_graph pool input in List.iter (fun p -> Task.await pool p) ps); Task.teardown_pool pool; true) let test_two_pools_sync_last ~domain_bound ~promise_bound = let gen = arb_deps domain_bound promise_bound in Test.make ~name:"Domainslib.Task.async/await, one dep, w.2 pools, syncing at the end" ~count:100 (pair gen gen) (Util.repeat 10 @@ fun (input1,input2) -> try let pool1 = Task.setup_pool ~num_domains:input1.num_domains () in let pool2 = Task.setup_pool ~num_domains:input2.num_domains () in let ps1 = build_dep_graph pool1 input1 in let ps2 = build_dep_graph pool2 input2 in Task.run pool1 (fun () -> List.iter (fun p -> Task.await pool1 p) ps1); Task.run pool2 (fun () -> List.iter (fun p -> Task.await pool2 p) ps2); Task.teardown_pool pool1; Task.teardown_pool pool2; true with Failure err -> err = "failed to allocate domain") let test_two_nested_pools ~domain_bound ~promise_bound = let gen = arb_deps domain_bound promise_bound in Test.make ~name:"Domainslib.Task.async/await, one dep, w.2 nested pools" ~count:100 (pair gen gen) (Util.repeat 10 @@ fun (input1,input2) -> try let pool1 = Task.setup_pool ~num_domains:input1.num_domains () in let pool2 = Task.setup_pool ~num_domains:input2.num_domains () in Task.run pool1 (fun () -> Task.run pool2 (fun () -> let ps1 = build_dep_graph pool1 input1 in let ps2 = build_dep_graph pool2 input2 in List.iter (fun p -> Task.await pool1 p) ps1; List.iter (fun p -> Task.await pool2 p) ps2)); Task.teardown_pool pool1; Task.teardown_pool pool2; true with Failure err -> err = "failed to allocate domain") let () = let domain_bound = max 1 (Domain.recommended_domain_count () / 2) in let promise_bound = max 2 domain_bound in QCheck_base_runner.run_tests_main [ test_one_pool ~domain_bound ~promise_bound; test_two_pools_sync_last ~domain_bound ~promise_bound; test_two_nested_pools ~domain_bound ~promise_bound; ] ================================================ FILE: test/task_parallel.ml ================================================ open QCheck open Domainslib (** Property-based QCheck tests of Task.parallel_* *) let count = 250 let test_parallel_for = Test.make ~name:"Domainslib.Task.parallel_for test" ~count (triple (int_bound 10) small_nat small_nat) (fun (num_domains,array_size,chunk_size) -> let pool = Task.setup_pool ~num_domains () in let res = Task.run pool (fun () -> let a = Atomic.make 0 in Task.parallel_for ~chunk_size ~start:0 ~finish:(array_size-1) ~body:(fun _ -> Atomic.incr a) pool; Atomic.get a) in Task.teardown_pool pool; res = array_size) let test_parallel_for_reduce = Test.make ~name:"Domainslib.Task.parallel_for_reduce test" ~count (triple (int_bound 10) small_nat small_nat) (fun (num_domains,array_size,chunk_size) -> let pool = Task.setup_pool ~num_domains () in let res = Task.run pool (fun () -> Task.parallel_for_reduce ~chunk_size ~start:0 ~finish:(array_size-1) ~body:(fun _ -> 1) pool (+) 0) in Task.teardown_pool pool; res = array_size) let test_parallel_scan = Test.make ~name:"Domainslib.Task.parallel_scan test" ~count (pair (int_bound 10) small_nat) (fun (num_domains,array_size) -> let pool = Task.setup_pool ~num_domains () in let a = Task.run pool (fun () -> Task.parallel_scan pool (+) (Array.make array_size 1)) in Task.teardown_pool pool; a = Array.init array_size (fun i -> i + 1)) let () = QCheck_base_runner.run_tests_main [ test_parallel_for; test_parallel_for_reduce; test_parallel_scan; ] ================================================ FILE: test/task_throughput.ml ================================================ let n_domains = try int_of_string Sys.argv.(1) with _ -> 1 let n_iterations = try int_of_string Sys.argv.(2) with _ -> 1024 let n_tasks = try int_of_string Sys.argv.(3) with _ -> 1024 module T = Domainslib.Task module TimingHist = struct type t = { data: int array; min_n: int; max_n: int; mutable count: int; mutable sum : float; } let make min_n max_n = { data=Array.make (max_n - min_n) 0; min_n; max_n; count=0; sum=0. } let rec log2 n = if n <= 1 then 0 else 1 + log2(n asr 1) let add_point a x = let i = (log2 x) in let i = max (i-a.min_n+1) 0 in let i = min i ((Array.length a.data)-1) in a.data.(i) <- a.data.(i) + 1; a.sum <- a.sum +. (float_of_int x); a.count <- a.count + 1 let mean a = a.sum /. (float_of_int a.count) let print_hist a = Printf.printf "Timings (ns): n=%d mean=%.1f\n" a.count (mean a); let fn n = (Int.shift_left 1 (a.min_n+n)) in let len = Array.length a.data in for i = 0 to (len - 1) do match i with | i when i=0 -> Printf.printf " (%8d, %8d): %6d\n" 0 (fn i) a.data.(i); | i when i=(len-1) -> Printf.printf " [%8d, Inf): %6d\n" (fn (i-1)) a.data.(i); | i -> Printf.printf " [%8d, %8d): %6d\n" (fn (i-1)) (fn i) a.data.(i); done end let _ = Printf.printf "n_iterations: %d n_units: %d n_domains: %d\n" n_iterations n_tasks n_domains; let pool = T.setup_pool ~num_domains:(n_domains - 1) () in let hist = TimingHist.make 5 25 in for _ = 1 to n_iterations do let t0 = Mclock.elapsed_ns() in T.run pool (fun _ -> T.parallel_for pool ~start:1 ~finish:n_tasks ~body:(fun _ -> ())); let t = Int64.sub (Mclock.elapsed_ns ()) t0 in TimingHist.add_point hist (Int64.to_int t); done; TimingHist.print_hist hist; T.teardown_pool pool ================================================ FILE: test/test_chan.ml ================================================ let buffer_size = try int_of_string Sys.argv.(1) with _ -> 1 let num_items = try int_of_string Sys.argv.(2) with _ -> 100 let num_senders = try int_of_string Sys.argv.(3) with _ -> 1 let num_receivers = try int_of_string Sys.argv.(4) with _ -> 1 module C = Domainslib.Chan let c = C.make_bounded buffer_size let rec receiver i n = if i = n then print_endline @@ Printf.sprintf "Receiver on domain %d done" (Domain.self () :> int) else ( ignore @@ C.recv c; receiver (i+1) n ) let rec sender i n = if i = n then print_endline @@ Printf.sprintf "Sender on domain %d done" (Domain.self () :> int) else ( C.send c i; sender (i+1) n ) let _ = assert (num_items mod num_senders == 0); assert (num_items mod num_receivers == 0); let senders = Array.init num_senders (fun _ -> Domain.spawn (fun _ -> sender 0 (num_items / num_senders))) in let receivers = Array.init num_receivers (fun _ -> Domain.spawn (fun _ -> receiver 0 (num_items / num_receivers))) in Array.iter Domain.join senders; Array.iter Domain.join receivers; begin match C.recv_poll c with | None -> () | Some _ -> assert false end; for _i=1 to buffer_size do C.send c 0 done; for _i=1 to buffer_size do ignore (C.recv c) done; begin match C.recv_poll c with | None -> () | Some _ -> assert false end ================================================ FILE: test/test_deadlock.ml ================================================ (* Despite what the name says, this test will not deadlock. A similar test will * deadlock in the version not using effect handlers. See * https://github.com/ocaml-multicore/ocaml-multicore/issues/670 *) module T = Domainslib.Task let n = try int_of_string Sys.argv.(1) with _ -> 1_000_000 let rec loop n = if n = 0 then Printf.printf "Looping finished on domain %d\n%!" (Domain.self () :> int) else (Domain.cpu_relax (); loop (n-1)) let () = let pool = T.setup_pool ~num_domains:2 () in T.run pool (fun _ -> let a = T.async pool (fun _ -> Printf.printf "Task A running on domain %d\n%!" (Domain.self () :> int); loop n) in let b = T.async pool (fun _ -> Printf.printf "Task B running on domain %d\n%!" (Domain.self () :> int); T.await pool a) in let c = T.async pool (fun _ -> Printf.printf "Task C running on domain %d\n%!" (Domain.self () :> int); T.await pool b) in loop n; T.await pool c); T.teardown_pool pool ================================================ FILE: test/test_parallel_find.ml ================================================ let len = 1_000_000 let nb_needles = 4 let () = Random.init 42 let needles = Array.init nb_needles (fun _ -> Random.int len) let input = let t = Array.make len false in needles |> Array.iter (fun needle -> t.(needle) <- true ); t open Domainslib let search_needle pool ~chunk_size = Task.parallel_find pool ~chunk_size ~start:0 ~finish:(len - 1) ~body:(fun i -> if input.(i) then Some i else None ) let test_search pool ~chunk_size = match search_needle pool ~chunk_size with | None -> assert false | Some needle -> assert (Array.exists ((=) needle) needles) let () = (* [num_domains] is the number of *new* domains spawned by the pool performing computations in addition to the current domain. *) let num_domains = Domain.recommended_domain_count () - 1 in Printf.eprintf "test_parallel_find on %d domains.\n" (num_domains + 1); let pool = Task.setup_pool ~num_domains ~name:"pool" () in Task.run pool begin fun () -> [0; 16; 32; 1000] |> List.iter (fun chunk_size -> test_search pool ~chunk_size) end; Task.teardown_pool pool; prerr_endline "Success."; ================================================ FILE: test/test_parallel_scan.ml ================================================ let len = 1_000_000 let singleton_interval i = (i, i + 1) let combine_intervals interval1 interval2 = let b1, e1 = interval1 and b2, e2 = interval2 in if e1 <> b2 then begin Printf.eprintf "Invalid intervals: (%d, %d), (%d, %d)\n" b1 e1 b2 e2; assert false end else (b1, e2) open Domainslib let test_scan_ordering pool = let check_interval i interval = let (b, e) = interval in assert (b = 0 && e = i + 1) in Array.init len singleton_interval |> Task.parallel_scan pool combine_intervals |> Array.iteri check_interval let () = (* [num_domains] is the number of *new* domains spawned by the pool performing computations in addition to the current domain. *) let num_domains = Domain.recommended_domain_count () - 1 in Printf.eprintf "test_parallel_scan on %d domains.\n" (num_domains + 1); let pool = Task.setup_pool ~num_domains ~name:"pool" () in Task.run pool begin fun () -> test_scan_ordering pool end; Task.teardown_pool pool; prerr_endline "Success."; ================================================ FILE: test/test_task.ml ================================================ (* Generic tests for the task module *) (* Parallel for *) open Domainslib let modify_arr pool chunk_size = fun () -> let arr1 = Array.init 100 (fun i -> i + 1) in Task.parallel_for ~chunk_size ~start:0 ~finish:99 ~body:(fun i -> arr1.(i) <- arr1.(i) * 2) pool; let arr_res = Array.init 100 (fun i -> (i + 1) * 2) in assert (arr1 = arr_res) let inc_ctr pool chunk_size = fun () -> let ctr = Atomic.make 0 in Task.parallel_for ~chunk_size ~start:1 ~finish:1000 ~body:(fun _ -> Atomic.incr ctr) pool; assert (Atomic.get ctr = 1000) (* Parallel for reduce *) let sum_sequence pool chunk_size init = fun () -> let v = Task.parallel_for_reduce ~chunk_size ~start:1 ~finish:100 ~body:(fun i -> i) pool (+) init in assert (v = 5050 + init) (* Parallel scan *) let prefix_sum pool = fun () -> let prefix_s l = List.rev (List.fold_left (fun a y -> match a with | [] -> [y] | x::_ -> (x+y)::a) [] l) in let arr = Array.make 1000 1 in let v1 = Task.parallel_scan pool (+) arr in let ls = Array.to_list arr in let v2 = prefix_s ls in assert (v1 = Array.of_list v2) let () = let pool1 = Task.setup_pool ~num_domains:2 ~name:"pool1" () in let pool2 = Task.setup_pool ~num_domains:2 ~name:"pool2" () in Task.run pool1 (fun _ -> let p1 = Option.get @@ Task.lookup_pool "pool1" in modify_arr pool1 0 (); modify_arr pool1 25 (); modify_arr pool1 100 (); inc_ctr p1 0 (); inc_ctr p1 16 (); inc_ctr p1 32 (); inc_ctr p1 1000 ()); Task.run pool2 (fun _ -> let p2 = Option.get @@ Task.lookup_pool "pool2" in sum_sequence pool2 0 0 (); sum_sequence pool2 10 10 (); sum_sequence pool2 1 0 (); sum_sequence p2 1 10 (); sum_sequence p2 100 10 (); sum_sequence p2 100 100 (); prefix_sum p2 ()); Task.teardown_pool pool1; Task.teardown_pool pool2; try sum_sequence pool2 0 0 (); assert false with Invalid_argument _ -> (); assert (Task.lookup_pool "pool1" = None); try let _ = Task.setup_pool ~num_domains:(-1) () in () with Invalid_argument _ -> (); print_endline "ok" ================================================ FILE: test/test_task_crash.ml ================================================ open Domainslib (* a simple work item, from ocaml/testsuite/tests/misc/takc.ml *) let rec tak x y z = if x > y then tak (tak (x-1) y z) (tak (y-1) z x) (tak (z-1) x y) else z let work () = for _ = 1 to 200 do assert (7 = tak 18 12 6); done ;; begin let pool1 = Task.setup_pool ~num_domains:2 () in let pool2 = Task.setup_pool ~num_domains:1 () in let pool1_prom0 = Task.async pool1 work in let pool2_prom0 = Task.async pool2 work in let pool2_prom1 = Task.async pool2 work in Task.run pool1 (fun () -> List.iter (fun p -> Task.await pool1 p) [pool1_prom0]); Task.run pool2 (fun () -> List.iter (fun p -> Task.await pool2 p) [pool2_prom0; pool2_prom1]); Task.teardown_pool pool1; Task.teardown_pool pool2; end ================================================ FILE: test/test_task_empty.ml ================================================ open Domainslib let array_size = 0 let pool = Task.setup_pool ~num_domains:0 () let res = Task.run pool (fun () -> Task.parallel_for_reduce ~chunk_size:0 ~start:0 ~finish:(array_size-1) ~body:(fun _ -> 1) pool (+) 0);; Task.teardown_pool pool;; assert(res = array_size)