Repository: pramalhe/OneFile
Branch: master
Commit: 49654893f081
Files: 412
Total size: 3.5 MB

Directory structure:
gitextract_33ubg_0h/
├── LICENSE.txt
├── README.md
├── common/
│   ├── HazardEras.hpp
│   ├── HazardPointers.hpp
│   ├── HazardPointersSimQueue.hpp
│   ├── README.md
│   ├── RIStaticPerThread.hpp
│   ├── ThreadRegistry.cpp
│   ├── ThreadRegistry.hpp
│   └── pfences.h
├── datastructures/
│   ├── generic/
│   │   ├── TMHashMap.hpp
│   │   ├── TMLinkedListQueue.hpp
│   │   ├── TMLinkedListSet.hpp
│   │   └── TMRedBlackBST.hpp
│   ├── hashmaps/
│   │   ├── CRWWPSTMResizableHashSet.hpp
│   │   ├── ESTMResizableHashSet.hpp
│   │   ├── OFLFResizableHashSet.hpp
│   │   ├── OFWFResizableHashSet.hpp
│   │   └── TinySTMResizableHashSet.hpp
│   ├── linkedlists/
│   │   ├── CRWWPLinkedListSet.hpp
│   │   ├── ESTMLinkedListSet.hpp
│   │   ├── HazardEras.hpp
│   │   ├── HazardPointers.hpp
│   │   ├── MagedHarrisLinkedListSetHE.hpp
│   │   ├── MagedHarrisLinkedListSetHP.hpp
│   │   ├── OFLFLinkedListSet.hpp
│   │   ├── OFWFLinkedListSet.hpp
│   │   ├── STMLinkedListSet.hpp
│   │   └── TinySTMLinkedListSet.hpp
│   ├── queues/
│   │   ├── CRWWPLinkedListQueue.hpp
│   │   ├── ESTMArrayLinkedListQueue.hpp
│   │   ├── ESTMLinkedListQueue.hpp
│   │   ├── FAAArrayQueue.hpp
│   │   ├── HazardPointers.hpp
│   │   ├── HazardPointersSimQueue.hpp
│   │   ├── LCRQueue.hpp
│   │   ├── MichaelScottQueue.hpp
│   │   ├── OFLFArrayLinkedListQueue.hpp
│   │   ├── OFLFArrayQueue.hpp
│   │   ├── OFLFLinkedListQueue.hpp
│   │   ├── OFWFArrayLinkedListQueue.hpp
│   │   ├── OFWFLinkedListQueue.hpp
│   │   ├── README.md
│   │   ├── SimQueue.hpp
│   │   ├── TinySTMArrayLinkedListQueue.hpp
│   │   ├── TinySTMLinkedListQueue.hpp
│   │   └── TurnQueue.hpp
│   ├── sequential/
│   │   ├── HashSet.hpp
│   │   ├── LinkedListQueue.hpp
│   │   ├── LinkedListSet.hpp
│   │   ├── RedBlackBST.hpp
│   │   ├── SortedArraySet.hpp
│   │   ├── SortedVectorSet.hpp
│   │   └── TreeSet.hpp
│   ├── treemaps/
│   │   ├── ESTMRedBlackTree.hpp
│   │   ├── HazardEras.hpp
│   │   ├── NatarajanTreeHE.hpp
│   │   ├── OFLFRedBlackTree.hpp
│   │   ├── OFWFRedBlackTree.hpp
│   │   └── TinySTMRedBlackTree.hpp
│   ├── trevor_brown_abtree/
│   │   ├── Makefile
│   │   ├── TrevorBrownABTree.hpp
│   │   ├── common/
│   │   │   ├── atomic_ops/
│   │   │   │   ├── atomic_ops/
│   │   │   │   │   ├── generalize-small.h
│   │   │   │   │   ├── generalize.h
│   │   │   │   │   └── sysdeps/
│   │   │   │   │       ├── README
│   │   │   │   │       ├── acquire_release_volatile.h
│   │   │   │   │       ├── aligned_atomic_load_store.h
│   │   │   │   │       ├── all_acquire_release_volatile.h
│   │   │   │   │       ├── all_aligned_atomic_load_store.h
│   │   │   │   │       ├── all_atomic_load_store.h
│   │   │   │   │       ├── ao_t_is_int.h
│   │   │   │   │       ├── armcc/
│   │   │   │   │       │   └── arm_v6.h
│   │   │   │   │       ├── atomic_load_store.h
│   │   │   │   │       ├── char_acquire_release_volatile.h
│   │   │   │   │       ├── char_atomic_load_store.h
│   │   │   │   │       ├── emul_cas.h
│   │   │   │   │       ├── gcc/
│   │   │   │   │       │   ├── alpha.h
│   │   │   │   │       │   ├── arm.h
│   │   │   │   │       │   ├── avr32.h
│   │   │   │   │       │   ├── cris.h
│   │   │   │   │       │   ├── hppa.h
│   │   │   │   │       │   ├── ia64.h
│   │   │   │   │       │   ├── m68k.h
│   │   │   │   │       │   ├── mips.h
│   │   │   │   │       │   ├── powerpc.h
│   │   │   │   │       │   ├── s390.h
│   │   │   │   │       │   ├── sh.h
│   │   │   │   │       │   ├── sparc.h
│   │   │   │   │       │   ├── x86.h
│   │   │   │   │       │   └── x86_64.h
│   │   │   │   │       ├── generic_pthread.h
│   │   │   │   │       ├── hpc/
│   │   │   │   │       │   ├── hppa.h
│   │   │   │   │       │   └── ia64.h
│   │   │   │   │       ├── ibmc/
│   │   │   │   │       │   └── powerpc.h
│   │   │   │   │       ├── icc/
│   │   │   │   │       │   └── ia64.h
│   │   │   │   │       ├── int_acquire_release_volatile.h
│   │   │   │   │       ├── int_aligned_atomic_load_store.h
│   │   │   │   │       ├── int_atomic_load_store.h
│   │   │   │   │       ├── msftc/
│   │   │   │   │       │   ├── arm.h
│   │   │   │   │       │   ├── common32_defs.h
│   │   │   │   │       │   ├── x86.h
│   │   │   │   │       │   └── x86_64.h
│   │   │   │   │       ├── ordered.h
│   │   │   │   │       ├── ordered_except_wr.h
│   │   │   │   │       ├── read_ordered.h
│   │   │   │   │       ├── short_acquire_release_volatile.h
│   │   │   │   │       ├── short_aligned_atomic_load_store.h
│   │   │   │   │       ├── short_atomic_load_store.h
│   │   │   │   │       ├── standard_ao_double_t.h
│   │   │   │   │       ├── sunc/
│   │   │   │   │       │   ├── sparc.h
│   │   │   │   │       │   ├── x86.h
│   │   │   │   │       │   └── x86_64.h
│   │   │   │   │       ├── test_and_set_t_is_ao_t.h
│   │   │   │   │       └── test_and_set_t_is_char.h
│   │   │   │   └── atomic_ops.h
│   │   │   ├── dcss/
│   │   │   │   ├── dcss_plus.h
│   │   │   │   ├── dcss_plus_impl.h
│   │   │   │   └── testing.cpp
│   │   │   ├── descriptors/
│   │   │   │   ├── descriptors.h
│   │   │   │   ├── descriptors_impl.h
│   │   │   │   └── descriptors_impl2.h
│   │   │   ├── errors.h
│   │   │   ├── plaf.h
│   │   │   ├── recordmgr/
│   │   │   │   ├── allocator_bump.h
│   │   │   │   ├── allocator_interface.h
│   │   │   │   ├── allocator_new.h
│   │   │   │   ├── allocator_new_segregated.h
│   │   │   │   ├── allocator_once.h
│   │   │   │   ├── arraylist.h
│   │   │   │   ├── blockbag.h
│   │   │   │   ├── blockpool.h
│   │   │   │   ├── debug_info.h
│   │   │   │   ├── debugcounter.h
│   │   │   │   ├── debugprinting.h
│   │   │   │   ├── globals.h
│   │   │   │   ├── hashtable.h
│   │   │   │   ├── lockfreeblockbag.h
│   │   │   │   ├── pool_interface.h
│   │   │   │   ├── pool_none.h
│   │   │   │   ├── pool_perthread_and_shared.h
│   │   │   │   ├── reclaimer_debra.h
│   │   │   │   ├── reclaimer_debraplus.h
│   │   │   │   ├── reclaimer_hazardptr.h
│   │   │   │   ├── reclaimer_interface.h
│   │   │   │   ├── reclaimer_none.h
│   │   │   │   ├── reclaimer_rcu.h
│   │   │   │   ├── record_manager.h
│   │   │   │   ├── record_manager_single_type.h
│   │   │   │   └── recovery_manager.h
│   │   │   ├── rq/
│   │   │   │   ├── rq_dcssp.h
│   │   │   │   ├── rq_debugging.h
│   │   │   │   ├── rq_htm_rwlock.h
│   │   │   │   ├── rq_provider.h
│   │   │   │   ├── rq_rwlock.h
│   │   │   │   ├── rq_snapcollector.h
│   │   │   │   ├── rq_unsafe.h
│   │   │   │   └── snapcollector/
│   │   │   │       ├── reportitem.h
│   │   │   │       ├── snapcollector.h
│   │   │   │       └── snapcollector_test.cpp
│   │   │   └── rwlock.h
│   │   ├── ds/
│   │   │   └── brown_ext_abtree_lf/
│   │   │       ├── brown_ext_abtree_lf.h
│   │   │       ├── brown_ext_abtree_lf_adapter.h
│   │   │       └── brown_ext_abtree_lf_impl.h
│   │   └── minimal_example.cpp
│   └── trevor_brown_natarajan/
│       ├── TrevorBrownNatarajanTree.hpp
│       └── ds/
│           └── natarajan_ext_bst_lf/
│               ├── natarajan_ext_bst_lf_adapter.h
│               ├── natarajan_ext_bst_lf_stage1.h
│               └── natarajan_ext_bst_lf_stage2_impl.h
├── graphs/
│   ├── BenchmarkLatencyCounter.hpp
│   ├── BenchmarkLatencyQueues.hpp
│   ├── BenchmarkMaps.hpp
│   ├── BenchmarkQueues.hpp
│   ├── BenchmarkSPS.hpp
│   ├── BenchmarkSets.hpp
│   ├── Makefile
│   ├── PBenchmarkQueues.hpp
│   ├── PBenchmarkSPS.hpp
│   ├── PBenchmarkSets.hpp
│   ├── README.md
│   ├── bin/
│   │   └── .gitignore
│   ├── data/
│   │   └── README.md
│   ├── latency-counter.cpp
│   ├── lib/
│   │   └── .gitignore
│   ├── plots/
│   │   ├── caption.gp
│   │   ├── latency-counter.gp
│   │   ├── pcaption.gp
│   │   ├── plot-all.sh
│   │   ├── plot.sh
│   │   ├── pq-enq-deq.gp
│   │   ├── pq-ll-enq-deq.gp
│   │   ├── pset-hash-1k.gp
│   │   ├── pset-ll-1k.gp
│   │   ├── pset-tree-1k.gp
│   │   ├── pset-tree-1m.gp
│   │   ├── psps-integer.gp
│   │   ├── q-array-enq-deq.gp
│   │   ├── q-enq-deq.gp
│   │   ├── q-ll-enq-deq.gp
│   │   ├── set-hash-1k.gp
│   │   ├── set-ll-10k.gp
│   │   ├── set-ll-1k.gp
│   │   ├── set-tree-10k.gp
│   │   ├── set-tree-1k.gp
│   │   ├── sps-integer.gp
│   │   ├── sps-object.gp
│   │   ├── stress-multi-process-q.gp
│   │   └── styles.inc
│   ├── pq-ll-enq-deq.cpp
│   ├── pread-while-writing.cpp
│   ├── pset-hash-1k.cpp
│   ├── pset-ll-10k.cpp
│   ├── pset-ll-1k.cpp
│   ├── pset-tree-1k.cpp
│   ├── pset-tree-1m.cpp
│   ├── psps-integer.cpp
│   ├── q-array-enq-deq.cpp
│   ├── q-ll-enq-deq.cpp
│   ├── run-all-aws.sh
│   ├── set-hash-1k.cpp
│   ├── set-ll-10k.cpp
│   ├── set-ll-1k.cpp
│   ├── set-tree-10k.cpp
│   ├── set-tree-1k.cpp
│   ├── set-tree-1m.cpp
│   ├── sps-integer.cpp
│   └── sps-object.cpp
├── pdatastructures/
│   ├── README.md
│   ├── TMHashMap.hpp
│   ├── TMHashMapByRef.hpp
│   ├── TMLinkedListQueue.hpp
│   ├── TMLinkedListSet.hpp
│   ├── TMLinkedListSetByRef.hpp
│   ├── TMRedBlackTree.hpp
│   ├── TMRedBlackTreeByRef.hpp
│   └── pqueues/
│       ├── HazardPointers.hpp
│       ├── MichaelScottQueue.hpp
│       ├── PFriedmanQueue.hpp
│       ├── PMDKLinkedListQueue.hpp
│       ├── PMichaelScottQueue.hpp
│       ├── POFLFLinkedListQueue.hpp
│       ├── POFLFMPLinkedListQueue.hpp
│       ├── POFWFLinkedListQueue.hpp
│       ├── RomLRLinkedListQueue.hpp
│       └── RomLogLinkedListQueue.hpp
├── ptms/
│   ├── OneFilePTMLF.hpp
│   ├── OneFilePTMLFMultiProcess.hpp
│   ├── OneFilePTMWF.hpp
│   ├── PMDKTM.hpp
│   ├── README.md
│   ├── atlas/
│   │   ├── README.md
│   │   └── atlas.patch
│   ├── romuluslog/
│   │   ├── RomulusLog.cpp
│   │   ├── RomulusLog.hpp
│   │   └── malloc.cpp
│   ├── romuluslr/
│   │   ├── RomulusLR.cpp
│   │   ├── RomulusLR.hpp
│   │   └── malloc.cpp
│   └── rwlocks/
│       ├── CRWWP.hpp
│       └── CRWWP_SpinLock.hpp
└── stms/
    ├── CRWWPSTM.hpp
    ├── ESTM.hpp
    ├── OneFileLF.hpp
    ├── OneFileWF.hpp
    ├── TinySTM.hpp
    ├── estm-0.3.0/
    │   ├── .gitignore
    │   ├── AUTHORS
    │   ├── COPYING
    │   ├── Makefile
    │   ├── Makefile.in
    │   ├── README
    │   ├── VERSIONS
    │   ├── include/
    │   │   ├── mod_local.h
    │   │   ├── mod_mem.h
    │   │   ├── mod_print.h
    │   │   ├── mod_stats.h
    │   │   ├── stm.h
    │   │   └── wrappers.h
    │   └── src/
    │       ├── atomic.h
    │       ├── atomic_ops/
    │       │   ├── AUTHORS
    │       │   ├── COPYING
    │       │   ├── README
    │       │   ├── aligned_atomic_load_store.h
    │       │   ├── all_acquire_release_volatile.h
    │       │   ├── ao_t_is_int.h
    │       │   ├── atomic_ops.h
    │       │   ├── generalize-small.h
    │       │   ├── generalize.h
    │       │   ├── ia64.h
    │       │   ├── ordered_except_wr.h
    │       │   ├── powerpc.h
    │       │   ├── read_ordered.h
    │       │   ├── sparc.h
    │       │   ├── standard_ao_double_t.h
    │       │   ├── test_and_set_t_is_ao_t.h
    │       │   ├── test_and_set_t_is_char.h
    │       │   ├── x86.h
    │       │   └── x86_64.h
    │       ├── gc.c
    │       ├── gc.h
    │       ├── mod_local.c
    │       ├── mod_mem.c
    │       ├── mod_print.c
    │       ├── mod_stats.c
    │       ├── stm.c
    │       └── wrappers.c
    └── tinystm/
        ├── ChangeLog
        ├── Doxyfile
        ├── GNU-LICENSE.txt
        ├── MIT-LICENSE.txt
        ├── Makefile
        ├── Makefile.clang
        ├── Makefile.common
        ├── Makefile.gcc
        ├── Makefile.icc
        ├── Makefile.suncc
        ├── README.md
        ├── abi/
        │   ├── Makefile
        │   ├── Makefile.common
        │   ├── abi.c
        │   ├── arch_x86.S
        │   ├── dtmc/
        │   │   ├── Makefile
        │   │   ├── arch.S
        │   │   ├── libitm.h
        │   │   ├── libtanger-stm.public-symbols
        │   │   ├── libtanger-stm.support
        │   │   ├── tanger-stm-internal.h
        │   │   ├── tanger.c
        │   │   ├── tanger.h
        │   │   └── tm_macros.h
        │   ├── gcc/
        │   │   ├── Makefile
        │   │   ├── alloc_cpp.c
        │   │   ├── arch.S
        │   │   ├── clone.c
        │   │   ├── eh.c
        │   │   ├── libitm.h
        │   │   └── tm_macros.h
        │   ├── intel/
        │   │   ├── Makefile
        │   │   ├── alloc.c
        │   │   ├── arch.S
        │   │   ├── libitm.h
        │   │   └── tm_macros.h
        │   ├── libitm.h.tpl.cpp
        │   ├── libitm.h.tpl.footer
        │   ├── libitm.h.tpl.header
        │   ├── libitm.h.tpl.unifdef
        │   ├── oracle/
        │   │   ├── Makefile
        │   │   ├── arch.S
        │   │   └── otm.c
        │   ├── pthread_wrapper.h
        │   ├── test/
        │   │   └── Makefile
        │   └── tm_macros.h
        ├── include/
        │   ├── mod_ab.h
        │   ├── mod_cb.h
        │   ├── mod_log.h
        │   ├── mod_mem.h
        │   ├── mod_order.h
        │   ├── mod_print.h
        │   ├── mod_stats.h
        │   ├── stm.h
        │   └── wrappers.h
        ├── lib/
        │   └── .gitignore
        ├── src/
        │   ├── .gitignore
        │   ├── atomic.h
        │   ├── atomic_ops/
        │   │   ├── AUTHORS
        │   │   ├── COPYING
        │   │   ├── README
        │   │   ├── aligned_atomic_load_store.h
        │   │   ├── all_acquire_release_volatile.h
        │   │   ├── ao_t_is_int.h
        │   │   ├── atomic_ops.h
        │   │   ├── generalize-small.h
        │   │   ├── generalize.h
        │   │   ├── ia64.h
        │   │   ├── ordered_except_wr.h
        │   │   ├── powerpc.h
        │   │   ├── read_ordered.h
        │   │   ├── sparc.h
        │   │   ├── standard_ao_double_t.h
        │   │   ├── test_and_set_t_is_ao_t.h
        │   │   ├── test_and_set_t_is_char.h
        │   │   ├── x86.h
        │   │   └── x86_64.h
        │   ├── gc.c
        │   ├── gc.h
        │   ├── mod_ab.c
        │   ├── mod_cb_mem.c
        │   ├── mod_log.c
        │   ├── mod_order.c
        │   ├── mod_print.c
        │   ├── mod_stats.c
        │   ├── stm.c
        │   ├── stm_internal.h
        │   ├── stm_wbctl.h
        │   ├── stm_wbetl.h
        │   ├── stm_wt.h
        │   ├── tls.h
        │   ├── utils.h
        │   └── wrappers.c
        └── test/
            ├── Makefile
            ├── intset/
            │   ├── .gitignore
            │   ├── Makefile
            │   ├── README.rbtree
            │   ├── intset.c
            │   ├── rbtree.c
            │   ├── rbtree.h
            │   ├── tm.h
            │   └── types.h
            └── regression/
                ├── .gitignore
                ├── Makefile
                ├── irrevocability.c
                ├── perf.c
                └── types.c

================================================
FILE CONTENTS
================================================

================================================
FILE: LICENSE.txt
================================================
Copyright (c) 2017-2018
  Andreia Correia
  Pedro Ramalhete
  Pascal Felber
  Nachshon Cohen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# OneFile PTM / STM

OneFile is a Software Transactional Memory (STM) meant to make it easy to implement lock-free and wait-free data structures.
It is based on the paper "[OneFile: A Wait-free Persistent Transactional Memory](https://github.com/pramalhe/OneFile/blob/master/OneFile-2019.pdf)" by Ramalhete, Correia, Felber and Cohen
https://github.com/pramalhe/OneFile/blob/master/OneFile-2019.pdf

It provides multi-word atomic updates on *tmtype* objects, where the wrapped type T must be word-sized, typically a pointer or an integer.
During a transaction, each store on a *tmtype* is transformed into a double-word compare-and-swap DCAS(), and one more regular CAS() is done to complete the transaction.
It does this with a store-log (write-set) which other writers can help apply.
This is a "redo-log" based technique, which means that both stores and loads need to be interposed.
Stores are interposed to save them in the log, and loads are interposed to look up the most recent value in the log.
If there is a transaction currently ongoing, readers have to check on each *tmtype::pload()* whether the variable we're trying to read is part of the current transaction.
If a value is read whose 'seq' is higher than the transaction we initially read, the whole read-only operation is restarted, by throwing an exception in the *tmtype::pload()* interposing method and catching this exception in the TM.
All of this logic is handled internally by OneFile without any explicit user interaction.
Because of operator overloading, the assignment and reading of *tmtype* types is done transparently to the user, with a pure library implementation, without any need for compiler instrumentation.
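As a concrete illustration of the type annotation, here is a minimal sketch of our own, in the style of the data structures in datastructures/ (the `oflf` namespace and the names `Node`/`insertAfter` are our assumptions, not code from the repository):

    #include "OneFileLF.hpp"
    using namespace oflf;      // assumption: the namespace used by OneFile-LF

    struct Node : public tmbase {          // derive from tmbase so the node can be reclaimed
        tmtype<uint64_t> key {0};          // each word-sized shared field is wrapped in a tmtype
        tmtype<Node*>    next {nullptr};
    };

    // Plain assignments on tmtype fields are interposed automatically:
    void insertAfter(Node* pred, Node* node) {
        node->next = pred->next;   // interposed load (pload) and store (pstore)
        pred->next = node;         // another interposed store
    }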
This means that the user can write the code as if it were a sequential implementation of the data structure, apart from the change of types (type annotation).
In this sense, OneFile is a "quasi-universal construction" with lock-free progress.

Our design goal with OneFile was to provide a non-blocking STM so that non-experts could implement their own lock-free and wait-free data structures.
OneFile is not designed to transform regular everyday code into lock-free applications.
Such use-cases require a lot more engineering work and likely a completely different approach from the one we took with OneFile (CX is a much better option for that purpose).

We've also made two implementations in the form of Persistent Transactional Memory (PTM), which are STMs meant for persistent memory, like Intel's Optane DC Persistent Memory.
In total, we've implemented four different variants of this design:

- OneFile-LF: The simplest of the four, has lock-free progress and lock-free memory reclamation using Hazard Eras;
- OneFile-WF: Uses aggregation (like flat-combining) and a new wait-free consensus to provide wait-free bounded progress. Has wait-free bounded memory reclamation;
- POneFile-LF: A PTM with durable transactions (ACID) and lock-free progress. Memory reclamation is lock-free using an optimistic technique. Allocation and de-allocation of user objects is lock-free;
- POneFile-WF: A PTM with durable transactions (ACID) and wait-free progress. Memory reclamation for user objects is wait-free using an optimistic technique, while memory reclamation of the transactional objects is done using Hazard Eras, also wait-free. Allocation and de-allocation of user objects is wait-free.

See the respective .hpp files for implementation details. Each implementation is a single header file. Yes, it's that small :)

## Quickstart ##

If you just want to use OneFile in your own application or benchmarks then follow these steps:

- Choose one of the four OneFile implementations, depending on whether you want an STM or a PTM, with lock-free or wait-free progress:

    [stms/OneFileLF.hpp](https://github.com/pramalhe/OneFile/blob/master/stms/OneFileLF.hpp) STM with lock-free transactions
    [stms/OneFileWF.hpp](https://github.com/pramalhe/OneFile/blob/master/stms/OneFileWF.hpp) STM with wait-free transactions
    [ptms/OneFilePTMLF.hpp](https://github.com/pramalhe/OneFile/blob/master/ptms/OneFilePTMLF.hpp) PTM with lock-free transactions
    [ptms/OneFilePTMWF.hpp](https://github.com/pramalhe/OneFile/blob/master/ptms/OneFilePTMWF.hpp) PTM with wait-free transactions

- Copy the header to your development folder
- Include the header from a single .cpp. If you include it from multiple compilation units (.cpp files) then move the last block in the .hpp to one of the .cpp files.
- If you want a data structure that is already made, take a look at what's in these folders:

    datastructures/ Data structures for volatile memory (needs one of the STMs)
    pdatastructures/ Data structures for persistent memory (needs one of the PTMs)

### Design ###

In OneFile STM, a transaction goes through three phases.
The first phase is to convert the operation (lambda) into a store-log (write-set).
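Before going through the remaining phases, here is a minimal sketch (our illustration, not OneFile's actual internals) of the basic building block used to apply the store-log: each transactional word is conceptually a {value, sequence} pair updated with a double-word CAS, assuming gcc/clang on x86-64:

    #include <cstdint>
    #include <cstring>

    // A transactional word: the value plus the sequence number of the
    // transaction that last wrote it (the layout is illustrative only).
    struct alignas(16) TmWord {
        uint64_t val;
        uint64_t seq;
    };

    // On x86-64 (gcc/clang with -mcx16) a 16-byte CAS maps to cmpxchg16b.
    inline bool dcas(TmWord* addr, TmWord expected, TmWord desired) {
        __int128 exp, des;
        std::memcpy(&exp, &expected, sizeof exp);
        std::memcpy(&des, &desired, sizeof des);
        return __atomic_compare_exchange_n((__int128*)addr, &exp, des,
                                           false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    }

Because every store-log entry is applied with a DCAS that expects the pre-transaction sequence number, each entry can be applied at most once, which is what lets multiple helpers apply the same log without ABA issues.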
There is no need to save the loads (read-set) because, unlike other approaches, a transaction does not need to re-check for changes at commit time: it does an in-flight check on each load of whether or not the value has changed since the beginning of the transaction, by looking at a sequence number associated with every read value, a technique similar to TL2 or TinySTM, but without the need for keeping a read-set, because all write transactions are executed one at a time, effectively serialized.
The second phase is to commit the transaction by advancing the current transaction (curTx).
The third phase is to apply the store-log using DCAS.

The first phase is implicitly serializable. Even if each thread publishes its operation, there is no way to parallelize this work among threads.
The best that could be done would be for each thread to transform its own operation into its own store-log, which it then appends to a global store-log.
Unfortunately this is possible only for disjoint-access parallel transactions, and these are not easy to detect; therefore, our implementation of OneFile does not do this.
Instead, we attempt to parallelize the third phase, where the store-log is applied. This task is easier to split among multiple threads, thus parallelizing it.
Adding Flat-Combining or other similar aggregation techniques to the first stage means that each thread will produce a store-log containing the operations of all the other threads.
This can be a bottleneck if the operations involve heavy computation and produce small store-logs.
For data structures this is not the case, and OneFile is designed to implement and work with data structures or other scenarios where transactions are short in time; therefore, we found it acceptable to go with such an approach.

The parallelization of the third phase can be done with at least two different approaches: blocking and non-blocking.
In the blocking approach, the store-log can be divided into chunks (for example, one chunk per thread), each chunk having a lock, and the thread that takes the lock is responsible for applying that chunk.
In the non-blocking approach (OF-LF and OF-WF), each thread tries to apply one entry of the store-log at a time.
To avoid ABA issues, a double-word compare-and-swap (DCAS) must be used.

In summary, OneFile does *not* do disjoint-access parallel transactions. If you absolutely need that functionality, then go and take a look at TinySTM.

## Requirements ##

- OneFile needs a double-word CAS, which limits it to x86. The algorithm can be modified to use LL/SC or even single-word CAS, at the cost of losing its generic capability, because bits would have to be stolen from a 64 bit word;
- The user must "instrument" the code where the atomic updates take place by wrapping the types with *tmtype*. Even then, the operator overloading will not cover all the cases and there will be situations where the user has to annotate the code with .pload() or .pstore() respectively;
- The *T* type must be the size of a word, i.e. 64 bits.
  Anything bigger needs to be split into multiple *tmtype* objects;
- If memory reclamation is needed, then the objects need to derive from the *tmbase* base class, need to be allocated with *tmNew()* or *tmMalloc()* and deallocated with *tmDelete()* or *tmFree()*;

## Memory Reclamation ##

We're using a customized implementation of Hazard Eras, a lock-free/wait-free memory reclamation technique:
[https://github.com/pramalhe/ConcurrencyFreaks/blob/master/papers/hazarderas-2017.pdf](https://github.com/pramalhe/ConcurrencyFreaks/blob/master/papers/hazarderas-2017.pdf)
[https://dl.acm.org/citation.cfm?id=3087588](https://dl.acm.org/citation.cfm?id=3087588)

See the HazardErasOF class in each implementation for more details.
As far as we know, there is only one wait-free data structure that has integrated wait-free memory reclamation:
[https://github.com/pramalhe/ConcurrencyFreaks/blob/master/papers/crturnqueue-2016.pdf](https://github.com/pramalhe/ConcurrencyFreaks/blob/master/papers/crturnqueue-2016.pdf)
OneFile and CX are the first generic mechanisms for wait-free memory reclamation.

## How to use this ##

1. Annotate all the objects that are shared among threads, namely, everything that is *std::atomic* should be changed to *tmtype*;
2. Use only *pstore()* and *pload()* (or just use '='). Do *not* call compare_exchange_strong(), exchange() or fetch_add();
3. Replace calls to "obj = new T(args)" with "obj = tmNew(args)";
4. Replace calls to "delete obj" with "tmDelete(obj)";
5. The T types must derive from the base class *tmbase*;
6. Place your methods in a lambda, capturing whatever you need, and pass the lambda to *updateTx()*.

That's it, you've now got your own lock-free data structure!
For an example of a simple linked-list set, take a look at datastructures/linkedlists/OFLFLinkedListSet.hpp, or see the sketch at the end of this section.

## Disadvantages ##

- All mutative operations are serialized;
- Types must be broken down to 64 bit sizes;
- Requires a double-word compare-and-swap (DCAS);

## Advantages ##

- Lock-free programming was never so easy: all the user code has to do is loads and stores on *tmtype* types, and those get transformed into a DCAS()-based transaction that provides correct linearizable lock-free progress, without ABA issues;
- Memory reclamation is also handled by OneFile using Hazard Eras, a lock-free/wait-free memory reclamation technique;
- Compared to hand-written lock-free data structures, in the uncontended case we are replacing each CAS with a DCAS and adding one extra (regular) CAS on the currTrans, which is a small price to pay for the atomicity;
- This technique provides full linearizability for generic code, even mutative iterators, something which is nearly impossible to do with hand-written lock-free data structures;
- Multiple helping threads can help apply the store-log starting at different places. A good heuristic is to start from entry (tid % numStores);
- OneFile-WF is the first STM with wait-free bounded progress, and it's the first to have wait-free bounded progress with wait-free bounded memory reclamation;
- Read-only transactions are lightweight and they can run concurrently with write transactions as long as they're disjoint.

The biggest advantage of all is that it's way easier to use OneFile than it is to implement a hand-made lock-free or wait-free data structure.
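Putting the steps from "How to use this" together, here is a minimal sketch of a transactional stack (our own example; tmtype, tmbase, tmNew, tmDelete and updateTx are the API pieces described above, but the exact call syntax may differ slightly between variants, so check OFLFLinkedListSet.hpp for the authoritative pattern):

    #include "OneFileLF.hpp"
    using namespace oflf;   // assumption: the namespace used by OneFile-LF

    struct StackNode : public tmbase {                 // step 5: derive from tmbase
        tmtype<uint64_t>   val {0};                    // step 1: wrap shared words in tmtype
        tmtype<StackNode*> next {nullptr};
    };

    class TMStack {
        tmtype<StackNode*> head {nullptr};
    public:
        void push(uint64_t v) {
            updateTx([&] () {                          // step 6: run the lambda as one transaction
                StackNode* node = tmNew<StackNode>();  // step 3: tmNew instead of new
                node->val = v;                         // step 2: plain '=' does pstore()/pload()
                node->next = head;
                head = node;
            });
        }
        bool pop(uint64_t& out) {
            bool ok = false;
            updateTx([&] () {
                StackNode* node = head;
                if (node == nullptr) { ok = false; return; }
                out = node->val;
                head = node->next;
                tmDelete(node);                        // step 4: tmDelete instead of delete
                ok = true;
            });
            return ok;
        }
    };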
## Examples ##

There are some working examples in the "datastructures/" folder:

    OFLFArrayQueue.hpp: An array based queue (memory-bounded)
    OFLFLinkedListQueue.hpp: A linked list based queue (memory unbounded)
    OFLFLinkedListSet.hpp: A linked list based set
    OFLFRedBlackTree.hpp: A Red-Black (balanced) tree map

## Benchmarks ##

To build the benchmarks you need to build ESTM and TinySTM, and then you need to pull PMDK (PMEM/NVML) and build it:

    cd ~/onefile/stms/
    cd estm-0.3.0
    make clean ; make
    cd ..
    cd tinystm
    make clean ; make
    cd ..
    cd ~
    git clone https://github.com/pmem/pmdk.git
    cd pmdk
    make -j12
    sudo make install
    export PMEM_IS_PMEM_FORCE=1
    cd ~/onefile/graphs
    make -j12

## Tests ##

The four implementations of OneFile were executed for thousands of CPU hours and heavily stress tested with invariant checking, using tools like address sanitizer and valgrind.
This is a lot more than what other STMs on github provide, but it doesn't mean there are no bugs in it ;)
If you see a crash or an invariant failure, run the same code under a global rw-lock to make sure the bug is not in your code.
If you really believe it's in OneFile, then please open a bug on github and add as much information as you can, namely, the stack trace and the files needed to reproduce it. We'll do our best to address it.


================================================
FILE: common/HazardEras.hpp
================================================
/******************************************************************************
 * Copyright (c) 2016-2017, Pedro Ramalhete, Andreia Correia
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Concurrency Freaks nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************
 */
#ifndef _HAZARD_ERAS_H_
#define _HAZARD_ERAS_H_

#include <atomic>
#include <cstdint>
#include <type_traits>
#include <vector>

/*
 *

 * <h1> Hazard Eras </h1>
 *
 * This is a light-weight implementation of hazard eras, where each thread has a
 * thread-local list of retired objects.
 *
 * This is based on the paper "Hazard Eras - Non-Blocking Memory Reclamation"
 * by Pedro Ramalhete and Andreia Correia:
 * github...
 *
 * The type T is for the objects/nodes and it's expected to have the following members:
 * newEra, delEra, delNext.
 *
 * R is zero.

 * @author Pedro Ramalhete
 * @author Andreia Correia
 */
template<typename T>
class HazardEras {
private:
    static const uint64_t NONE = 0;
    static const int HE_MAX_THREADS = 128;
    static const int MAX_HES = 5;          // This is named 'K' in the HP paper
    static const int CLPAD = 128/sizeof(std::atomic<uint64_t>);
    static const int HE_THRESHOLD_R = 0;   // This is named 'R' in the HP paper
    const int maxHEs;
    const int maxThreads;

    alignas(128) std::atomic<uint64_t>  eraClock {1};
    alignas(128) std::atomic<uint64_t>* he[HE_MAX_THREADS];
    alignas(128) std::vector<T*>        retiredList[HE_MAX_THREADS*CLPAD]; // It's not nice that we have a lot of empty vectors

public:
    HazardEras(int maxHEs=MAX_HES, int maxThreads=HE_MAX_THREADS) : maxHEs{maxHEs}, maxThreads{maxThreads} {
        for (int it = 0; it < HE_MAX_THREADS; it++) {
            he[it] = new std::atomic<uint64_t>[CLPAD*2]; // We allocate extra cache lines to allow for many hps without false sharing
            retiredList[it*CLPAD].reserve(maxThreads*maxHEs);
            for (int ihe = 0; ihe < MAX_HES; ihe++) {
                he[it][ihe].store(NONE, std::memory_order_relaxed);
            }
        }
        static_assert(std::is_same<decltype(T::newEra), uint64_t>::value, "T::newEra must be uint64_t");
        static_assert(std::is_same<decltype(T::delEra), uint64_t>::value, "T::delEra must be uint64_t");
    }

    ~HazardEras() {
        for (int it = 0; it < HE_MAX_THREADS; it++) {
            delete[] he[it];
            // Clear the current retired nodes
            for (unsigned iret = 0; iret < retiredList[it*CLPAD].size(); iret++) {
                delete retiredList[it*CLPAD][iret];
            }
        }
    }

    inline uint64_t getEra() {
        return eraClock.load();
    }

    /**
     * Progress Condition: wait-free bounded (by maxHEs)
     */
    inline void clear(const int tid) {
        for (int ihe = 0; ihe < maxHEs; ihe++) {
            he[tid][ihe].store(NONE, std::memory_order_release);
        }
    }

    /**
     * Progress Condition: lock-free
     */
    inline T* get_protected(int index, const std::atomic<T*>& atom, const int tid) {
        auto prevEra = he[tid][index].load(std::memory_order_relaxed);
        while (true) {
            T* ptr = atom.load();
            auto era = eraClock.load(std::memory_order_acquire);
            if (era == prevEra) return ptr;
            he[tid][index].store(era);
            prevEra = era;
        }
    }

    inline void protectEraRelease(int index, int other, const int tid) {
        auto era = he[tid][other].load(std::memory_order_relaxed);
        if (he[tid][index].load(std::memory_order_relaxed) == era) return;
        he[tid][index].store(era, std::memory_order_release);
    }

    /*
     * Does a single iteration. Must be integrated into the algorithm that's using HE.
     * In other words, we must re-check if the era has changed.
     *
     * Progress Condition: wait-free population oblivious
     */
    inline T* protectPtr(int index, const std::atomic<T*>& atom, uint64_t& prevEra, const int tid) {
        T* ptr = atom.load(std::memory_order_acquire);
        auto era = eraClock.load();
        if (prevEra != era) {
            prevEra = era;
            he[tid][index].store(era, std::memory_order_relaxed);
            std::atomic_thread_fence(std::memory_order_seq_cst);
        }
        return ptr;
    }

    /**
     * Retire an object (node)
     * Progress Condition: wait-free bounded
     */
    void retire(T* ptr, const int mytid) {
        auto currEra = eraClock.load();
        ptr->delEra = currEra;
        auto& rlist = retiredList[mytid*CLPAD];
        rlist.push_back(ptr);
        if (eraClock == currEra) eraClock.fetch_add(1);
        for (unsigned iret = 0; iret < rlist.size();) {
            auto obj = rlist[iret];
            if (canDelete(obj, mytid)) {
                rlist.erase(rlist.begin() + iret);
                delete obj;
                continue;
            }
            iret++;
        }
    }

private:
    bool canDelete(T* obj, const int mytid) {
        for (int tid = 0; tid < maxThreads; tid++) {
            for (int ihe = 0; ihe < maxHEs; ihe++) {
                const auto era = he[tid][ihe].load(std::memory_order_acquire);
                if (era == NONE || era < obj->newEra || era > obj->delEra) continue;
                return false;
            }
        }
        return true;
    }
};

#endif /* _HAZARD_ERAS_H_ */


================================================
FILE: common/HazardPointers.hpp
================================================
/******************************************************************************
 * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Concurrency Freaks nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************
 */
#ifndef _HAZARD_POINTERS_H_
#define _HAZARD_POINTERS_H_

#include <atomic>
#include <iostream>

/**
 * This is a customized version of Hazard Pointers to be used with CXMutation
 */
// TODO: use std::vector instead of arrays for the retired objects (keep the padding)
template<typename T>
class HazardPointers {
private:
    static const int MAX_THREADS = 128;
    static const int MAX_HPS = 5;
    static const int MAX_RETIRED = MAX_THREADS*MAX_HPS;
    static const int HP_THRESHOLD_R = 0;   // This is named 'R' in the HP paper
    static const int CLPAD = 128/sizeof(std::atomic<T*>);
    const int maxHPs;
    const int maxThreads;

    alignas(128) std::atomic<T*>* hp[MAX_THREADS*CLPAD];
    alignas(128) T** retiredObjects[MAX_THREADS*CLPAD];      // List of retired nodes that need to be 'deleted' for the current thread
    alignas(128) long numRetiredObjects[MAX_THREADS*CLPAD];  // Number of nodes in the retired list
    // Used specifically for CXMutation
    alignas(128) std::atomic<T*> heads[2*MAX_THREADS*CLPAD];

public:
    HazardPointers(int maxHPs=MAX_HPS, int maxThreads=MAX_THREADS) : maxHPs{maxHPs}, maxThreads{maxThreads} {
        for (int ih = 0; ih < 2*MAX_THREADS; ih++) {
            heads[ih*CLPAD].store(nullptr, std::memory_order_relaxed);
        }
        for (int ithread = 0; ithread < MAX_THREADS; ithread++) {
            numRetiredObjects[ithread*CLPAD] = 0;
            hp[ithread*CLPAD] = new std::atomic<T*>[MAX_HPS];
            for (int ihp = 0; ihp < MAX_HPS; ihp++) {
                hp[ithread*CLPAD][ihp].store(nullptr, std::memory_order_relaxed);
            }
            retiredObjects[ithread*CLPAD] = new T*[MAX_RETIRED];
            for (int iret = 0; iret < MAX_RETIRED; iret++) {
                retiredObjects[ithread*CLPAD][iret] = nullptr;
            }
        }
    }

    ~HazardPointers() {
        for (int ithread = 0; ithread < MAX_THREADS; ithread++) {
            // Clear the current retired nodes
            for (int iret = 0; iret < numRetiredObjects[ithread*CLPAD]; iret++) {
                delete (T*)retiredObjects[ithread*CLPAD][iret];
            }
            delete[] hp[ithread*CLPAD];
            delete[] retiredObjects[ithread*CLPAD];
        }
    }

    /**
     * Progress Condition: wait-free bounded (by maxHPs)
     *
     * It's ok to use relaxed loads here because:
     * - For progress: we know that the store will eventually become visible,
     *   or another publish() will take its place;
     * - For correctness: it can be re-ordered below, but at most it will protect
     *   an object for longer than required, i.e. until the next publish overwrites it.
     *   Or it gets re-ordered above, but only up to a seq-cst store on the same
     *   variable in publish(), which _must_ be it, even if the store in the publish
     *   is a release store (which is the case for publishRelease()).
     */
    void clear(const int tid) {
        for (int ihp = 0; ihp < maxHPs; ihp++) {
            hp[tid*CLPAD][ihp].store(nullptr, std::memory_order_relaxed);
        }
    }

    /**
     * Progress Condition: wait-free population oblivious
     */
    void clearOne(int ihp, const int tid) {
        hp[tid*CLPAD][ihp].store(nullptr, std::memory_order_relaxed);
    }

    /**
     * Progress Condition: lock-free
     */
    T* protect(int index, const std::atomic<T*>& atom, const int tid) {
        T* n = nullptr;
        T* ret;
        while ((ret = atom.load()) != n) {
            hp[tid*CLPAD][index].store(ret);
            n = ret;
        }
        return ret;
    }

    inline T* get_protected(int index, const std::atomic<T*>& atom, const int tid) {
        return protect(index, atom, tid);
    }

    /**
     * This returns the same value that is passed as ptr, which is sometimes useful
     * Progress Condition: wait-free population oblivious
     */
    T* protectPtr(int index, T* ptr, const int tid) {
        hp[tid*CLPAD][index].store(ptr);
        return ptr;
    }

    /**
     * This assumes that the ptr lhead is already protected by a "regular" hazard pointer
     */
    void protectHead(int combinedIndex, T* lhead) {
        heads[combinedIndex*CLPAD].store(lhead, std::memory_order_release);
    }

    std::atomic<T*>* getHeads() {
        return heads;
    }

    /**
     * This returns the same value that is passed as ptr, which is sometimes useful
     * Progress Condition: wait-free population oblivious
     */
    T* protectRelease(int index, T* ptr, const int tid) {
        hp[tid*CLPAD][index].store(ptr, std::memory_order_release);
        return ptr;
    }

    /**
     * Progress Condition: wait-free bounded (by the number of threads squared)
     */
    void retire(T* ptr, const int tid) {
        if (numRetiredObjects[tid*CLPAD] >= HP_THRESHOLD_R) scanAndDelete(tid);
        retiredObjects[tid*CLPAD][numRetiredObjects[tid*CLPAD]++] = ptr;
    }

    void copyPtr(int index, int other, const int tid) {
        auto ptr = hp[tid*CLPAD][other].load(std::memory_order_relaxed);
        hp[tid*CLPAD][index].store(ptr, std::memory_order_release);
    }

private:
    void scanAndDelete(const int tid) {
        for (int iret = 0; iret < numRetiredObjects[tid*CLPAD]; ) {
            bool ptrInUse = false;
            auto ptr = (T*)retiredObjects[tid*CLPAD][iret];
            for (int it = 0; it < maxThreads; it++) {
                for (int ihp = maxHPs-1; ihp >= 0; ihp--) {
                    if (ptr == hp[it*CLPAD][ihp].load()) ptrInUse = true;
                }
            }
            if (ptrInUse) { iret++; continue; }
            // Scan the array of heads before deleting the pointer
            for (int icomb = 0; icomb < 2*MAX_THREADS; icomb++) {
                if (ptr == heads[icomb*CLPAD].load()) ptrInUse = true;
            }
            if (ptrInUse) { iret++; continue; }
            for (int i = iret; i < numRetiredObjects[tid*CLPAD]-1; i++) retiredObjects[tid*CLPAD][i] = retiredObjects[tid*CLPAD][i+1];
            numRetiredObjects[tid*CLPAD]--;
            delete ptr;
        }
    }
};

#endif /* _HAZARD_POINTERS_H_ */


================================================
FILE: common/HazardPointersSimQueue.hpp
================================================
/******************************************************************************
 * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Concurrency Freaks nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************
 */
#ifndef _HAZARD_POINTERS_SIM_QUEUE_H_
#define _HAZARD_POINTERS_SIM_QUEUE_H_

#include <atomic>
#include <functional>
#include <iostream>
#include <vector>

/*
 * The main difference from this implementation to regular Hazard Pointers is
 * that the constructor takes a function pointer to a function 'find' which
 * acts as a callback, returning true if the pointer is still stored somewhere
 * in the data structure. This is used by SimQueue to indicate if there is a
 * pointer to the object we're trying to de-allocate in the array of enqReused.
 */
template<typename T>
class HazardPointersSimQueue {
private:
    static const int HP_MAX_THREADS = 128;
    static const int HP_MAX_HPS = 11;     // This is named 'K' in the HP paper
    static const int CLPAD = 128/sizeof(std::atomic<T*>);
    static const int HP_THRESHOLD_R = 0;  // This is named 'R' in the HP paper
    static const int MAX_RETIRED = HP_MAX_THREADS*HP_MAX_HPS; // Maximum number of retired objects per thread
    const int maxHPs;
    const int maxThreads;

    std::atomic<T*> hp[HP_MAX_THREADS*CLPAD][HP_MAX_HPS];
    // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing
    std::vector<T*> retiredList[HP_MAX_THREADS*CLPAD];
    std::function<bool(T*)> findPtr;

public:
    HazardPointersSimQueue(std::function<bool(T*)>& find, int maxHPs=HP_MAX_HPS, int maxThreads=HP_MAX_THREADS)
            : maxHPs{maxHPs}, maxThreads{maxThreads} {
        findPtr = find;
        for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) {
            for (int ihp = 0; ihp < HP_MAX_HPS; ihp++) {
                hp[ithread*CLPAD][ihp].store(nullptr, std::memory_order_relaxed);
            }
        }
    }

    ~HazardPointersSimQueue() {
        for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) {
            // Clear the current retired nodes
            for (unsigned iret = 0; iret < retiredList[ithread*CLPAD].size(); iret++) {
                delete retiredList[ithread*CLPAD][iret];
            }
        }
    }

    /**
     * Progress Condition: wait-free bounded (by maxHPs)
     */
    void clear(const int tid) {
        for (int ihp = 0; ihp < maxHPs; ihp++) {
            hp[tid*CLPAD][ihp].store(nullptr, std::memory_order_release);
        }
    }

    /**
     * Progress Condition: wait-free population oblivious
     */
    void clearOne(int ihp, const int tid) {
        hp[tid*CLPAD][ihp].store(nullptr, std::memory_order_release);
    }

    /**
     * Progress Condition: lock-free
     */
    T* protect(int index, const std::atomic<T*>& atom, const int tid) {
        T* n = nullptr;
        T* ret;
        while ((ret = atom.load()) != n) {
            hp[tid*CLPAD][index].store(ret);
            n = ret;
        }
        return ret;
    }

    /**
     * This returns the same value that is passed as ptr, which is sometimes useful
     * Progress Condition: wait-free population oblivious
     */
    T* protectPtr(int index, T* ptr, const int tid) {
        hp[tid*CLPAD][index].store(ptr);
        return ptr;
    }

    /**
     * This returns the same value that is passed as ptr, which is sometimes useful
     * Progress Condition: wait-free population oblivious
     */
    T* protectRelease(int index, T* ptr, const int tid) {
        hp[tid*CLPAD][index].store(ptr, std::memory_order_release);
        return ptr;
    }

    /**
     * Progress Condition: wait-free bounded (by the number of threads squared)
     */
    void retire(T* ptr, const int tid) {
        retiredList[tid*CLPAD].push_back(ptr);
        for (unsigned iret = 0; iret < retiredList[tid*CLPAD].size();) {
            auto obj = retiredList[tid*CLPAD][iret];
            if (findPtr(obj)) {
                iret++;
                continue;
            }
            bool canDelete = true;
            for (int it = 0; it < maxThreads && canDelete; it++) {
                for (int ihp = maxHPs-1; ihp >= 0; ihp--) {
                    if (hp[it*CLPAD][ihp].load() == obj) {
                        canDelete = false;
                        break;
                    }
                }
            }
            if (canDelete) {
                retiredList[tid*CLPAD].erase(retiredList[tid*CLPAD].begin() + iret);
                delete obj;
                continue;
            }
            iret++;
        }
    }
};

#endif /* _HAZARD_POINTERS_H_ */


================================================
FILE: common/README.md
================================================
Here are some files that are needed by other libraries and data structures:

    HazardEras.hpp              Used by some of the lock-free data structures for memory reclamation
    HazardPointers.hpp          Used by some of the lock-free data structures for memory reclamation
    HazardPointersSimQueue.hpp  Used by SimQueue for memory reclamation. Notice that the original SimQueue
                                implementation in C does not have memory reclamation. This implementation in C++,
                                with this modified version of Hazard Pointers, was done by Correia and Ramalhete
    pfences.h                   Used by Romulus
    RIStaticPerThread.hpp       Used by Romulus
    ThreadRegistry.cpp          Used by Romulus
    ThreadRegistry.hpp          Used by Romulus


================================================
FILE: common/RIStaticPerThread.hpp
================================================
/******************************************************************************
 * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Concurrency Freaks nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************
 */
#ifndef _RISTATIC_H_
#define _RISTATIC_H_

#include <atomic>
#include <cstdint>
#include <iostream>

// TODO: change all calls that need the tid to use a function argument
// TODO: use std::vector instead of arrays for the retired objects (keep the padding)
class RIStaticPerThread {
private:
    const int maxThreads;
    alignas(128) std::atomic<uint64_t>* states;
    static const uint64_t NOT_READING = 0;
    static const uint64_t READING = 1;
    static const int CLPAD = 128/sizeof(uint64_t);

public:
    RIStaticPerThread(int maxThreads) : maxThreads{maxThreads} {
        states = new std::atomic<uint64_t>[maxThreads*CLPAD];
        for (int tid = 0; tid < maxThreads; tid++) {
            states[tid*CLPAD].store(NOT_READING, std::memory_order_relaxed);
        }
    }

    ~RIStaticPerThread() {
        delete[] states;
    }

    // Will attempt to pass all current READING states to READING+1
    inline void abortRollback() noexcept {
        for (int tid = 0; tid < maxThreads; tid++) {
            if (states[tid*CLPAD].load() != READING) continue;
            uint64_t read = READING;
            states[tid*CLPAD].compare_exchange_strong(read, READING+1);
        }
    }

    // Returns true if the arrival was successfully rolled back.
    // If there was a writer changing the state to READING+1 then it will
    // return false, meaning that the arrive() is still valid and visible.
    inline bool rollbackArrive(const int tid) noexcept {
        return (states[tid*CLPAD].fetch_add(-1) == READING);
    }

    inline void arrive(const int tid) noexcept {
        states[tid*CLPAD].store(READING);
    }

    inline void depart(const int tid) noexcept {
        states[tid*CLPAD].store(NOT_READING);  // Making this "memory_order_release" will cause overflows!
    }

    inline bool isEmpty() noexcept {
        for (int tid = 0; tid < maxThreads; tid++) {
            if (states[tid*CLPAD].load() != NOT_READING) return false;
        }
        return true;
    }
};

#endif /* RISTATIC_H */


================================================
FILE: common/ThreadRegistry.cpp
================================================
/*
 * Contains all global variables.
 */
#include "common/ThreadRegistry.hpp"

// Global/singleton to hold all the thread registry functionality
ThreadRegistry gThreadRegistry {};

// This is where every thread stores the tid it has been assigned when it calls getTID() for the first time.
// When the thread dies, the destructor of ThreadCheckInCheckOut will be called and de-register the thread.
thread_local ThreadCheckInCheckOut tl_tcico {};

void thread_registry_deregister_thread(const int tid) {
    gThreadRegistry.deregister_thread(tid);
}


================================================
FILE: common/ThreadRegistry.hpp
================================================
#ifndef _THREAD_REGISTRY_H_
#define _THREAD_REGISTRY_H_

#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>

// Increase this if 128 threads is not enough
static const int REGISTRY_MAX_THREADS = 128;

extern void thread_registry_deregister_thread(const int tid);

// A helper class to do the check-in and check-out of the thread registry
struct ThreadCheckInCheckOut {
    static const int NOT_ASSIGNED = -1;
    int tid { NOT_ASSIGNED };
    ~ThreadCheckInCheckOut() {
        if (tid == NOT_ASSIGNED) return;
        thread_registry_deregister_thread(tid);
    }
};

extern thread_local ThreadCheckInCheckOut tl_tcico;

// Forward declaration of global/singleton instance
class ThreadRegistry;
extern ThreadRegistry gThreadRegistry;

/*
 *

 * <h1> Registry for threads </h1>

 *
 * This is a singleton-type class that allows assignment of a unique id to each thread.
 * The first time a thread calls ThreadRegistry::getTID() it will allocate a free slot in 'usedTID[]'.
 * This tid will be saved in a thread-local variable of the type ThreadCheckInCheckOut which,
 * upon destruction of the thread, will call the destructor of ThreadCheckInCheckOut and free the
 * corresponding slot to be used by a later thread.
 * RomulusLR relies on this to work properly.
 */
class ThreadRegistry {
private:
    alignas(128) std::atomic<bool> usedTID[REGISTRY_MAX_THREADS];  // Which TIDs are in use by threads
    alignas(128) std::atomic<int>  maxTid {-1};                    // Highest TID (+1) in use by threads

public:
    ThreadRegistry() {
        for (int it = 0; it < REGISTRY_MAX_THREADS; it++) {
            usedTID[it].store(false, std::memory_order_relaxed);
        }
    }

    /*
     * Progress Condition: wait-free bounded (by the number of threads)
     */
    int register_thread_new(void) {
        for (int tid = 0; tid < REGISTRY_MAX_THREADS; tid++) {
            if (usedTID[tid].load(std::memory_order_acquire)) continue;
            bool unused = false;
            if (!usedTID[tid].compare_exchange_strong(unused, true)) continue;
            // Increase the current maximum to cover our thread id
            int curMax = maxTid.load();
            while (curMax <= tid) {
                maxTid.compare_exchange_strong(curMax, tid+1);
                curMax = maxTid.load();
            }
            tl_tcico.tid = tid;
            return tid;
        }
        std::cout << "ERROR: Too many threads, registry can only hold " << REGISTRY_MAX_THREADS << " threads\n";
        assert(false);
    }

    /*
     * Progress condition: wait-free population oblivious
     */
    inline void deregister_thread(const int tid) {
        usedTID[tid].store(false, std::memory_order_release);
    }

    /*
     * Progress condition: wait-free population oblivious
     */
    static inline uint64_t getMaxThreads(void) {
        return gThreadRegistry.maxTid.load(std::memory_order_acquire);
    }

    /*
     * Progress condition: wait-free bounded (by the number of threads)
     */
    static inline int getTID(void) {
        int tid = tl_tcico.tid;
        if (tid != ThreadCheckInCheckOut::NOT_ASSIGNED) return tid;
        return gThreadRegistry.register_thread_new();
    }
};

#endif /* _THREAD_REGISTRY_H_ */


================================================
FILE: common/pfences.h
================================================
/*
 * Copyright 2017-2018
 *   Andreia Correia
 *   Pedro Ramalhete
 *   Pascal Felber
 *   Nachshon Cohen
 *
 * This work is published under the MIT license. See LICENSE.txt
 */
#ifndef _PERSISTENT_FENCES_
#define _PERSISTENT_FENCES_

/*
 * The naming for these macros and respective operations was taken from the excellent
 * "Preserving Happens-before in Persistent Memory" by Izraelevitz, Mendes, and Scott
 * https://www.cs.rochester.edu/u/jhi1/papers/2016-spaa-transform
 *
 * We have five different definitions of pwb/pfence/psync:
 * - Emulated: We introduce a delay on stores, like Mnemosyne does
 * - Nothing: only works with process restart persistency, i.e. process failures, but not system failure
 * - Define pwb as clflush (Broadwell cpus)
 * - Define pwb as clflushopt (most x86 cpus)
 * - Define pwb as clwb (only very recent cpus have this instruction)
 */

/*
 * We copied the methods from Mnemosyne:
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.221.5462&rep=rep1&type=pdf
 */
static inline unsigned long long asm_rdtsc(void) {
    unsigned hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
}

// Change this depending on the clock cycle of your cpu. For Cervino it's 2100, for my laptop it's 2712.
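// Worked example of the macros below: with EMULATED_CPUFREQ = 2100 (a 2.1 GHz clock),
// NS2CYCLE(340) evaluates to 340*2100/1000 = 714 cycles, so emulate_latency_ns(340)
// busy-waits until rdtsc() has advanced by roughly 714 ticks.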
#define EMULATED_CPUFREQ  2100

#define NS2CYCLE(__ns) ((__ns) * EMULATED_CPUFREQ / 1000)

static inline void emulate_latency_ns(int ns) {
    uint64_t stop;
    uint64_t start = asm_rdtsc();
    uint64_t cycles = NS2CYCLE(ns);
    do {
        /* RDTSC doesn't necessarily wait for previous instructions to complete
         * so a serializing instruction is usually used to ensure previous
         * instructions have completed. However, in our case this is a desirable
         * property since we want to overlap the latency we emulate with the
         * actual latency of the emulated instruction.
         */
        stop = asm_rdtsc();
    } while (stop - start < cycles);
}

/*
 * We use the settings on the delays for emulation from the NVMOVE paper:
 * http://www.cs.utexas.edu/~vijay/papers/inflow16-nvmove.pdf
 */
#ifdef PWB_IS_STT
/* Delays for emulating STT in DRAM */
#define PWB(addr)  emulate_latency_ns(140)
#define PFENCE()   emulate_latency_ns(200)
#define PSYNC()    emulate_latency_ns(200)
#elif PWB_IS_PCM
/* Delays for emulating PCM in DRAM */
#define PWB(addr)  emulate_latency_ns(340)
#define PFENCE()   emulate_latency_ns(500)
#define PSYNC()    emulate_latency_ns(500)
#elif PWB_IS_CLFLUSH
/*
 * More info at http://elixir.free-electrons.com/linux/latest/source/arch/x86/include/asm/special_insns.h#L213
 * Intel programming manual at https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
 * Use these for Broadwell CPUs (cervino server)
 */
#define PWB(addr)  __asm__ volatile("clflush (%0)" :: "r" (addr) : "memory")  // Broadwell only works with this.
#define PFENCE()   {}  // No ordering fences needed for CLFLUSH (section 7.4.6 of Intel manual)
#define PSYNC()    {}  // For durability it's not obvious, but CLFLUSH seems to be enough, and PMDK uses the same approach
#elif PWB_IS_CLWB
/* Use this for CPUs that support clwb, such as the SkyLake SP series (c5 compute intensive instances in AWS are an example of it) */
#define PWB(addr)  __asm__ volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)(addr)))  // clwb() only for Ice Lake onwards
#define PFENCE()   __asm__ volatile("sfence" : : : "memory")
#define PSYNC()    __asm__ volatile("sfence" : : : "memory")
#elif PWB_IS_NOP
/* pwbs are not needed for shared memory persistency (i.e. persistency across process failure) */
#define PWB(addr)  {}
#define PFENCE()   __asm__ volatile("sfence" : : : "memory")
#define PSYNC()    __asm__ volatile("sfence" : : : "memory")
#elif PWB_IS_CLFLUSHOPT
/* Use this for CPUs that support clflushopt, which is most recent x86 */
#define PWB(addr)  __asm__ volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)(addr)))  // clflushopt (Kaby Lake)
#define PFENCE()   __asm__ volatile("sfence" : : : "memory")
#define PSYNC()    __asm__ volatile("sfence" : : : "memory")
#else
#error "You must define what PWB is. Choose PWB_IS_CLFLUSHOPT if you don't know what your CPU is capable of"
#endif

// Flush each cache line in a range
// TODO: fix cache alignment
inline static void flushFromTo(void* from, void* to) noexcept {
    const int cache_line_size = 64;
    uint8_t* ptr = (uint8_t*)from;
    for (; ptr < (uint8_t*)to; ptr += cache_line_size) PWB(ptr);
}

// TODO: Implement fences for ARM

#endif


================================================
FILE: datastructures/generic/TMHashMap.hpp
================================================
/******************************************************************************
 * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Concurrency Freaks nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************
 */
#ifndef _TM_NON_RESIZABLE_HASH_MAP_H_
#define _TM_NON_RESIZABLE_HASH_MAP_H_

#include <functional>
#include <stdexcept>
#include <string>

#include "../../stms/tm.h"   // This header defines the macros for the STM being compiled

/**
 *
* <h1> A Non-Resizable Hash Map for usage with STMs </h1>
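* <p>
* Illustrative usage sketch (not part of the original sources): keys and values
* are passed as pointers that must outlive the map, and 'tid' identifies the
* calling thread. How the TM_* macros expand depends on the STM selected in
* stms/tm.h.
* <pre>
*   TMHashMap<uint64_t,uint64_t> hm {};
*   uint64_t k = 33;
*   const int tid = 0;
*   hm.add(&k, tid);                    // runs put(&k,&k,tid) inside a TM_WRITE_TRANSACTION
*   bool found = hm.contains(&k, tid);  // runs get() inside a TM_READ_TRANSACTION
*   hm.remove(&k, tid);                 // runs removeKey() inside a TM_WRITE_TRANSACTION
* </pre>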
* * Each node contains 4 entries (key/value) so as to provide better cache locality * * * TODO * * @author Pedro Ramalhete * @author Andreia Correia */ template class TMHashMap : public TM_BASE_TYPE { private: // One KeyVal is 16+16 bytes, therefore, 4 KeyVals are 2 cache lines in x86 (128 bytes) static const int KV_NUM = 4; static const unsigned int MAX_THREADS = 128; const unsigned int maxThreads; const unsigned int capacity; struct KeyVal { //uint64_t h; // Full hash of the key, for faster comparison. TODO: add code to handle h TM_TYPE key {nullptr}; TM_TYPE val {nullptr}; KeyVal() {} KeyVal(K* key, V* value) : key{key}, val{value} { } }; struct Node : TM_BASE_TYPE { KeyVal kv[KV_NUM]; TM_TYPE next {nullptr}; Node() {} Node(K* key, V* value) { kv[0].key = key; kv[0].val = value; } bool isEmpty() { for (int i = 0; i < KV_NUM; i++) { if (kv[i].key != nullptr) return false; } return true; } }; alignas(128) Node* buckets; // An array of Nodes int myhash(K* key) { return 0; } // Used only for tests public: TMHashMap(unsigned int maxThreads=MAX_THREADS, unsigned int capacity=2*1024*1024) : maxThreads{maxThreads}, capacity{capacity} { buckets = new Node[capacity]; } ~TMHashMap() { delete[] buckets; } std::string className() { return TM_NAME() + "-HashMap"; } /* * Progress Condition: lock-free * Adds a node with a key if the key is not present, otherwise replaces the value. * Returns the previous value (nullptr by default). */ V* put(K* key, V* value, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); if (value == nullptr) throw std::invalid_argument("value can not be nullptr"); V* oldVal = nullptr; KeyVal *firstFree = nullptr; auto h = std::hash{}(*key); Node* node = &buckets[h]; while (true) { for (int i = 0; i < KV_NUM; i++) { KeyVal& kv = node->kv[i]; if (kv.key == nullptr) { // Save the first available entry, in case we need to insert somewhere if (firstFree == nullptr) firstFree = &kv; continue; } if (*kv.key != *key) continue; // Found a matching key, replace the old value with the new oldVal = kv.val; kv.val = value; return oldVal; } Node* lnext = node->next; if (lnext == nullptr) break; node = lnext; } // We got here without a replacement, so insert in the first available if (firstFree == nullptr) { // No available entry, allocate a node and insert it there Node* newNode = TM_ALLOC(key,value); node->next = newNode; } else { firstFree->key = key; firstFree->val = value; } return oldVal; } /* * Progress Condition: lock-free * Removes a key, returning the value associated with it. * Returns nullptr if there is no matching key. 
*/ V* removeKey(K* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); auto h = std::hash{}(*key); Node* node = &buckets[h]; Node* prev = node; while (true) { for (int i = 0; i < KV_NUM; i++) { KeyVal& kv = node->kv[i]; if (kv.key == nullptr || *kv.key != *key) continue; // Found a matching key, replace the old value with nullptr V* oldVal = kv.val; kv.val = nullptr; kv.key = nullptr; // Check if it's the first node and if it is empty, then unlink it and free it if (prev != node && node->isEmpty()) { prev->next = node->next; TM_FREE(node); } return oldVal; } prev = node; node = node->next; // We got to the end without a matching key, return nullptr if (node == nullptr) return nullptr; } } /* * Progress Condition: lock-free * Returns the value of associated with the key, nullptr if there is no mapping */ V* get(K* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); auto h = std::hash{}(*key); Node* node = &buckets[h]; while (true) { for (int i = 0; i < KV_NUM; i++) { KeyVal& kv = node->kv[i]; if (kv.key == nullptr || *kv.key != *key) continue; return kv.val; } Node* lnext = node->next; if (lnext == nullptr) return nullptr; node = lnext; } } // // Set methods for running the usual tests and benchmarks // bool add(K* key, const int tid) { return TM_WRITE_TRANSACTION([&] () -> bool { return put(key,key, tid) == nullptr; }); } bool remove(K* key, const int tid) { return TM_WRITE_TRANSACTION([this,key,tid] () -> bool { return removeKey(key, tid) != nullptr; }); } bool contains(K* key, const int tid) { return TM_READ_TRANSACTION([this,key,tid] () -> bool { return get(key, tid) != nullptr; }); } // Used only for benchmarks. It's single-threaded bool addAll(K** keys, const int size, const int tid) { for (int i = 0; i < size; i++) add(keys[i], tid); } }; #endif /* _TM_NON_RESIZABLE_HASH_MAP_H_ */ ================================================ FILE: datastructures/generic/TMLinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _TM_LINKED_LIST_QUEUE_H_ #define _TM_LINKED_LIST_QUEUE_H_ #include <stdexcept> #include <string> #include "../../stms/CRWWPSTM.hpp" #include "../../stms/LeftRightTM.hpp" #include "../../stms/tm.h" // This header defines the macros for the STM being compiled #include "MWCLF.hpp" #include "MWCWF.hpp" #include "CXTM.hpp" /** *
* <h1> A Linked List queue using STM </h1>
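* <p>
* Illustrative usage sketch (not part of the original sources): items are passed
* by pointer, and a sentinel node at the head means dequeue() on an empty queue
* simply returns nullptr.
* <pre>
*   TMLinkedListQueue<int> q {};
*   int x = 7;
*   const int tid = 0;
*   q.enqueue(&x, tid);         // one TM_WRITE_TRANSACTION: link a new node after tail
*   int* a = q.dequeue(tid);    // a == &x
*   int* b = q.dequeue(tid);    // b == nullptr: only the sentinel remains
* </pre>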
* * * TODO * * * enqueue algorithm: sequential implementation + MWC * dequeue algorithm: sequential implementation + MWC * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: Hazard Eras (integrated into MWC) * enqueue min ops: 2 DCAS + 1 CAS * dequeue min ops: 1 DCAS + 1 CAS * * @author Pedro Ramalhete * @author Andreia Correia */ template class TMLinkedListQueue { private: static const unsigned int MAX_THREADS = 128; const unsigned int maxThreads; struct Node : TM_BASE_TYPE { T* item; TM_TYPE next; Node(T* userItem) : item{userItem}, next{nullptr} { } }; alignas(128) TM_TYPE head {nullptr}; alignas(128) TM_TYPE tail {nullptr}; public: TMLinkedListQueue(unsigned int maxThreads=MAX_THREADS) : maxThreads{maxThreads} { Node* sentinelNode = TM_ALLOC(nullptr); head = sentinelNode; tail = sentinelNode; } ~TMLinkedListQueue() { // TODO: replace this 0 with the actual tid otherwise we could have issues while (dequeue(0) != nullptr); // Drain the queue Node* lhead = head; delete lhead; } static std::string className() { return TM_NAME() + "-LinkedListQueue"; } /* * * Always returns true */ bool enqueue(T* item, const int tid) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return TM_WRITE_TRANSACTION([this,item] () -> bool { Node* newNode = TM_ALLOC(item); tail->next = newNode; tail = newNode; return true; }); } /* * */ T* dequeue(const int tid) { return TM_WRITE_TRANSACTION([this] () -> T* { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; TM_FREE(lhead); return head->item; }); } }; #endif /* _MWC_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/generic/TMLinkedListSet.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** */ #ifndef _TM_LINKED_LIST_SET_H_ #define _TM_LINKED_LIST_SET_H_ #include <stdexcept> #include <string> #include "../../stms/tm.h" // This header defines the macros for the STM being compiled /** *
* <h1> A Linked List Set for usage with STMs </h1>
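* <p>
* Illustrative usage sketch (not part of the original sources): the list is kept
* ordered between head/tail sentinels (largest keys first, judging by the
* traversal's early-exit test), so each operation is a single sorted scan inside
* one transaction.
* <pre>
*   TMLinkedListSet<uint64_t> set {};
*   uint64_t k = 5;
*   const int tid = 0;
*   set.add(&k, tid);           // true on the first insertion
*   set.add(&k, tid);           // false: key already present, the new node is TM_FREE'd
*   set.contains(&k, tid);      // true, runs as a TM_READ_TRANSACTION
*   set.remove(&k, tid);        // true, node reclaimed with TM_FREE
* </pre>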
* * TODO * * * @author Pedro Ramalhete * @author Andreia Correia */ template class TMLinkedListSet : public TM_BASE_TYPE { private: static const unsigned int MAX_THREADS = 128; const unsigned int maxThreads; struct Node : public TM_BASE_TYPE { T* key; TM_TYPE next; Node(T* key) : key{key}, next{nullptr} { } }; alignas(128) TM_TYPE head {nullptr}; alignas(128) TM_TYPE tail {nullptr}; public: TMLinkedListSet(unsigned int maxThreads=MAX_THREADS) : maxThreads{maxThreads} { Node* lhead = new Node(nullptr); Node* ltail = new Node(nullptr); head = lhead; head->next = ltail; tail = ltail; } ~TMLinkedListSet() { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { delete prev; prev = node; node = node->next; } delete prev; delete tail; } static std::string className() { return TM_NAME() + "-LinkedListSet"; } #ifdef TINY_STM /* * Progress Condition: lock-free * Adds a node with a key, returns false if the key is already in the set */ bool add(T* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); bool retval = false; WRITE_TX_BEGIN Node* newNode = TM_ALLOC(key); Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) { prev->next = newNode; newNode->next = node; retval = true; break; } if (*key == *node->key) { TM_FREE(newNode); // If the key was already in the set, free the node that was never used break; } if (*(node->key) < *key) { prev->next = newNode; newNode->next = node; retval = true; break; } prev = node; node = node->next; } WRITE_TX_END return retval; } /* * Progress Condition: lock-free * Removes a node with an key, returns false if the key is not in the set */ bool remove(T* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); bool retval = false; WRITE_TX_BEGIN Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) break; if (*key == *node->key) { prev->next = node->next; TM_FREE(node); retval = true; break; } if (*(node->key) < *key) break; prev = node; node = node->next; } WRITE_TX_END return retval; } /* * Progress Condition: lock-free * Returns true if it finds a node with a matching key */ bool contains(T* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); bool retval = false; READ_TX_BEGIN Node* node = head->next; while (true) { if (node == tail) break; if (*key == *node->key) {retval = true; break; } if (*(node->key) < *key) break; node = node->next; } READ_TX_END return retval; } #else /* * Progress Condition: lock-free * Adds a node with a key, returns false if the key is already in the set */ bool add(T* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); return TM_WRITE_TRANSACTION([this,key] () -> bool { Node* newNode = TM_ALLOC(key); Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) break; if (*key == *node->key) { TM_FREE(newNode); // If the key was already in the set, free the node that was never used return false; } if (*(node->key) < *key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: lock-free * Removes a node with an key, returns false if the key is not in the set */ bool remove(T* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); return TM_WRITE_TRANSACTION([this,key] () -> bool { Node* prev = head; Node* node = prev->next; while (true) { if 
(node == tail) return false; if (*key == *node->key) { prev->next = node->next; TM_FREE(node); return true; } if (*(node->key) < *key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: lock-free * Returns true if it finds a node with a matching key */ bool contains(T* key, const int tid) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); return TM_READ_TRANSACTION([this,key] () -> bool { Node* node = head->next; while (true) { if (node == tail) return false; if (*key == *node->key) return true; if (*(node->key) < *key) return false; node = node->next; } }); } #endif bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(keys[i], tid); } }; #endif /* _TM_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/generic/TMRedBlackBST.hpp ================================================ #ifndef _TM_RED_BLACK_BST_H_ #define _TM_RED_BLACK_BST_H_ #include #include #include #include "../../stms/tm.h" // This header defines the macros for the STM being compiled static const int64_t RED = 0; static const int64_t BLACK = 1; //http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/RedBlackBST.java template class TMRedBlackBST : public TM_BASE_TYPE { struct Node : TM_BASE_TYPE { TM_TYPE key; TM_TYPE val; TM_TYPE left {nullptr}; TM_TYPE right {nullptr}; TM_TYPE color; // color of parent link TM_TYPE size; // subtree count Node(K* key, V* val, int64_t color, int64_t size) : key{key}, val{val}, color{color}, size{size} {} }; TM_TYPE root {nullptr}; // root of the BST const unsigned int maxThreads; inline void assignAndFreeIfNull(TM_TYPE& z, Node* w) { Node* tofree = z; z = w; if (w == nullptr) TM_FREE(tofree); } public: /** * Initializes an empty symbol table. */ TMRedBlackBST(unsigned int maxThreads=128) : maxThreads{maxThreads} { } /*************************************************************************** * Node helper methods. ***************************************************************************/ // is node x red; false if x is null ? bool isRed(Node* x) { if (x == nullptr) return false; return x->color == RED; } // number of node in subtree rooted at x; 0 if x is null int size(Node* x) { if (x == nullptr) return 0; return x->size; } /** * Returns the number of key-value pairs in this symbol table. * @return the number of key-value pairs in this symbol table */ int size() { return size(root); } /** * Is this symbol table empty? * @return {@code true} if this symbol table is empty and {@code false} otherwise */ bool isEmpty() { return root == nullptr; } /*************************************************************************** * Standard BST search-> ***************************************************************************/ /** * Returns the value associated with the given key. * @param key the key * @return the value associated with the given key if the key is in the symbol table * and {@code null} if the key is not in the symbol table * @throws IllegalArgumentException if {@code key} is {@code null} */ V* get(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); return get(root, key); } // value associated with the given key in subtree rooted at x; null if no such key V* get(Node* x, K* key) { while (x != nullptr) { if (*key < *x->key) x = x->left; else if (*x->key < *key) x = x->right; else return x->val; } return nullptr; } /** * Does this symbol table contain the given key? 
* @param key the key * @return {@code true} if this symbol table contains {@code key} and * {@code false} otherwise * @throws IllegalArgumentException if {@code key} is {@code null} */ bool contains(K* key) { return get(key) != nullptr; } /*************************************************************************** * Red-black tree insertion. ***************************************************************************/ /** * Inserts the specified key-value pair into the symbol table, overwriting the old * value with the new value if the symbol table already contains the specified key. * Deletes the specified key (and its associated value) from this symbol table * if the specified value is {@code null}. * * @param key the key * @param val the value * @throws IllegalArgumentException if {@code key} is {@code null} */ void put(K* key, V* val) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (val == nullptr) { deleteKey(key); return; } root = put(root, key, val); root->color = BLACK; // assert check(); } // insert the key-value pair in the subtree rooted at h Node* put(Node* h, K* key, V* val) { if (h == nullptr) return TM_ALLOC(key, val, RED, 1); if (*key < *h->key) h->left = put(h->left, key, val); else if (*h->key < *key) h->right = put(h->right, key, val); else h->val = val; // fix-up any right-leaning links if (isRed(h->right) && !isRed(h->left)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Red-black tree deletion. ***************************************************************************/ /** * Removes the smallest key and associated value from the symbol table. * @throws NoSuchElementException if the symbol table is empty */ void deleteMin() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = RED; assignAndFreeIfNull(root, deleteMin(root)); if (!isEmpty()) root->color = BLACK; // assert check(); } // delete the key-value pair with the minimum key rooted at h Node* deleteMin(Node* h) { if (h->left == nullptr) return nullptr; if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h); assignAndFreeIfNull(h->left, deleteMin(h->left)); return balance(h); } /** * Removes the largest key and associated value from the symbol table. * @throws NoSuchElementException if the symbol table is empty */ void deleteMax() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = RED; root = deleteMax(root); if (!isEmpty()) root->color = BLACK; // assert check(); } // delete the key-value pair with the maximum key rooted at h Node* deleteMax(Node* h) { if (isRed(h->left)) h = rotateRight(h); if (h->right == nullptr) return nullptr; if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h); h->right = deleteMax(h->right); return balance(h); } /** * Removes the specified key and its associated value from this symbol table * (if the key is in this symbol table). 
* * @param key the key * @throws IllegalArgumentException if {@code key} is {@code null} */ void deleteKey(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (!contains(key)) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = RED; assignAndFreeIfNull(root, deleteKey(root, key)); if (!isEmpty()) root->color = BLACK; // assert check(); } // delete the key-value pair with the given key rooted at h Node* deleteKey(Node* h, K* key) { // assert get(h, key) != null; if (*key < *h->key) { if (!isRed(h->left) && !isRed(h->left->left)) { h = moveRedLeft(h); } assignAndFreeIfNull(h->left, deleteKey(h->left, key)); } else { if (isRed(h->left)) { h = rotateRight(h); } if (*key == *h->key && (h->right == nullptr)) { return nullptr; } if (!isRed(h->right) && !isRed(h->right->left)) { h = moveRedRight(h); } if (*key == *h->key) { Node* x = min(h->right); h->key = x->key; h->val = x->val; // h->val = get(h->right, min(h->right).key); // h->key = min(h->right).key; assignAndFreeIfNull(h->right, deleteMin(h->right)); } else { assignAndFreeIfNull(h->right, deleteKey(h->right, key)); } } return balance(h); } /*************************************************************************** * Red-black tree helper functions. ***************************************************************************/ // make a left-leaning link lean to the right Node* rotateRight(Node* h) { // assert (h != null) && isRed(h->left); Node* x = h->left; h->left = x->right; x->right = h; x->color = x->right->color; x->right->color = RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // make a right-leaning link lean to the left Node* rotateLeft(Node* h) { // assert (h != null) && isRed(h->right); Node* x = h->right; h->right = x->left; x->left = h; x->color = x->left->color; x->left->color = RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // flip the colors of a node and its two children void flipColors(Node* h) { // h must have opposite color of its two children // assert (h != null) && (h->left != null) && (h->right != null); // assert (!isRed(h) && isRed(h->left) && isRed(h->right)) // || (isRed(h) && !isRed(h->left) && !isRed(h->right)); h->color = !h->color; h->left->color = !h->left->color; h->right->color = !h->right->color; } // Assuming that h is red and both h->left and h->left.left // are black, make h->left or one of its children red. Node* moveRedLeft(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->left) && !isRed(h->left.left); flipColors(h); if (isRed(h->right->left)) { h->right = rotateRight(h->right); h = rotateLeft(h); flipColors(h); } return h; } // Assuming that h is red and both h->right and h->right.left // are black, make h->right or one of its children red. Node* moveRedRight(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->right) && !isRed(h->right.left); flipColors(h); if (isRed(h->left->left)) { h = rotateRight(h); flipColors(h); } return h; } // restore red-black tree invariant Node* balance(Node* h) { // assert (h != null); if (isRed(h->right)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Utility functions. 
***************************************************************************/ /** * Returns the height of the BST (for debugging). * @return the height of the BST (a 1-node tree has height 0) */ int height() { return height(root); } int height(Node* x) { if (x == nullptr) return -1; return 1 + std::max(height(x->left), height(x->right)); } /*************************************************************************** * Ordered symbol table methods. ***************************************************************************/ /** * Returns the smallest key in the symbol table. * @return the smallest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* min() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); return min(root).key; } // the smallest key in subtree rooted at x; null if no such key Node* min(Node* x) { // assert x != null; if (x->left == nullptr) return x; else return min(x->left); } /** * Returns the largest key in the symbol table. * @return the largest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* max() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); return max(root).key; } // the largest key in the subtree rooted at x; null if no such key Node* max(Node* x) { // assert x != null; if (x->right == nullptr) return x; else return max(x->right); } /** * Returns the largest key in the symbol table less than or equal to {@code key}. * @param key the key * @return the largest key in the symbol table less than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* floor(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); Node* x = floor(root, key); if (x == nullptr) return nullptr; else return x->key; } // the largest key in the subtree rooted at x less than or equal to the given key Node* floor(Node* x, K* key) { if (x == nullptr) return nullptr; if (*key == *x->key) return x; if (*key < *x->key) return floor(x->left, key); Node* t = floor(x->right, key); if (t != nullptr) return t; else return x; } /** * Returns the smallest key in the symbol table greater than or equal to {@code key}. * @param key the key * @return the smallest key in the symbol table greater than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* ceiling(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); Node* x = ceiling(root, key); if (x == nullptr) return nullptr; else return x->key; } // the smallest key in the subtree rooted at x greater than or equal to the given key Node* ceiling(Node* x, K* key) { if (x == nullptr) return nullptr; if (*key == *x->key) return x; if (*x->key < *key) return ceiling(x->right, key); Node* t = ceiling(x->left, key); if (t != nullptr) return t; else return x; } /** * Return the kth smallest key in the symbol table. 
* @param k the order statistic * @return the {@code k}th smallest key in the symbol table * @throws IllegalArgumentException unless {@code k} is between 0 and * n1 */ K* select(int k) { if (k < 0 || k >= size()) { throw std::invalid_argument("item can not be nullptr"); } Node x = select(root, k); return x->key; } // the key of rank k in the subtree rooted at x Node* select(Node* x, int k) { // assert x != null; // assert k >= 0 && k < size(x); int t = size(x->left); if (t > k) return select(x->left, k); else if (t < k) return select(x->right, k-t-1); else return x; } /** * Return the number of keys in the symbol table strictly less than {@code key}. * @param key the key * @return the number of keys in the symbol table strictly less than {@code key} * @throws IllegalArgumentException if {@code key} is {@code null} */ int rank(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); return rank(key, root); } // number of keys less than key in the subtree rooted at x int rank(K* key, Node* x) { if (x == nullptr) return 0; if (*key < *x->key) return rank(key, x->left); else if (*x->key < *key) return 1 + size(x->left) + rank(key, x->right); else return size(x->left); } /*************************************************************************** * Range count and range search-> ***************************************************************************/ /** * Returns the number of keys in the symbol table in the given range. * * @param lo minimum endpoint * @param hi maximum endpoint * @return the number of keys in the sybol table between {@code lo} * (inclusive) and {@code hi} (inclusive) * @throws IllegalArgumentException if either {@code lo} or {@code hi} * is {@code null} */ int size(K* lo, K* hi) { if (lo == nullptr) throw std::invalid_argument("item can not be nullptr"); if (hi == nullptr) throw std::invalid_argument("item can not be nullptr"); if (*hi < *lo) return 0; if (contains(hi)) return rank(hi) - rank(lo) + 1; else return rank(hi) - rank(lo); } /*************************************************************************** * Check integrity of red-black tree data structure. ***************************************************************************/ bool check() { if (!isBST()) std::cout << "Not in symmetric order\n"; if (!isSizeConsistent()) std::cout << "Subtree counts not consistent\n"; //if (!isRankConsistent()) std::cout << "Ranks not consistent\n"; if (!is23()) std::cout << "Not a 2-3 tree\n"; if (!isBalanced()) std::cout << "Not balanced\n"; return isBST() && isSizeConsistent() && is23() && isBalanced(); } // does this binary tree satisfy symmetric order? // Note: this test also ensures that data structure is a binary tree since order is strict bool isBST() { return isBST(root, nullptr, nullptr); } // is the tree rooted at x a BST with all keys strictly between min and max // (if min or max is null, treat as empty constraint) // Credit: Bob Dondero's elegant solution bool isBST(Node* x, K* min, K* max) { if (x == nullptr) return true; // TODO: port these two lines //if (min != nullptr && x->key.compareTo(min) <= 0) return false; //if (max != nullptr && x->key.compareTo(max) >= 0) return false; return isBST(x->left, min, x->key) && isBST(x->right, x->key, max); } // are the size fields correct? 
bool isSizeConsistent() { return isSizeConsistent(root); } bool isSizeConsistent(Node* x) { if (x == nullptr) return true; if (x->size != size(x->left) + size(x->right) + 1) return false; return isSizeConsistent(x->left) && isSizeConsistent(x->right); } /* // check that ranks are consistent bool isRankConsistent() { for (int i = 0; i < size(); i++) if (i != rank(select(i))) return false; for (K* key : keys()) if (key.compareTo(select(rank(key))) != 0) return false; return true; } */ // Does the tree have no red right links, and at most one (left) // red links in a row on any path? bool is23() { return is23(root); } bool is23(Node* x) { if (x == nullptr) return true; if (isRed(x->right)) return false; if (x != root && isRed(x) && isRed(x->left)) return false; return is23(x->left) && is23(x->right); } // do all paths from root to leaf have same number of black edges? bool isBalanced() { int black = 0; // number of black links on path from root to min Node x = root; while (x != nullptr) { if (!isRed(x)) black++; x = x->left; } return isBalanced(root, black); } // does every path from the root to a leaf have the given number of black links? bool isBalanced(Node* x, int black) { if (x == nullptr) return black == 0; if (!isRed(x)) black--; return isBalanced(x->left, black) && isBalanced(x->right, black); } // Set methods bool add(K* key, const int tid) { return TM_WRITE_TRANSACTION([this,key] () -> bool { if (contains(key)) return false; put(key,key); return true; }); } bool remove(K* key, const int tid) { return TM_WRITE_TRANSACTION([this,key] () -> bool { if (!contains(key)) return false; deleteKey(key); return true; }); } inline bool contains(K* key, const int tid) { return TM_READ_TRANSACTION([this,key] () -> bool { return contains(key); }); } // This is not fully transactionally but it's ok because we use it only on initialization. // We could make it fully transactionally, but we would have to increase the size of allocation/store logs. bool addAll(K** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(keys[i], tid); } std::string className() { return TM_NAME() + "-RedBlackBST"; } }; #endif // _TM_RED_BLACK_BST_H_ ================================================ FILE: datastructures/hashmaps/CRWWPSTMResizableHashSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _CRWWP_STM_RESIZABLE_HASH_MAP_H_ #define _CRWWP_STM_RESIZABLE_HASH_MAP_H_ #include #include "stms/CRWWPSTM.hpp" /** *
* <h1> A Resizable Hash Map for usage with STMs </h1>
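* <p>
* Illustrative usage sketch (not part of the original sources): the public
* methods already wrap the inner*() helpers in crwwpstm transactions, so callers
* never open a transaction themselves.
* <pre>
*   CRWWPSTMResizableHashSet<uint64_t> set {};
*   set.add(33);        // crwwpstm::updateTx([&] () { return innerPut(33); })
*   set.contains(33);   // crwwpstm::readTx: readers may run concurrently
*   set.remove(33);     // node reclaimed with crwwpstm::tmDelete()
* </pre>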
* TODO * */ template class CRWWPSTMResizableHashSet { private: struct Node : public crwwpstm::tmbase { crwwpstm::tmtype key; crwwpstm::tmtype next {nullptr}; Node(const K& k) : key{k} { } // Copy constructor for k }; crwwpstm::tmtype capacity; crwwpstm::tmtype sizeHM = 0; static constexpr double loadFactor = 0.75; crwwpstm::tmtype*> buckets; // An array of pointers to Nodes public: CRWWPSTMResizableHashSet(int maxThreads=0, int capacity=4) : capacity{capacity} { crwwpstm::updateTx([&] () { buckets = (crwwpstm::tmtype*)crwwpstm::tmMalloc(capacity*sizeof(crwwpstm::tmtype)); for (int i = 0; i < capacity; i++) buckets[i] = nullptr; }); } ~CRWWPSTMResizableHashSet() { crwwpstm::updateTx([&] () { for(int i = 0; i < capacity; i++){ Node* node = buckets[i]; while (node != nullptr) { Node* next = node->next; crwwpstm::tmDelete(node); node = next; } } crwwpstm::tmFree(buckets.load()); }); } static std::string className() { return crwwpstm::CRWWPSTM::className() + "-HashMap"; } void rebuild() { int newcapacity = 2*capacity; crwwpstm::tmtype* newbuckets = (crwwpstm::tmtype*)crwwpstm::tmMalloc(newcapacity*sizeof(crwwpstm::tmtype)); for (int i = 0; i < newcapacity; i++) newbuckets[i] = nullptr; for (int i = 0; i < capacity; i++) { Node* node = buckets[i]; while(node!=nullptr){ Node* next = node->next; auto h = std::hash{}(node->key) % newcapacity; node->next = newbuckets[h]; newbuckets[h] = node; node = next; } } crwwpstm::tmFree(buckets.load()); buckets = newbuckets; capacity = newcapacity; } /* * Adds a node with a key if the key is not present, otherwise replaces the value. * If saveOldValue is set, it will set 'oldValue' to the previous value, iff there was already a mapping. * * Returns true if there was no mapping for the key, false if there was already a value and it was replaced. */ bool innerPut(const K& key) { if (sizeHM > capacity*loadFactor) rebuild(); auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) { Node* newnode = crwwpstm::tmNew(key); if (node == prev) { buckets[h] = newnode; } else { prev->next = newnode; } sizeHM++; return true; // New insertion } if (key == node->key) return false; prev = node; node = node->next; } } /* * Removes a key and its mapping. * Saves the value in 'oldvalue' if 'saveOldValue' is set. * * Returns returns true if a matching key was found */ bool innerRemove(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) return false; if (key == node->key) { if (node == prev) { buckets[h] = node->next; } else { prev->next = node->next; } sizeHM--; crwwpstm::tmDelete(node); return true; } prev = node; node = node->next; } } /* * Returns true if key is present. Saves a copy of 'value' in 'oldValue' if 'saveOldValue' is set. 
*/ bool innerGet(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; while (true) { if (node == nullptr) return false; if (key == node->key) return true; node = node->next; } } // // Set methods for running the usual tests and benchmarks // // Inserts a key only if it's not already present bool add(K key, const int tid=0) { return crwwpstm::updateTx([&] () { return innerPut(key); }); } // Returns true only if the key was present bool remove(K key, const int tid=0) { return crwwpstm::updateTx([&] () { return innerRemove(key); }); } bool contains(K key, const int tid=0) { return crwwpstm::readTx([&] () { return innerGet(key); }); } // Used only for benchmarks void addAll(K** keys, const int size, const int tid=0) { for (int i = 0; i < size; i++) add(*keys[i]); } }; #endif /* _CRWWP_STM_RESIZABLE_HASH_MAP_H_ */ ================================================ FILE: datastructures/hashmaps/ESTMResizableHashSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _ESTM_RESIZABLE_HASH_MAP_H_ #define _ESTM_RESIZABLE_HASH_MAP_H_ #include #include "stms/ESTM.hpp" /** *
* <h1> A Resizable Hash Map for usage with STMs </h1>
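* <p>
* A small worked example of the resize policy (shared by all these hash sets):
* with the default capacity of 4 and loadFactor 0.75 the threshold is 4*0.75 = 3,
* so once sizeHM reaches 4 the next innerPut() first calls rebuild(), which
* doubles capacity to 8 and rehashes every node into a new bucket array within
* the same transaction.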
* TODO * */ template class ESTMResizableHashSet { private: struct Node : public estm::tmbase { estm::tmtype key; estm::tmtype next {nullptr}; Node(const K& k) : key{k} { } // Copy constructor for k }; estm::tmtype capacity; estm::tmtype sizeHM = 0; static constexpr double loadFactor = 0.75; estm::tmtype*> buckets; // An array of pointers to Nodes public: ESTMResizableHashSet(int maxThreads=0, uint64_t capacity=4) : capacity{capacity} { estm::updateTx([&] () { buckets = (estm::tmtype*)estm::tmMalloc(capacity*sizeof(estm::tmtype)); for (int i = 0; i < capacity; i++) buckets[i] = nullptr; }); } ~ESTMResizableHashSet() { estm::updateTx([&] () { for(int i = 0; i < capacity; i++){ Node* node = buckets[i]; while (node != nullptr) { Node* next = node->next; estm::tmDelete(node); node = next; } } estm::tmFree(buckets.load()); }); } static std::string className() { return estm::ESTM::className() + "-HashMap"; } void rebuild() { uint64_t newcapacity = 2*capacity; estm::tmtype* newbuckets = (estm::tmtype*)estm::tmMalloc(newcapacity*sizeof(estm::tmtype)); for (int i = 0; i < newcapacity; i++) newbuckets[i] = nullptr; for (int i = 0; i < capacity; i++) { Node* node = buckets[i]; while (node!=nullptr) { Node* next = node->next; auto h = std::hash{}(node->key) % newcapacity; node->next = newbuckets[h]; newbuckets[h] = node; node = next; } } estm::tmFree(buckets); buckets = newbuckets; capacity = newcapacity; } /* * Adds a node with a key if the key is not present, otherwise replaces the value. * If saveOldValue is set, it will set 'oldValue' to the previous value, iff there was already a mapping. * * Returns true if there was no mapping for the key, false if there was already a value and it was replaced. */ bool innerPut(const K& key) { if (sizeHM.load() > capacity.load()*loadFactor) rebuild(); auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) { Node* newnode = estm::tmNew(key); if (node == prev) { buckets[h] = newnode; } else { prev->next = newnode; } sizeHM++; return true; // New insertion } if (key == node->key) return false; prev = node; node = node->next; } } /* * Removes a key and its mapping. * Saves the value in 'oldvalue' if 'saveOldValue' is set. * * Returns returns true if a matching key was found */ bool innerRemove(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) return false; if (key == node->key) { if (node == prev) { buckets[h] = node->next; } else { prev->next = node->next; } sizeHM--; estm::tmDelete(node); return true; } prev = node; node = node->next; } } /* * Returns true if key is present. Saves a copy of 'value' in 'oldValue' if 'saveOldValue' is set. 
*/ bool innerGet(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; while (true) { if (node == nullptr) return false; if (key == node->key) return true; node = node->next; } } // // Set methods for running the usual tests and benchmarks // // Inserts a key only if it's not already present bool add(K key, const int tid=0) { return estm::updateTx([&] () { return innerPut(key); }); } // Returns true only if the key was present bool remove(K key, const int tid=0) { return estm::updateTx([&] () { return innerRemove(key); }); } bool contains(K key, const int tid=0) { return estm::readTx([&] () { return innerGet(key); }); } // Used only for benchmarks void addAll(K** keys, const int size, const int tid=0) { for (int i = 0; i < size; i++) add(*keys[i]); } }; #endif /* _ESTM_RESIZABLE_HASH_MAP_H_ */ ================================================ FILE: datastructures/hashmaps/OFLFResizableHashSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OF_LF_RESIZABLE_HASH_MAP_H_ #define _OF_LF_RESIZABLE_HASH_MAP_H_ #include #include "stms/OneFileLF.hpp" /** *
* <h1> A Resizable Hash Map for usage with STMs </h1>
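* <p>
* Illustrative usage sketch (not part of the original sources): this is the
* OneFile lock-free variant, so tmtype fields are read with pload() inside the
* transaction, and the set methods below wrap everything in oflf::updateTx()
* or oflf::readTx().
* <pre>
*   OFLFResizableHashSet<uint64_t> set {};
*   set.add(33);        // lock-free update transaction
*   set.contains(33);   // lock-free read-only transaction
* </pre>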
* TODO * */ template class OFLFResizableHashSet { private: struct Node : public oflf::tmbase { oflf::tmtype key; oflf::tmtype next {nullptr}; Node(const K& k) : key{k} { } // Copy constructor for k }; oflf::tmtype capacity; oflf::tmtype sizeHM = 0; static constexpr double loadFactor = 0.75; oflf::tmtype*> buckets; // An array of pointers to Nodes public: OFLFResizableHashSet(int maxThreads=0, uint64_t capacity=4) : capacity{capacity} { oflf::updateTx([&] () { buckets = (oflf::tmtype*)oflf::tmMalloc(capacity*sizeof(oflf::tmtype)); for (int i = 0; i < capacity; i++) buckets[i] = nullptr; }); } ~OFLFResizableHashSet() { oflf::updateTx([&] () { for (int i = 0; i < capacity; i++){ Node* node = buckets[i]; while (node != nullptr) { Node* next = node->next; oflf::tmDelete(node); node = next; } } oflf::tmFree(buckets.pload()); }); } static std::string className() { return oflf::OneFileLF::className() + "-HashMap"; } void rebuild() { uint64_t newcapacity = 2*capacity; oflf::tmtype* newbuckets = (oflf::tmtype*)oflf::tmMalloc(newcapacity*sizeof(oflf::tmtype)); for (int i = 0; i < newcapacity; i++) newbuckets[i] = nullptr; for (int i = 0; i < capacity; i++) { Node* node = buckets[i]; while (node!=nullptr) { Node* next = node->next; auto h = std::hash{}(node->key) % newcapacity; node->next = newbuckets[h]; newbuckets[h] = node; node = next; } } oflf::tmFree(buckets.pload()); buckets = newbuckets; capacity = newcapacity; } /* * Adds a node with a key if the key is not present, otherwise replaces the value. * If saveOldValue is set, it will set 'oldValue' to the previous value, iff there was already a mapping. * * Returns true if there was no mapping for the key, false if there was already a value and it was replaced. */ bool innerPut(const K& key) { if (sizeHM.pload() > capacity.pload()*loadFactor) rebuild(); auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) { Node* newnode = oflf::tmNew(key); if (node == prev) { buckets[h] = newnode; } else { prev->next = newnode; } sizeHM++; return true; // New insertion } if (key == node->key) return false; prev = node; node = node->next; } } /* * Removes a key and its mapping. * Saves the value in 'oldvalue' if 'saveOldValue' is set. * * Returns returns true if a matching key was found */ bool innerRemove(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) return false; if (key == node->key) { if (node == prev) { buckets[h] = node->next; } else { prev->next = node->next; } sizeHM--; oflf::tmDelete(node); return true; } prev = node; node = node->next; } } /* * Returns true if key is present. Saves a copy of 'value' in 'oldValue' if 'saveOldValue' is set. 
*/ bool innerGet(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; while (true) { if (node == nullptr) return false; if (key == node->key) return true; node = node->next; } } // // Set methods for running the usual tests and benchmarks // // Inserts a key only if it's not already present bool add(K key, const int tid=0) { return oflf::updateTx([&] () { return innerPut(key); }); } // Returns true only if the key was present bool remove(K key, const int tid=0) { return oflf::updateTx([&] () { return innerRemove(key); }); } bool contains(K key, const int tid=0) { return oflf::readTx([&] () { return innerGet(key); }); } // Used only for benchmarks void addAll(K** keys, const int size, const int tid=0) { for (int i = 0; i < size; i++) add(*keys[i]); } }; #endif /* _OF_LF_RESIZABLE_HASH_MAP_H_ */ ================================================ FILE: datastructures/hashmaps/OFWFResizableHashSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OF_WF_RESIZABLE_HASH_MAP_H_ #define _OF_WF_RESIZABLE_HASH_MAP_H_ #include #include "stms/OneFileWF.hpp" /** *
* <h1> A Resizable Hash Map for usage with STMs </h1>
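* <p>
* Unlike the lock-free variant, with the wait-free OneFile STM a pending
* transaction may be completed by a helping thread, which is presumably why the
* lambdas below capture by value ([=]) rather than by reference. Usage is the
* same as for the other hash sets:
* <pre>
*   OFWFResizableHashSet<uint64_t> set {};
*   set.add(33);        // wait-free: may be finished by a helper thread
*   set.contains(33);
* </pre>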
* TODO * */ template class OFWFResizableHashSet { private: struct Node : public ofwf::tmbase { ofwf::tmtype key; ofwf::tmtype next {nullptr}; Node(const K& k) : key{k} { } // Copy constructor for k }; ofwf::tmtype capacity; ofwf::tmtype sizeHM = 0; static constexpr double loadFactor = 0.75; ofwf::tmtype*> buckets; // An array of pointers to Nodes public: OFWFResizableHashSet(int maxThreads=0, uint64_t capacity=4) : capacity{capacity} { ofwf::updateTx([&] () { buckets = (ofwf::tmtype*)ofwf::tmMalloc(capacity*sizeof(ofwf::tmtype)); for (int i = 0; i < capacity; i++) buckets[i] = nullptr; }); } ~OFWFResizableHashSet() { ofwf::updateTx([=] () { for (int i = 0; i < capacity; i++){ Node* node = buckets[i]; while (node != nullptr) { Node* next = node->next; ofwf::tmDelete(node); node = next; } } ofwf::tmFree(buckets.pload()); }); } static std::string className() { return ofwf::OneFileWF::className() + "-HashMap"; } void rebuild() { uint64_t newcapacity = 2*capacity; ofwf::tmtype* newbuckets = (ofwf::tmtype*)ofwf::tmMalloc(newcapacity*sizeof(ofwf::tmtype)); for (int i = 0; i < newcapacity; i++) newbuckets[i] = nullptr; for (int i = 0; i < capacity; i++) { Node* node = buckets[i]; while (node!=nullptr) { Node* next = node->next; auto h = std::hash{}(node->key) % newcapacity; node->next = newbuckets[h]; newbuckets[h] = node; node = next; } } ofwf::tmFree(buckets.pload()); buckets = newbuckets; capacity = newcapacity; } /* * Adds a node with a key if the key is not present, otherwise replaces the value. * If saveOldValue is set, it will set 'oldValue' to the previous value, iff there was already a mapping. * * Returns true if there was no mapping for the key, false if there was already a value and it was replaced. */ bool innerPut(const K& key) { if (sizeHM.pload() > capacity.pload()*loadFactor) rebuild(); auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) { Node* newnode = ofwf::tmNew(key); if (node == prev) { buckets[h] = newnode; } else { prev->next = newnode; } sizeHM++; return true; // New insertion } if (key == node->key) return false; prev = node; node = node->next; } } /* * Removes a key and its mapping. * Saves the value in 'oldvalue' if 'saveOldValue' is set. * * Returns returns true if a matching key was found */ bool innerRemove(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) return false; if (key == node->key) { if (node == prev) { buckets[h] = node->next; } else { prev->next = node->next; } sizeHM--; ofwf::tmDelete(node); return true; } prev = node; node = node->next; } } /* * Returns true if key is present. Saves a copy of 'value' in 'oldValue' if 'saveOldValue' is set. 
*/ bool innerGet(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; while (true) { if (node == nullptr) return false; if (key == node->key) return true; node = node->next; } } // // Set methods for running the usual tests and benchmarks // // Inserts a key only if it's not already present bool add(K key, const int tid=0) { return ofwf::updateTx([=] () { return innerPut(key); }); } // Returns true only if the key was present bool remove(K key, const int tid=0) { return ofwf::updateTx([=] () { return innerRemove(key); }); } bool contains(K key, const int tid=0) { return ofwf::readTx([=] () { return innerGet(key); }); } // Used only for benchmarks void addAll(K** keys, const int size, const int tid=0) { for (int i = 0; i < size; i++) add(*keys[i]); } }; #endif /* _OF_WF_RESIZABLE_HASH_MAP_H_ */ ================================================ FILE: datastructures/hashmaps/TinySTMResizableHashSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _TINY_STM_RESIZABLE_HASH_MAP_H_ #define _TINY_STM_RESIZABLE_HASH_MAP_H_ #include #include "stms/TinySTM.hpp" /** *
* <h1> A Resizable Hash Map for usage with STMs </h1>
* TODO * */ template class TinySTMResizableHashSet { private: struct Node : public tinystm::tmbase { tinystm::tmtype key; tinystm::tmtype next {nullptr}; Node(const K& k) : key{k} { } // Copy constructor for k }; tinystm::tmtype capacity; tinystm::tmtype sizeHM = 0; static constexpr double loadFactor = 0.75; tinystm::tmtype*> buckets; // An array of pointers to Nodes public: TinySTMResizableHashSet(int maxThreads=0, uint64_t capacity=4) : capacity{capacity} { tinystm::updateTx([&] () { buckets = (tinystm::tmtype*)tinystm::tmMalloc(capacity*sizeof(tinystm::tmtype)); for (int i = 0; i < capacity; i++) buckets[i] = nullptr; return true; }); } ~TinySTMResizableHashSet() { tinystm::updateTx([&] () { for(int i = 0; i < capacity; i++){ Node* node = buckets[i]; while (node != nullptr) { Node* next = node->next; tinystm::tmDelete(node); node = next; } } tinystm::tmFree(buckets.load()); return true; }); } static std::string className() { return tinystm::TinySTM::className() + "-HashMap"; } void rebuild() { uint64_t newcapacity = 2*capacity; tinystm::tmtype* newbuckets = (tinystm::tmtype*)tinystm::tmMalloc(newcapacity*sizeof(tinystm::tmtype)); for (int i = 0; i < newcapacity; i++) newbuckets[i] = nullptr; for (int i = 0; i < capacity; i++) { Node* node = buckets[i]; while (node!=nullptr) { Node* next = node->next; auto h = std::hash{}(node->key) % newcapacity; node->next = newbuckets[h]; newbuckets[h] = node; node = next; } } tinystm::tmFree(buckets); buckets = newbuckets; capacity = newcapacity; } /* * Adds a node with a key if the key is not present, otherwise replaces the value. * If saveOldValue is set, it will set 'oldValue' to the previous value, iff there was already a mapping. * * Returns true if there was no mapping for the key, false if there was already a value and it was replaced. */ bool innerPut(const K& key) { if (sizeHM > capacity*loadFactor) rebuild(); auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) { Node* newnode = tinystm::tmNew(key); if (node == prev) { buckets[h] = newnode; } else { prev->next = newnode; } sizeHM++; return true; // New insertion } if (key == node->key) return false; prev = node; node = node->next; } } /* * Removes a key and its mapping. * Saves the value in 'oldvalue' if 'saveOldValue' is set. * * Returns returns true if a matching key was found */ bool innerRemove(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; Node* prev = node; while (true) { if (node == nullptr) return false; if (key == node->key) { if (node == prev) { buckets[h] = node->next; } else { prev->next = node->next; } sizeHM--; tinystm::tmDelete(node); return true; } prev = node; node = node->next; } } /* * Returns true if key is present. Saves a copy of 'value' in 'oldValue' if 'saveOldValue' is set. 
*/ bool innerGet(const K& key) { auto h = std::hash{}(key) % capacity; Node* node = buckets[h]; while (true) { if (node == nullptr) return false; if (key == node->key) return true; node = node->next; } } // // Set methods for running the usual tests and benchmarks // // Inserts a key only if it's not already present bool add(K key, const int tid=0) { return tinystm::updateTx([&] () { return innerPut(key); }); } // Returns true only if the key was present bool remove(K key, const int tid=0) { return tinystm::updateTx([&] () { return innerRemove(key); }); } bool contains(K key, const int tid=0) { return tinystm::readTx([&] () { return innerGet(key); }); } // Used only for benchmarks void addAll(K** keys, const int size, const int tid=0) { for (int i = 0; i < size; i++) add(*keys[i]); } }; #endif /* _TINY_STM_RESIZABLE_HASH_MAP_H_ */ ================================================ FILE: datastructures/linkedlists/CRWWPLinkedListSet.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _CRWWP_STM_LINKED_LIST_SET_H_ #define _CRWWP_STM_LINKED_LIST_SET_H_ #include #include #include "stms/CRWWPSTM.hpp" /** *
* <h1> A Linked List Set for CRWWP STM (blocking) </h1>
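* <p>
* A short worked example of the ordering invariant (an illustrative sketch, not
* part of the original sources): add() inserts each key just before the first
* node with a smaller key, so the list is kept in descending order, and
* contains() can stop as soon as it reaches a node whose key is smaller than
* the one searched for.
* <pre>
*   CRWWPLinkedListSet<int> set {};
*   set.add(1); set.add(3); set.add(2);   // list: head -> 3 -> 2 -> 1 -> tail
*   set.contains(2);                      // true, found right after passing 3
*   set.contains(4);                      // false as soon as node 3 is reached (3 < 4)
* </pre>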
* * TODO * * * @author Pedro Ramalhete * @author Andreia Correia */ template class CRWWPLinkedListSet : public crwwpstm::tmbase { private: struct Node : public crwwpstm::tmbase { T key; crwwpstm::tmtype next {nullptr}; Node() {} Node(T key) : key{key} { } }; alignas(128) crwwpstm::tmtype head {nullptr}; alignas(128) crwwpstm::tmtype tail {nullptr}; public: CRWWPLinkedListSet(unsigned int maxThreads=0) { Node* lhead = new Node(); Node* ltail = new Node(); head = lhead; head->next = ltail; tail = ltail; } ~CRWWPLinkedListSet() { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { delete prev; prev = node; node = node->next; } delete prev; delete tail; } static std::string className() { return crwwpstm::CRWWPSTM::className() + "-LinkedListSet"; } /* * Progress Condition: blocking * Adds a node with a key, returns false if the key is already in the set */ bool add(T key, const int tid=0) { return crwwpstm::updateTx([&] () -> bool { Node* newNode = crwwpstm::tmNew(key); Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) break; if (key == node->key) { crwwpstm::tmDelete(newNode); // If the key was already in the set, free the node that was never used return false; } if (node->key < key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: blocking * Removes a node with an key, returns false if the key is not in the set */ bool remove(T key, const int tid=0) { return crwwpstm::updateTx([&] () -> bool { Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) return false; if (key == node->key) { prev->next = node->next; crwwpstm::tmDelete(node); return true; } if (node->key < key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: blocking * Returns true if it finds a node with a matching key */ bool contains(T key, const int tid=0) { return crwwpstm::readTx([&] () -> bool { Node* node = head->next; while (true) { if (node == tail) return false; if (key == node->key) return true; if (node->key < key) return false; node = node->next; } }); } bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(*keys[i], tid); } }; #endif /* _C_RW_WP_STM_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/linkedlists/ESTMLinkedListSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _ESTM_LINKED_LIST_SET_H_ #define _ESTM_LINKED_LIST_SET_H_ #include "../../stms/ESTM.hpp" // This header defines the macros for the STM being compiled /** *

* A Linked List Set for Elastic STM

* When we make the 'ltail' optimization here, it causes a crash on ESTM, therefore we don't do it. */ template class ESTMLinkedListSet : public estm::tmbase { private: struct Node : public estm::tmbase { T key {}; estm::tmtype next {nullptr}; Node() {} Node(T key) : key{key} { } }; alignas(128) estm::tmtype head {nullptr}; alignas(128) estm::tmtype tail {nullptr}; public: ESTMLinkedListSet(unsigned int maxThreads=0) { estm::updateTx([&] () { Node* lhead = estm::tmNew(); Node* ltail = estm::tmNew(); head = lhead; head->next = ltail; tail = ltail; }); } ~ESTMLinkedListSet() { estm::updateTx([&] () { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { estm::tmDelete(prev); prev = node; node = node->next; } estm::tmDelete(prev); estm::tmDelete(tail.load()); }); } static std::string className() { return estm::ESTM::className() + "-LinkedListSet"; } /* * Progress Condition: blocking * Adds a node with a key, returns false if the key is already in the set */ bool add(T key, const int tid=0) { return estm::updateTx([this,key] () { Node* newNode = estm::tmNew(key); Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) break; if (key == node->key) { estm::tmDelete(newNode); // If the key was already in the set, free the node that was never used return false; } if (node->key < key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: blocking * Removes a node with an key, returns false if the key is not in the set */ bool remove(T key, const int tid=0) { return estm::updateTx([this,key] () { Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) return false; if (key == node->key) { prev->next = node->next; estm::tmDelete(node); return true; } if (node->key < key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: blocking * Returns true if it finds a node with a matching key */ bool contains(T key, const int tid=0) { return estm::readTx([this,key] () { Node* node = head->next; while (true) { if (node == tail) return false; if (key == node->key) return true; if (node->key < key) return false; node = node->next; } }); } bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(*keys[i], tid); return true; } }; #endif /* _ESTM_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/linkedlists/HazardEras.hpp ================================================ /****************************************************************************** * Copyright (c) 2016-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _HAZARD_ERAS_H_ #define _HAZARD_ERAS_H_ #include #include #include #include /* *

* Hazard Eras

* This is a light-weight implementation of Hazard Eras, where each thread has a
* thread-local list of retired objects.
*
* It is based on the paper "Hazard Eras - Non-Blocking Memory Reclamation"
* by Pedro Ramalhete and Andreia Correia:
* https://github.com/pramalhe/ConcurrencyFreaks/blob/master/papers/hazarderas-2017.pdf
*
* The type T is used for the objects/nodes and must have the members newEra and delEra.
*
* The reclamation threshold R is zero, i.e. every retire() scans for reclaimable objects.
*
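* A minimal usage sketch (illustrative only; 'Node', 'atomicNodePtr', 'oldNode' and
* 'tid' are assumptions, not part of this header):
*
*   HazardEras<Node> he {2, maxThreads};
*   Node* n = he.get_protected(0, atomicNodePtr, tid); // protect before dereferencing
*   // ... safely read n->key, n->next, ...
*   he.clear(tid);            // drop all era protections held by this thread
*   he.retire(oldNode, tid);  // deletion is deferred until no era protects oldNode
*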

* @author Pedro Ramalhete * @author Andreia Correia */ template class HazardEras { private: static const uint64_t NONE = 0; static const int HE_MAX_THREADS = 128; static const int MAX_HES = 5; // This is named 'K' in the HP paper static const int CLPAD = 128/sizeof(std::atomic); static const int HE_THRESHOLD_R = 0; // This is named 'R' in the HP paper const int maxHEs; const int maxThreads; alignas(128) std::atomic eraClock {1}; alignas(128) std::atomic* he[HE_MAX_THREADS]; // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing alignas(128) std::vector retiredList[HE_MAX_THREADS*CLPAD]; public: HazardEras(int maxHEs=MAX_HES, int maxThreads=HE_MAX_THREADS) : maxHEs{maxHEs}, maxThreads{maxThreads} { for (int it = 0; it < HE_MAX_THREADS; it++) { he[it] = new std::atomic[CLPAD*2]; // We allocate four cache lines to allow for many hps and without false sharing retiredList[it*CLPAD].reserve(maxThreads*maxHEs); for (int ihe = 0; ihe < MAX_HES; ihe++) { he[it][ihe].store(NONE, std::memory_order_relaxed); } } static_assert(std::is_same::value, "T::newEra must be uint64_t"); static_assert(std::is_same::value, "T::delEra must be uint64_t"); } ~HazardEras() { for (int it = 0; it < HE_MAX_THREADS; it++) { delete[] he[it]; // Clear the current retired nodes for (unsigned iret = 0; iret < retiredList[it*CLPAD].size(); iret++) { delete retiredList[it*CLPAD][iret]; } } } inline uint64_t getEra() { return eraClock.load(); } /** * Progress Condition: wait-free bounded (by maxHEs) */ inline void clear(const int tid) { for (int ihe = 0; ihe < maxHEs; ihe++) { he[tid][ihe].store(NONE, std::memory_order_release); } } /** * Progress Condition: lock-free */ inline T* get_protected(int index, const std::atomic& atom, const int tid) { auto prevEra = he[tid][index].load(std::memory_order_relaxed); while (true) { T* ptr = atom.load(); auto era = eraClock.load(std::memory_order_acquire); if (era == prevEra) return ptr; he[tid][index].store(era); prevEra = era; } } inline void protectEraRelease(int index, int other, const int tid) { auto era = he[tid][other].load(std::memory_order_relaxed); if (he[tid][index].load(std::memory_order_relaxed) == era) return; he[tid][index].store(era, std::memory_order_release); } /* * Does a single iteration. Must be integrated into the algorithm that's using HE. * In other words, we must re-check if era has changed * * Progress Condition: wait-free population oblivious */ inline T* protectPtr(int index, const std::atomic& atom, uint64_t& prevEra, const int tid) { T* ptr = atom.load(std::memory_order_acquire); auto era = eraClock.load(); if (prevEra != era) { prevEra = era; he[tid][index].store(era, std::memory_order_relaxed); std::atomic_thread_fence(std::memory_order_seq_cst); } return ptr; } /** * Retire an object (node) * Progress Condition: wait-free bounded * * Doing rlist.erase() is not the most efficient way to remove entries from a std::vector, but ok... 
*/ void retire(T* ptr, const int mytid) { auto currEra = eraClock.load(); ptr->delEra = currEra; auto& rlist = retiredList[mytid*CLPAD]; rlist.push_back(ptr); if (eraClock == currEra) eraClock.fetch_add(1); for (unsigned iret = 0; iret < rlist.size();) { auto obj = rlist[iret]; if (canDelete(obj, mytid)) { rlist.erase(rlist.begin() + iret); delete obj; continue; } iret++; } } private: bool canDelete(T* obj, const int mytid) { for (int tid = 0; tid < maxThreads; tid++) { for (int ihe = 0; ihe < maxHEs; ihe++) { const auto era = he[tid][ihe].load(std::memory_order_acquire); if (era == NONE || era < obj->newEra || era > obj->delEra) continue; return false; } } return true; } }; #endif /* _HAZARD_ERAS_H_ */ ================================================ FILE: datastructures/linkedlists/HazardPointers.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** */ #ifndef _HAZARD_POINTERS_H_ #define _HAZARD_POINTERS_H_ #include #include #include template class HazardPointers { private: static const int HP_MAX_THREADS = 128; static const int HP_MAX_HPS = 5; // This is named 'K' in the HP paper static const int CLPAD = 128/sizeof(std::atomic); static const int HP_THRESHOLD_R = 0; // This is named 'R' in the HP paper static const int MAX_RETIRED = HP_MAX_THREADS*HP_MAX_HPS; // Maximum number of retired objects per thread const int maxHPs; const int maxThreads; alignas(128) std::atomic* hp[HP_MAX_THREADS]; // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing alignas(128) std::vector retiredList[HP_MAX_THREADS*CLPAD]; public: HazardPointers(int maxHPs=HP_MAX_HPS, int maxThreads=HP_MAX_THREADS) : maxHPs{maxHPs}, maxThreads{maxThreads} { for (int it = 0; it < HP_MAX_THREADS; it++) { hp[it] = new std::atomic[CLPAD*2]; // We allocate four cache lines to allow for many hps and without false sharing retiredList[it*CLPAD].reserve(MAX_RETIRED); for (int ihp = 0; ihp < HP_MAX_HPS; ihp++) { hp[it][ihp].store(nullptr, std::memory_order_relaxed); } } } ~HazardPointers() { for (int it = 0; it < HP_MAX_THREADS; it++) { delete[] hp[it]; // Clear the current retired nodes for (unsigned iret = 0; iret < retiredList[it*CLPAD].size(); iret++) { delete retiredList[it*CLPAD][iret]; } } } /** * Progress Condition: wait-free bounded (by maxHPs) */ inline void clear(const int tid) { for (int ihp = 0; ihp < maxHPs; ihp++) { hp[tid][ihp].store(nullptr, std::memory_order_release); } } /** * Progress Condition: wait-free population oblivious */ inline void clearOne(int ihp, const int tid) { hp[tid][ihp].store(nullptr, std::memory_order_release); } /** * Progress Condition: lock-free */ inline T* protect(int index, const std::atomic& atom, const int tid) { T* n = nullptr; T* ret; while ((ret = atom.load()) != n) { hp[tid][index].store(ret); n = ret; } return ret; } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free population oblivious */ inline T* protectPtr(int index, T* ptr, const int tid) { hp[tid][index].store(ptr); /* // For x86-only implementations, use this instead (it's 2x faster than mfence on x86): hp[tid][index].store(ptr, std::memory_order_release); __asm__ __volatile__ ("lock;addl $0,(%%rsp);" ::: "cc","memory") ; */ return ptr; } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free population oblivious */ inline T* protectPtrRelease(int index, T* ptr, const int tid) { hp[tid][index].store(ptr, std::memory_order_release); return ptr; } /** * Progress Condition: wait-free bounded (by the number of threads squared) */ void retire(T* ptr, const int tid) { retiredList[tid*CLPAD].push_back(ptr); if (retiredList[tid*CLPAD].size() < HP_THRESHOLD_R) return; for (unsigned iret = 0; iret < retiredList[tid*CLPAD].size();) { auto obj = retiredList[tid*CLPAD][iret]; bool canDelete = true; for (int tid = 0; tid < maxThreads && canDelete; tid++) { for (int ihp = 0; ihp < maxHPs; ihp++) { if (hp[tid][ihp].load() == obj) { canDelete = false; break; } } } if (canDelete) { retiredList[tid*CLPAD].erase(retiredList[tid*CLPAD].begin() + iret); delete obj; continue; } iret++; } } }; #endif /* _HAZARD_POINTERS_H_ */ ================================================ FILE: datastructures/linkedlists/MagedHarrisLinkedListSetHE.hpp 
================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _MAGED_M_MICHAEL_LINKED_LIST_HE_H_ #define _MAGED_M_MICHAEL_LINKED_LIST_HE_H_ #include #include #include #include #include #include #include "common/HazardEras.hpp" /** * This is the linked list by Maged M. Michael that uses Hazard Eras. * Lock-Free Linked List as described in Maged M. Michael paper (Figure 7): * http://www.cs.tau.ac.il/~afek/p73-Lock-Free-HashTbls-michael.pdf * *

* This set has three operations:
*  - add(x)      - Lock-Free
*  - remove(x)   - Lock-Free
*  - contains(x) - Lock-Free
*
* (A sketch of the mark-bit removal that remove(x) relies on follows below.)
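*
* A minimal sketch of that two-step removal (illustrative only; 'prev' and 'curr'
* stand in for the traversal state computed by find()):
*
*   Node* next = curr->next.load();
*   curr->next.compare_exchange_strong(next, getMarked(next)); // 1) logically delete curr
*   prev->compare_exchange_strong(curr, getUnmarked(next));    // 2) physically unlink curr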

* @author Pedro Ramalhete
* @author Andreia Correia
*/
template<typename T>
class MagedHarrisLinkedListSetHE {

private:
    struct Node {
        T key;
        uint64_t newEra;
        uint64_t delEra;
        std::atomic<Node*> next;
        Node(T key, uint64_t newEra) : key{key}, newEra{newEra}, delEra{0}, next{nullptr} { }
        bool casNext(Node *cmp, Node *val) {
            return next.compare_exchange_strong(cmp, val);
        }
    };

    // Pointers to head and tail sentinel nodes of the list
    std::atomic<Node*> head;
    std::atomic<Node*> tail;

    const int maxThreads;
    // We need 3 hazard eras
    HazardEras<Node> he {3, maxThreads};
    const int kHp0 = 0; // Protects next
    const int kHp1 = 1; // Protects curr
    const int kHp2 = 2; // Protects prev

public:
    MagedHarrisLinkedListSetHE(const int maxThreads) : maxThreads{maxThreads} {
        head.store(new Node({}, 1));  // Uses T's default constructor
        tail.store(new Node({}, 1));  // Uses T's default constructor
        head.load()->next.store(tail.load());
    }

    // We don't expect the destructor to be called if this instance can still be in use
    ~MagedHarrisLinkedListSetHE() {
        Node *prev = head.load();
        Node *node = prev->next.load();
        while (node != nullptr) {
            delete prev;
            prev = node;
            node = prev->next.load();
        }
        delete prev;
    }

    static std::string className() { return "MagedHarris-LinkedListSetHE"; }

    /*
     * This function is single-threaded, meant to be called at the start of the test.
     * It is assumed that the keys are ordered.
     */
    void addAll(T** keys, const int size, const int tid) {
        Node* node = head.load();
        for (int i = 0; i < size; i++) {
            Node* newNode = new Node(*keys[i], he.getEra());
            node->next.store(newNode, std::memory_order_relaxed);
            node = newNode;
        }
        node->next.store(tail.load(std::memory_order_relaxed), std::memory_order_relaxed);
    }

    /**
     * This method is named 'Insert()' in the original paper.
     * Taken from Figure 7 of the paper:
     * "High Performance Dynamic Lock-Free Hash Tables and List-Based Sets"
     *

* Progress Condition: Lock-Free * */ bool add(T key, const int tid) { Node *curr, *next; std::atomic *prev; Node* newNode = new Node(key, he.getEra()); while (true) { if (find(&key, &prev, &curr, &next, tid)) { delete newNode; // There is already a matching key he.clear(tid); return false; } newNode->next.store(curr, std::memory_order_relaxed); Node *tmp = getUnmarked(curr); if (prev->compare_exchange_strong(tmp, newNode)) { // seq-cst he.clear(tid); return true; } } } /** * This method is named 'Delete()' in the original paper. * Taken from Figure 7 of the paper: * "High Performance Dynamic Lock-Free Hash Tables and List-Based Sets" */ bool remove(T key, const int tid) { Node *curr, *next; std::atomic *prev; while (true) { /* Try to find the key in the list. */ if (!find(&key, &prev, &curr, &next, tid)) { he.clear(tid); return false; } /* Mark if needed. */ Node *tmp = getUnmarked(next); if (!curr->next.compare_exchange_strong(tmp, getMarked(next))) { continue; /* Another thread interfered. */ } tmp = getUnmarked(curr); if (prev->compare_exchange_strong(tmp, getUnmarked(next))) { /* Unlink */ he.clear(tid); he.retire(getUnmarked(curr), tid); /* Reclaim */ } else { he.clear(tid); } /* * If we want to prevent the possibility of there being an * unbounded number of unmarked nodes, add "else _find(head,key)." * This is not necessary for correctness. */ return true; } } /** * This is named 'Search()' on the original paper * Taken from Figure 7 of the paper: * "High Performance Dynamic Lock-Free Hash Tables and List-Based Sets" *

* Progress Condition: Lock-Free */ bool contains(T key, const int tid) { Node *curr, *next; std::atomic *prev; bool isContains = find(&key, &prev, &curr, &next, tid); he.clear(tid); return isContains; } private: /** *

* Progress Condition: Lock-Free */ bool find (T* key, std::atomic **par_prev, Node **par_curr, Node **par_next, const int tid) { std::atomic *prev; Node *curr, *next; try_again: prev = &head; // Protect curr with a hazard era curr = he.get_protected(kHp1, *prev, tid); while (true) { if (getUnmarked(curr) == nullptr) break; // TODO: Will it ever happen? // Protect next with a hazard era. next = he.get_protected(kHp0, curr->next, tid); if (getUnmarked(curr)->next.load() != next) goto try_again; if (getUnmarked(next) == tail.load()) break; if (prev->load() != getUnmarked(curr)) goto try_again; if (getUnmarked(next) == next) { // !cmark in the paper if (!(getUnmarked(curr)->key < *key)) { // Check for null to handle head and tail *par_curr = curr; *par_prev = prev; *par_next = next; return (getUnmarked(curr)->key == *key); } prev = &getUnmarked(curr)->next; he.protectEraRelease(kHp2, kHp1, tid); } else { // Update the link and retire the node. Node *tmp = getUnmarked(curr); if (!prev->compare_exchange_strong(tmp, getUnmarked(next))) { goto try_again; } he.retire(getUnmarked(curr), tid); } curr = next; he.protectEraRelease(kHp1, kHp0, tid); } *par_curr = curr; *par_prev = prev; *par_next = next; return false; } bool isMarked(Node * node) { return ((size_t) node & 0x1); } Node * getMarked(Node * node) { return (Node*)((size_t) node | 0x1); } Node * getUnmarked(Node * node) { return (Node*)((size_t) node & (~0x1)); } }; #endif /* _MAGED_M_MICHAEL_LINKED_LIST_HE_H_ */ ================================================ FILE: datastructures/linkedlists/MagedHarrisLinkedListSetHP.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _MAGED_MICHAEL_TIM_HARRIS_LINKED_LIST_HP_H_ #define _MAGED_MICHAEL_TIM_HARRIS_LINKED_LIST_HP_H_ #include #include #include #include #include #include #include "common/HazardPointers.hpp" /** * This is the linked list by Maged M. 
Michael, adapted to use Hazard Pointers correctly (Harris's original algorithm is
* not compatible with HPs).
* Lock-Free Linked List as described in Maged M. Michael's paper (Figure 4):
* http://www.cs.tau.ac.il/~afek/p73-Lock-Free-HashTbls-michael.pdf
*

* This set has three operations:
*  - add(x)      - Lock-Free
*  - remove(x)   - Lock-Free
*  - contains(x) - Lock-Free
*
* (A sketch of the hazard-pointer validation used during traversal follows below.)
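*
* A minimal sketch of that validation pattern (illustrative only; 'prev' is a
* std::atomic<Node*>* standing in for the traversal state inside find()):
*
*   Node* curr = prev->load();
*   hp.protectPtr(kHp1, curr, tid);            // publish curr as a hazard pointer
*   if (prev->load() != curr) goto try_again;  // re-validate: curr may already be unlinked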

* @author Pedro Ramalhete
* @author Andreia Correia
*/
template<typename T>
class MagedHarrisLinkedListSetHP {

private:
    struct Node {
        T key;
        std::atomic<Node*> next;
        Node(T key) : key{key}, next{nullptr} { }
        bool casNext(Node *cmp, Node *val) {
            return next.compare_exchange_strong(cmp, val);
        }
    };

    // Pointers to head and tail sentinel nodes of the list
    std::atomic<Node*> head;
    std::atomic<Node*> tail;

    const int maxThreads;
    // We need 3 hazard pointers
    HazardPointers<Node> hp {3, maxThreads};
    const int kHp0 = 0; // Protects next
    const int kHp1 = 1; // Protects curr
    const int kHp2 = 2; // Protects prev

public:
    MagedHarrisLinkedListSetHP(const int maxThreads) : maxThreads{maxThreads} {
        head.store(new Node({}));
        tail.store(new Node({}));
        head.load()->next.store(tail.load());
    }

    // We don't expect the destructor to be called if this instance can still be in use
    ~MagedHarrisLinkedListSetHP() {
        Node *prev = head.load();
        Node *node = prev->next.load();
        while (node != nullptr) {
            delete prev;
            prev = node;
            node = prev->next.load();
        }
        delete prev;
    }

    static std::string className() { return "MagedHarris-LinkedListSetHP"; }

    /*
     * This function is single-threaded, meant to be called at the start of the test.
     * It is assumed that the keys are ordered.
     */
    void addAll(T** keys, const int size, const int tid) {
        Node* node = head.load();
        for (int i = 0; i < size; i++) {
            Node* newNode = new Node(*keys[i]);
            node->next.store(newNode, std::memory_order_relaxed);
            node = newNode;
        }
        node->next.store(tail.load(std::memory_order_relaxed), std::memory_order_relaxed);
    }

    /**
     * This method is named 'Insert()' in the original paper.
     * Taken from Figure 7 of the paper:
     * "High Performance Dynamic Lock-Free Hash Tables and List-Based Sets"
     *

* Progress Condition: Lock-Free * */ bool add(T key, const int tid) { Node *curr, *next; std::atomic *prev; Node* newNode = new Node(key); while (true) { if (find(&key, &prev, &curr, &next, tid)) { delete newNode; // There is already a matching key hp.clear(tid); return false; } newNode->next.store(curr, std::memory_order_relaxed); Node *tmp = getUnmarked(curr); if (prev->compare_exchange_strong(tmp, newNode)) { // seq-cst hp.clear(tid); return true; } } } /** * This method is named 'Delete()' in the original paper. * Taken from Figure 7 of the paper: * "High Performance Dynamic Lock-Free Hash Tables and List-Based Sets" */ bool remove(T key, const int tid) { Node *curr, *next; std::atomic *prev; while (true) { /* Try to find the key in the list. */ if (!find(&key, &prev, &curr, &next, tid)) { hp.clear(tid); return false; } /* Mark if needed. */ Node *tmp = getUnmarked(next); if (!curr->next.compare_exchange_strong(tmp, getMarked(next))) { continue; /* Another thread interfered. */ } tmp = getUnmarked(curr); if (prev->compare_exchange_strong(tmp, getUnmarked(next))) { /* Unlink */ hp.clear(tid); hp.retire(getUnmarked(curr), tid); /* Reclaim */ } else { hp.clear(tid); } /* * If we want to prevent the possibility of there being an * unbounded number of unmarked nodes, add "else _find(head,key)." * This is not necessary for correctness. */ return true; } } /** * This is named 'Search()' on the original paper * Taken from Figure 7 of the paper: * "High Performance Dynamic Lock-Free Hash Tables and List-Based Sets" *

* Progress Condition: Lock-Free */ bool contains(T key, const int tid) { Node *curr, *next; std::atomic *prev; bool isContains = find(&key, &prev, &curr, &next, tid); hp.clear(tid); return isContains; } private: /** * TODO: This needs to be code reviewed... it's not production-ready *

* Progress Condition: Lock-Free */ bool find (T* key, std::atomic **par_prev, Node **par_curr, Node **par_next, const int tid) { std::atomic *prev; Node *curr, *next; try_again: prev = &head; curr = prev->load(); // Protect curr with a hazard pointer. hp.protectPtr(kHp1, curr, tid); if (prev->load() != getUnmarked(curr)) goto try_again; while (true) { if (getUnmarked(curr) == nullptr) break; // Protect next with a hazard pointer. next = curr->next.load(); hp.protectPtr(kHp0, getUnmarked(next), tid); if (getUnmarked(curr)->next.load() != next) goto try_again; if (getUnmarked(next) == tail.load()) break; if (prev->load() != getUnmarked(curr)) goto try_again; if (getUnmarked(next) == next) { // !cmark in the paper if (!(getUnmarked(curr)->key < *key)) { // Check for null to handle head and tail *par_curr = curr; *par_prev = prev; *par_next = next; return (getUnmarked(curr)->key == *key); } prev = &getUnmarked(curr)->next; hp.protectRelease(kHp2, getUnmarked(curr), tid); } else { // Update the link and retire the node. Node *tmp = getUnmarked(curr); if (!prev->compare_exchange_strong(tmp, getUnmarked(next))) { goto try_again; } hp.retire(getUnmarked(curr), tid); } curr = next; hp.protectRelease(kHp1, getUnmarked(next), tid); } *par_curr = curr; *par_prev = prev; *par_next = next; return false; } bool isMarked(Node * node) { return ((size_t) node & 0x1); } Node * getMarked(Node * node) { return (Node*)((size_t) node | 0x1); } Node * getUnmarked(Node * node) { return (Node*)((size_t) node & (~0x1)); } }; #endif /* _MAGED_MICHAEL_TIM_HARRIS_LINKED_LIST_HP_H_ */ ================================================ FILE: datastructures/linkedlists/OFLFLinkedListSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _ONEFILE_LF_LINKED_LIST_SET_H_ #define _ONEFILE_LF_LINKED_LIST_SET_H_ #include #include #include "stms/OneFileLF.hpp" /** *

* A Linked List Set for One-File STM (Lock-Free)
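*
* Note: the transactions below load 'tail' once into a local ('ltail') and compare
* against that plain pointer, instead of re-reading the transactional 'tail' on
* every loop iteration; 'node->key' is likewise read once per node into 'nkey'.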

*/ template class OFLFLinkedListSet : public oflf::tmbase { private: struct Node : public oflf::tmbase { T key {}; oflf::tmtype next {nullptr}; Node() {} Node(T key) : key{key} { } }; alignas(128) oflf::tmtype head {nullptr}; alignas(128) oflf::tmtype tail {nullptr}; public: OFLFLinkedListSet(unsigned int maxThreads=0) { oflf::updateTx([this] () { Node* lhead = oflf::tmNew(); Node* ltail = oflf::tmNew(); head = lhead; head->next = ltail; tail = ltail; }); } ~OFLFLinkedListSet() { oflf::updateTx([this] () { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { oflf::tmDelete(prev); prev = node; node = node->next; } oflf::tmDelete(prev); oflf::tmDelete(tail.pload()); }); } static std::string className() { return oflf::OneFileLF::className() + "-LinkedListSet"; } /* * Progress Condition: lock-free * Adds a node with a key, returns false if the key is already in the set */ bool add(T key, const int tid=0) { return oflf::updateTx([this,key] () -> bool { Node* newNode = oflf::tmNew(key); Node* prev = head; Node* node = prev->next; Node* ltail = tail; while (true) { if (node == ltail) break; T nkey = node->key; if (key == nkey) { oflf::tmDelete(newNode); // If the key was already in the set, free the node that was never used return false; } if (nkey < key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: lock-free * Removes a node with an key, returns false if the key is not in the set */ bool remove(T key, const int tid=0) { return oflf::updateTx([this,key] () -> bool { Node* prev = head; Node* node = prev->next; Node* ltail = tail; while (true) { if (node == ltail) return false; T nkey = node->key; if (key == nkey) { prev->next = node->next; oflf::tmDelete(node); return true; } if (nkey < key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: lock-free * Returns true if it finds a node with a matching key */ bool contains(T key, const int tid=0) { return oflf::readTx([this,key] () -> bool { Node* node = head->next; Node* ltail = tail; while (true) { if (node == ltail) return false; T nkey = node->key; if (key == nkey) return true; if (nkey < key) return false; node = node->next; } }); } bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(*keys[i], tid); return true; } }; #endif /* _ONE_FILE_LF_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/linkedlists/OFWFLinkedListSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _ONEFILE_WF_LINKED_LIST_SET_H_ #define _ONEFILE_WF_LINKED_LIST_SET_H_ #include #include #include "stms/OneFileWF.hpp" /** *

* A Linked List Set for One-File STM (Wait-Free)

*/ template class OFWFLinkedListSet : public ofwf::tmbase { private: struct Node : public ofwf::tmbase { T key {}; ofwf::tmtype next {nullptr}; Node(T key) : key{key} { } Node() {} }; alignas(128) ofwf::tmtype head {nullptr}; alignas(128) ofwf::tmtype tail {nullptr}; public: OFWFLinkedListSet(unsigned int maxThreads=0) { ofwf::updateTx([this] () { Node* lhead = ofwf::tmNew(); Node* ltail = ofwf::tmNew(); head = lhead; head->next = ltail; tail = ltail; }); } ~OFWFLinkedListSet() { ofwf::updateTx([this] () { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { ofwf::tmDelete(prev); prev = node; node = node->next; } ofwf::tmDelete(prev); ofwf::tmDelete(tail.pload()); }); } static std::string className() { return ofwf::OneFileWF::className() + "-LinkedListSet"; } /* * Progress Condition: wait-free * Adds a node with a key, returns false if the key is already in the set */ bool add(T key, const int tid=0) { return ofwf::updateTx([this,key] () { Node* newNode = ofwf::tmNew(key); Node* prev = head; Node* node = prev->next; Node* ltail = tail; while (true) { if (node == ltail) break; T nkey = node->key; if (key == nkey) { ofwf::tmDelete(newNode); // If the key was already in the set, free the node that was never used return false; } if (nkey < key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: wait-free * Removes a node with an key, returns false if the key is not in the set */ bool remove(T key, const int tid=0) { return ofwf::updateTx([this,key] () { Node* prev = head; Node* node = prev->next; Node* ltail = tail; while (true) { if (node == ltail) return false; T nkey = node->key; if (key == nkey) { prev->next = node->next; ofwf::tmDelete(node); return true; } if (nkey < key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: wait-free * Returns true if it finds a node with a matching key */ bool contains(T key, const int tid=0) { return ofwf::readTx([this,key] () { Node* node = head->next; Node* ltail = tail; while (true) { if (node == ltail) return false; T nkey = node->key; if (key == nkey) return true; if (nkey < key) return false; node = node->next; } }); } bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(*keys[i], tid); return true; } }; #endif /* _ONE_FILE_WF_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/linkedlists/STMLinkedListSet.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _STM_LINKED_LIST_SET_H_ #define _STM_LINKED_LIST_SET_H_ #include #include //#include "stms/OneFileLF.hpp" /** *

* A Linked List Set for an STM

* * TODO * * * @author Pedro Ramalhete * @author Andreia Correia */ template class TMTYPE, template class TMBASE> class STMLinkedListSet : public TMBASE { private: struct Node : public TMBASE { T* key; TMTYPE next; Node(T* key) : key{key}, next{nullptr} { } }; alignas(128) TMTYPE head {nullptr}; alignas(128) TMTYPE tail {nullptr}; public: STMLinkedListSet(unsigned int maxThreads=0) { Node* lhead = new Node(nullptr); Node* ltail = new Node(nullptr); head = lhead; head->next = ltail; tail = ltail; } ~STMLinkedListSet() { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { delete prev; prev = node; node = node->next; } delete prev; delete tail; } static std::string className() { return TM::className() + "-LinkedListSet"; } /* * Progress Condition: lock-free * Adds a node with a key, returns false if the key is already in the set */ bool add(T* key, const int tid=0) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); return TM::updateTx([this,key] () -> bool { Node* newNode = TM::tmNew(key); Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) break; if (*key == *node->key) { TM::tmDelete(newNode); // If the key was already in the set, free the node that was never used return false; } if (*(node->key) < *key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: lock-free * Removes a node with an key, returns false if the key is not in the set */ bool remove(T* key, const int tid=0) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); return TM::updateTx([this,key] () -> bool { Node* prev = head; Node* node = prev->next; while (true) { if (node == tail) return false; if (*key == *node->key) { prev->next = node->next; TM::tmDelete(node); return true; } if (*(node->key) < *key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: lock-free * Returns true if it finds a node with a matching key */ bool contains(T* key, const int tid=0) { if (key == nullptr) throw std::invalid_argument("key can not be nullptr"); return TM::readTx([this,key] () -> bool { Node* node = head->next; while (true) { if (node == tail) return false; if (*key == *node->key) return true; if (*(node->key) < *key) return false; node = node->next; } }); } bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(keys[i], tid); } }; #endif /* _ONE_FILE_LF_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/linkedlists/TinySTMLinkedListSet.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _TINY_STM_LINKED_LIST_SET_H_ #define _TINY_STM_LINKED_LIST_SET_H_ #include "stms/TinySTM.hpp" /** *

* A Linked List Set for usage with TinySTM

*/ template class TinySTMLinkedListSet : public tinystm::tmbase { private: struct Node : public tinystm::tmbase { T key; tinystm::tmtype next{nullptr}; Node() {} Node(T key) : key{key} { } }; alignas(128) tinystm::tmtype head {nullptr}; alignas(128) tinystm::tmtype tail {nullptr}; public: TinySTMLinkedListSet(unsigned int maxThreads=0) { tinystm::updateTx([this] () { Node* lhead = tinystm::tmNew(); Node* ltail = tinystm::tmNew(); head = lhead; head->next = ltail; tail = ltail; return true; }); } ~TinySTMLinkedListSet() { tinystm::updateTx([this] () { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { tinystm::tmDelete(prev); prev = node; node = node->next; } tinystm::tmDelete(prev); tinystm::tmDelete(tail.load()); return true; }); } static std::string className() { return "TinySTM-LinkedListSet"; } /* * Progress Condition: blocking * Adds a node with a key, returns false if the key is already in the set */ bool add(T key, const int tid=0) { return tinystm::updateTx([this,key] () { Node* newNode = tinystm::tmNew(key); Node* prev = head; Node* node = prev->next; Node* ltail = tail; while (true) { if (node == ltail) break; T nkey = node->key; if (key == nkey) { tinystm::tmDelete(newNode); // If the key was already in the set, free the node that was never used return false; } if (nkey < key) break; prev = node; node = node->next; } prev->next = newNode; newNode->next = node; return true; }); } /* * Progress Condition: blocking * Removes a node with an key, returns false if the key is not in the set */ bool remove(T key, const int tid=0) { return tinystm::updateTx([this,key] () { Node* prev = head; Node* node = prev->next; Node* ltail = tail; while (true) { if (node == ltail) return false; T nkey = node->key; if (key == nkey) { prev->next = node->next; tinystm::tmDelete(node); return true; } if (nkey < key) return false; prev = node; node = node->next; } }); } /* * Progress Condition: blocking * Returns true if it finds a node with a matching key */ bool contains(T key, const int tid=0) { return tinystm::readTx([this,key] () { Node* node = head->next; Node* ltail = tail; while (true) { if (node == ltail) return false; T nkey = node->key; if (key == nkey) return true; if (nkey < key) return false; node = node->next; } }); } bool addAll(T** keys, int size, const int tid) { for (int i = 0; i < size; i++) add(*keys[i], tid); } }; #endif /* _TINY_STM_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/queues/CRWWPLinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _CRWWP_LINKED_LIST_QUEUE_H_ #define _CRWWP_LINKED_LIST_QUEUE_H_ #include #include #include "../../stms/CRWWPSTM.hpp" /** *

* A Linked List queue using C-RW-WP STM
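*
* A minimal usage sketch (illustrative only; 'Item' and 'tid' are assumptions):
*
*   CRWWPLinkedListQueue<Item> q {};
*   q.enqueue(new Item{}, tid);   // throws std::invalid_argument for nullptr
*   Item* it = q.dequeue(tid);    // returns nullptr when the queue is empty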

* * * TODO * * * enqueue algorithm: sequential implementation + MWC * dequeue algorithm: sequential implementation + MWC * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: Hazard Eras (integrated into MWC) * enqueue min ops: 2 DCAS + 1 CAS * dequeue min ops: 1 DCAS + 1 CAS * * @author Pedro Ramalhete * @author Andreia Correia */ template class CRWWPLinkedListQueue { private: struct Node : crwwpstm::tmbase { T* item; crwwpstm::tmtype next; Node(T* userItem) : item{userItem}, next{nullptr} { } }; alignas(128) crwwpstm::tmtype head {nullptr}; alignas(128) crwwpstm::tmtype tail {nullptr}; public: CRWWPLinkedListQueue(unsigned int maxThreads=0) { Node* sentinelNode = new Node(nullptr); head = sentinelNode; tail = sentinelNode; } ~CRWWPLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue Node* lhead = head; delete lhead; } static std::string className() { return "CRWWP-LinkedListQueue"; } /* * Progress Condition: lock-free * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); Node* newNode = crwwpstm::tmNew(item); // Let's allocate outside the transaction, less overhead return crwwpstm::updateTx([this,&newNode] () -> bool { tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: lock-free */ T* dequeue(const int tid=0) { return crwwpstm::updateTx([this] () -> T* { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; crwwpstm::tmDelete(lhead); return head->item; }); } }; #endif /* _CRWWP_TM_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/ESTMArrayLinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** */ #ifndef _ELASTIC_STM_ARRAY_LINKED_LIST_QUEUE_H_ #define _ELASTIC_STM_ARRAY_LINKED_LIST_QUEUE_H_ #include #include #include "stms/ESTM.hpp" /** *

* An Array Linked List Queue using Elastic STM (blocking)
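*
* The amortization idea, in isolation (illustrative sketch, not a drop-in excerpt;
* the tmNew<Node> template-argument form is an assumption): each node carries
* ITEM_NUM item slots, so a new node is allocated only once every ITEM_NUM enqueues:
*
*   if (ltail->tailidx < Node::ITEM_NUM) {   // common case: store into the array
*       ltail->items[ltail->tailidx] = item;
*       ++ltail->tailidx;
*   } else {                                 // rare case: append a fresh node
*       Node* newNode = estm::tmNew<Node>(item);
*       tail->next = newNode;
*       tail = newNode;
*   }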

* * TODO * * * enqueue algorithm: sequential implementation + MWC * dequeue algorithm: sequential implementation + MWC * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: Hazard Eras (integrated into MWC) * enqueue min ops: 2 DCAS + 1 CAS * dequeue min ops: 1 DCAS + 1 CAS * * @author Pedro Ramalhete * @author Andreia Correia */ template class ESTMArrayLinkedListQueue { private: struct Node : estm::tmbase { static const int ITEM_NUM = 1024; estm::tmtype headidx {0}; estm::tmtype items[ITEM_NUM]; estm::tmtype tailidx {0}; estm::tmtype next {nullptr}; Node(T* item) { items[0] = item; tailidx = 1; headidx = 0; for (int i = 1; i < ITEM_NUM; i++) items[i] = nullptr; } }; alignas(128) estm::tmtype head {nullptr}; alignas(128) estm::tmtype tail {nullptr}; public: ESTMArrayLinkedListQueue(unsigned int maxThreads=0) { estm::updateTx([this] () { Node* sentinelNode = estm::tmNew(nullptr); sentinelNode->tailidx = 0; head = sentinelNode; tail = sentinelNode; return true; }); } ~ESTMArrayLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue estm::updateTx([this] () { Node* lhead = head; estm::tmDelete(lhead); return true; }); } static std::string className() { return "ESTM-ArrayLinkedListQueue"; } /* * Progress Condition: blocking * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return estm::updateTx([this,item] () -> bool { Node* ltail = tail; uint64_t ltailidx = ltail->tailidx; if (ltailidx < Node::ITEM_NUM) { ltail->items[ltailidx] = item; ++ltail->tailidx; return true; } Node* newNode = estm::tmNew(item); tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: blocking */ T* dequeue(const int tid=0) { return estm::updateTx([this] () -> T* { Node* lhead = head; uint64_t lheadidx = lhead->headidx; // Check if queue is empty if (lhead == tail && lheadidx == tail->tailidx) return nullptr; if (lheadidx < Node::ITEM_NUM) { ++lhead->headidx; return lhead->items[lheadidx]; } lhead = lhead->next; estm::tmDelete(head.load()); head = lhead; ++lhead->headidx; return lhead->items[0]; }); } }; #endif /* _OF_WF_ARRAY_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/ESTMLinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _ELASTIC_STM_LINKED_LIST_QUEUE_H_ #define _ELASTIC_STM_LINKED_LIST_QUEUE_H_ #include #include #include "stms/ESTM.hpp" /** *

* A Linked List queue using Elastic STM (blocking)

* * * TODO * * @author Pedro Ramalhete * @author Andreia Correia */ template class ESTMLinkedListQueue { private: struct Node : estm::tmbase { T* item; estm::tmtype next {nullptr}; Node(T* userItem) : item{userItem} { } }; alignas(128) estm::tmtype head {nullptr}; alignas(128) estm::tmtype tail {nullptr}; public: ESTMLinkedListQueue(unsigned int maxThreads=0) { estm::updateTx([this] () { Node* sentinelNode = estm::tmNew(nullptr); head = sentinelNode; tail = sentinelNode; return true; }); } ~ESTMLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue estm::updateTx([this] () { Node* lhead = head; estm::tmDelete(lhead); return true; }); } static std::string className() { return "ESTM-LinkedListQueue"; } /* * Progress Condition: blocking * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return estm::updateTx([this,item] () -> bool { Node* newNode = estm::tmNew(item); tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: blocking */ T* dequeue(const int tid=0) { return estm::updateTx([this] () -> T* { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; estm::tmDelete(lhead); return head->item; }); } }; #endif /* _ESTM_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/FAAArrayQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _FAA_ARRAY_QUEUE_HP_H_ #define _FAA_ARRAY_QUEUE_HP_H_ #include #include #include "HazardPointers.hpp" /** *

* Fetch-And-Add Array Queue

* * Each node has one array but we don't search for a vacant entry. Instead, we * use FAA to obtain an index in the array, for enqueueing or dequeuing. * * There are some similarities between this queue and the basic queue in YMC: * http://chaoran.me/assets/pdf/wfq-ppopp16.pdf * but it's not the same because the queue in listing 1 is obstruction-free, while * our algorithm is lock-free. * In FAAArrayQueue eventually a new node will be inserted (using Michael-Scott's * algorithm) and it will have an item pre-filled in the first position, which means * that at most, after BUFFER_SIZE steps, one item will be enqueued (and it can then * be dequeued). This kind of progress is lock-free. * * Each entry in the array may contain one of three possible values: * - A valid item that has been enqueued; * - nullptr, which means no item has yet been enqueued in that position; * - taken, a special value that means there was an item but it has been dequeued; * * Enqueue algorithm: FAA + CAS(null,item) * Dequeue algorithm: FAA + CAS(item,taken) * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: Hazard Pointers (lock-free) * Uncontended enqueue: 1 FAA + 1 CAS + 1 HP * Uncontended dequeue: 1 FAA + 1 CAS + 1 HP * * *
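*
* The core slot-claiming step, in isolation (illustrative sketch of the idea, not a
* drop-in excerpt; 'node' stands for the current tail node):
*
*   const int idx = node->enqidx.fetch_add(1);  // FAA: claim a unique slot index
*   T* expected = nullptr;
*   if (node->items[idx].compare_exchange_strong(expected, item)) return; // enqueued
*   // CAS failed: a dequeuer marked the slot 'taken' first; retry with a later slot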

* New nodes are linked using Maged Michael and Michael Scott's lock-free queue
* algorithm, described in their paper "Simple, Fast, and Practical Non-Blocking
* and Blocking Concurrent Queue Algorithms":
* {@link http://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf}
*

* The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory * Reclamation for Lock-Free objects" and it is available here: * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf * * @author Pedro Ramalhete * @author Andreia Correia */ template class FAAArrayQueue { static const long BUFFER_SIZE = 1024; // 1024 private: struct Node { alignas(128) std::atomic deqidx; alignas(128) std::atomic items[BUFFER_SIZE]; alignas(128) std::atomic enqidx; alignas(128) std::atomic next; // Start with the first entry pre-filled and enqidx at 1 Node(T* item) : deqidx{0}, enqidx{1}, next{nullptr} { items[0].store(item, std::memory_order_relaxed); for (long i = 1; i < BUFFER_SIZE; i++) { items[i].store(nullptr, std::memory_order_relaxed); } } bool casNext(Node *cmp, Node *val) { return next.compare_exchange_strong(cmp, val); } }; bool casTail(Node *cmp, Node *val) { return tail.compare_exchange_strong(cmp, val); } bool casHead(Node *cmp, Node *val) { return head.compare_exchange_strong(cmp, val); } // Pointers to head and tail of the list alignas(128) std::atomic head; alignas(128) std::atomic tail; static const int MAX_THREADS = 128; const int maxThreads; T* taken = (T*)new int(); // Muuuahahah ! // We need just one hazard pointer HazardPointers hp {1, maxThreads}; const int kHpTail = 0; const int kHpHead = 0; public: FAAArrayQueue(int maxThreads=MAX_THREADS) : maxThreads{maxThreads} { Node* sentinelNode = new Node(nullptr); sentinelNode->enqidx.store(0, std::memory_order_relaxed); head.store(sentinelNode, std::memory_order_relaxed); tail.store(sentinelNode, std::memory_order_relaxed); } ~FAAArrayQueue() { while (dequeue(0) != nullptr); // Drain the queue delete head.load(); // Delete the last node delete (int*)taken; } static std::string className() { return "FAAArrayQueue"; } void enqueue(T* item, const int tid) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); while (true) { Node* ltail = hp.protect(kHpTail, tail, tid); const int idx = ltail->enqidx.fetch_add(1); if (idx > BUFFER_SIZE-1) { // This node is full if (ltail != tail.load()) continue; Node* lnext = ltail->next.load(); if (lnext == nullptr) { Node* newNode = new Node(item); if (ltail->casNext(nullptr, newNode)) { casTail(ltail, newNode); hp.clear(tid); return; } delete newNode; } else { casTail(ltail, lnext); } continue; } T* itemnull = nullptr; if (ltail->items[idx].compare_exchange_strong(itemnull, item)) { hp.clear(tid); return; } } } T* dequeue(const int tid) { while (true) { Node* lhead = hp.protect(kHpHead, head, tid); if (lhead->deqidx.load() >= lhead->enqidx.load() && lhead->next.load() == nullptr) break; const int idx = lhead->deqidx.fetch_add(1); if (idx > BUFFER_SIZE-1) { // This node has been drained, check if there is another one Node* lnext = lhead->next.load(); if (lnext == nullptr) break; // No more nodes in the queue if (casHead(lhead, lnext)) hp.retire(lhead, tid); continue; } T* item = lhead->items[idx].load(); if (item != nullptr) { hp.clear(tid); return item; } item = lhead->items[idx].exchange(taken); if (item == nullptr) continue; hp.clear(tid); return item; } hp.clear(tid); return nullptr; } }; #endif /* _FAA_ARRAY_QUEUE_HP_H_ */ ================================================ FILE: datastructures/queues/HazardPointers.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _HAZARD_POINTERS_H_ #define _HAZARD_POINTERS_H_ #include #include #include #include template class HazardPointers { private: static const int HP_MAX_THREADS = 128; static const int HP_MAX_HPS = 128; // This is named 'K' in the HP paper static const int CLPAD = 128/sizeof(std::atomic); static const int HP_THRESHOLD_R = 0; // This is named 'R' in the HP paper static const int MAX_RETIRED = HP_MAX_THREADS*HP_MAX_HPS; // Maximum number of retired objects per thread const int maxHPs; const int maxThreads; alignas(128) std::atomic* hp[HP_MAX_THREADS]; // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing alignas(128) std::vector retiredList[HP_MAX_THREADS*CLPAD]; std::function defdeleter = [](T* t, int tid){ delete t; }; std::function& deleter; public: HazardPointers(int maxHPs, int maxThreads) : maxHPs{maxHPs}, maxThreads{maxThreads}, deleter{defdeleter} { for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) { hp[ithread] = new std::atomic[HP_MAX_HPS]; for (int ihp = 0; ihp < HP_MAX_HPS; ihp++) { hp[ithread][ihp].store(nullptr, std::memory_order_relaxed); } } } HazardPointers(int maxHPs, int maxThreads, std::function& deleter) : maxHPs{maxHPs}, maxThreads{maxThreads}, deleter{deleter} { for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) { hp[ithread] = new std::atomic[HP_MAX_HPS]; for (int ihp = 0; ihp < HP_MAX_HPS; ihp++) { hp[ithread][ihp].store(nullptr, std::memory_order_relaxed); } } } ~HazardPointers() { for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) { delete[] hp[ithread]; // Clear the current retired nodes for (unsigned iret = 0; iret < retiredList[ithread*CLPAD].size(); iret++) { delete retiredList[ithread*CLPAD][iret]; } } } /** * Progress Condition: wait-free bounded (by maxHPs) */ void clear(const int tid) { for (int ihp = 0; ihp < maxHPs; ihp++) { hp[tid][ihp].store(nullptr, std::memory_order_release); } } /** * Progress Condition: wait-free population oblivious */ void clearOne(int ihp, const int tid) { 
hp[tid][ihp].store(nullptr, std::memory_order_release); } /** * Progress Condition: lock-free */ T* protect(int index, const std::atomic& atom, const int tid) { T* n = nullptr; T* ret; while ((ret = atom.load()) != n) { hp[tid][index].store(ret); n = ret; } return ret; } T* get(int index, const int tid){ return hp[tid][index].load(); } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free population oblivious */ T* protectPtr(int index, T* ptr, const int tid) { hp[tid][index].store(ptr); return ptr; } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free population oblivious */ T* protectPtrRelease(int index, T* ptr, const int tid) { hp[tid][index].store(ptr, std::memory_order_release); return ptr; } /** * Progress Condition: wait-free bounded (by the number of threads squared) */ void retire(T* ptr, const int tid) { retiredList[tid*CLPAD].push_back(ptr); if (retiredList[tid*CLPAD].size() < HP_THRESHOLD_R) return; for (unsigned iret = 0; iret < retiredList[tid*CLPAD].size();) { auto obj = retiredList[tid*CLPAD][iret]; bool canDelete = true; for (int tid = 0; tid < maxThreads && canDelete; tid++) { for (int ihp = maxHPs-1; ihp >= 0; ihp--) { if (hp[tid][ihp].load() == obj) { canDelete = false; break; } } } if (canDelete) { retiredList[tid*CLPAD].erase(retiredList[tid*CLPAD].begin() + iret); deleter(obj,tid); continue; } iret++; } } }; #endif /* _HAZARD_POINTERS_H_ */ ================================================ FILE: datastructures/queues/HazardPointersSimQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** */ #ifndef _HAZARD_POINTERS_SIM_QUEUE_H_ #define _HAZARD_POINTERS_SIM_QUEUE_H_ #include #include #include #include // TODO: use std::vector instead of arrays for the retired objects (keep the padding) template class HazardPointersSimQueue { private: static const int HP_MAX_THREADS = 128; static const int HP_MAX_HPS = 11; // This is named 'K' in the HP paper static const int CLPAD = 128/sizeof(std::atomic); static const int HP_THRESHOLD_R = 0; // This is named 'R' in the HP paper static const int MAX_RETIRED = HP_MAX_THREADS*HP_MAX_HPS; // Maximum number of retired objects per thread const int maxHPs; const int maxThreads; std::atomic hp[HP_MAX_THREADS*CLPAD][HP_MAX_HPS]; // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing std::vector retiredList[HP_MAX_THREADS*CLPAD]; std::function findPtr; public: HazardPointersSimQueue(std::function& find, int maxHPs=HP_MAX_HPS, int maxThreads=HP_MAX_THREADS) : maxHPs{maxHPs}, maxThreads{maxThreads} { findPtr = find; for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) { for (int ihp = 0; ihp < HP_MAX_HPS; ihp++) { hp[ithread*CLPAD][ihp].store(nullptr, std::memory_order_relaxed); } } } ~HazardPointersSimQueue() { for (int ithread = 0; ithread < HP_MAX_THREADS; ithread++) { // Clear the current retired nodes for (unsigned iret = 0; iret < retiredList[ithread*CLPAD].size(); iret++) { delete retiredList[ithread*CLPAD][iret]; } } } /** * Progress Condition: wait-free bounded (by maxHPs) */ void clear(const int tid) { for (int ihp = 0; ihp < maxHPs; ihp++) { hp[tid*CLPAD][ihp].store(nullptr, std::memory_order_release); } } /** * Progress Condition: wait-free population oblivious */ void clearOne(int ihp, const int tid) { hp[tid*CLPAD][ihp].store(nullptr, std::memory_order_release); } /** * Progress Condition: lock-free */ T* protect(int index, const std::atomic& atom, const int tid) { T* n = nullptr; T* ret; while ((ret = atom.load()) != n) { hp[tid*CLPAD][index].store(ret); n = ret; } return ret; } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free population oblivious */ T* protectPtr(int index, T* ptr, const int tid) { hp[tid*CLPAD][index].store(ptr); return ptr; } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free population oblivious */ T* protectRelease(int index, T* ptr, const int tid) { hp[tid*CLPAD][index].store(ptr, std::memory_order_release); return ptr; } /** * This returns the same value that is passed as ptr, which is sometimes useful * Progress Condition: wait-free bounded (by the number of threads squared) */ void retire(T* ptr, const int tid) { retiredList[tid*CLPAD].push_back(ptr); for (unsigned iret = 0; iret < retiredList[tid*CLPAD].size();) { auto obj = retiredList[tid*CLPAD][iret]; if (findPtr(obj)) { iret++; continue; } bool canDelete = true; for (int tid = 0; tid < maxThreads && canDelete; tid++) { for (int ihp = maxHPs-1; ihp >= 0; ihp--) { if (hp[tid*CLPAD][ihp].load() == obj) { canDelete = false; break; } } } if (canDelete) { retiredList[tid*CLPAD].erase(retiredList[tid*CLPAD].begin() + iret); delete obj; continue; } iret++; } } }; #endif /* _HAZARD_POINTERS_H_ */ ================================================ FILE: datastructures/queues/LCRQueue.hpp ================================================ /****************************************************************************** 
* Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _LCRQ_QUEUE_HP_H_ #define _LCRQ_QUEUE_HP_H_ #include #include "HazardPointers.hpp" // CAS2 macro #define __CAS2(ptr, o1, o2, n1, n2) \ ({ \ char __ret; \ __typeof__(o2) __junk; \ __typeof__(*(ptr)) __old1 = (o1); \ __typeof__(o2) __old2 = (o2); \ __typeof__(*(ptr)) __new1 = (n1); \ __typeof__(o2) __new2 = (n2); \ asm volatile("lock cmpxchg16b %2;setz %1" \ : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ : "b"(__new1), "c"(__new2), \ "a"(__old1), "d"(__old2)); \ __ret; }) #define CAS2(ptr, o1, o2, n1, n2) __CAS2(ptr, o1, o2, n1, n2) #define BIT_TEST_AND_SET(ptr, b) \ ({ \ char __ret; \ asm volatile("lock btsq $63, %0; setnc %1" : "+m"(*ptr), "=a"(__ret) : : "cc"); \ __ret; \ }) /** *

 * LCRQ Queue

 *
 * This is LCRQ by Adam Morrison and Yehuda Afek
 * http://www.cs.tau.ac.il/~mad/publications/ppopp2013-x86queues.pdf
 *
 * This implementation does NOT obey the C++ memory model rules AND it is x86 specific.
 * No guarantees are given on the correctness or consistency of the results if you use this queue.
 *
 * Bugs fixed:
 * - tt was not initialized in dequeue();
 *
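 *
 * Because the CAS2 macro above is cmpxchg16b inline assembly, this file only
 * builds for x86-64; a compile-time guard such as the following could make
 * that explicit (hypothetical, not present in the original):
 *
 *   #if !(defined(__x86_64__) || defined(_M_X64))
 *   #error "LCRQueue requires x86-64 (cmpxchg16b)"
 *   #endif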

 * enqueue algorithm: MS enqueue + LCRQ with re-usage
 * dequeue algorithm: MS dequeue + LCRQ with re-usage
 * Consistency: Linearizable
 * enqueue() progress: lock-free
 * dequeue() progress: lock-free
 * Memory Reclamation: Hazard Pointers (lock-free)
 *
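 *
 * The core fast path, sketched from the enqueue() below: each enqueuer takes
 * a ticket with fetch_add, the ticket picks a cell in the ring, and the cell
 * is claimed with a double-width CAS on its (val, idx) pair:
 *
 *   uint64_t tailticket = ltail->tail.fetch_add(1);
 *   Cell* cell = &ltail->array[tailticket & (RING_SIZE-1)];
 *   if (CAS2((void**)cell, nullptr, idx, item, tailticket)) return;  // claimed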

* The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory * Reclamation for Lock-Free objects" and it is available here: * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf * * @author Pedro Ramalhete * @author Andreia Correia */ template class LCRQueue { private: static const int RING_POW = 10; static const uint64_t RING_SIZE = 1ull << RING_POW; struct Cell { std::atomic val; std::atomic idx; uint64_t pad[14]; } __attribute__ ((aligned (128))); struct Node { std::atomic head __attribute__ ((aligned (128))); std::atomic tail __attribute__ ((aligned (128))); std::atomic next __attribute__ ((aligned (128))); Cell array[RING_SIZE]; Node() { for (unsigned i = 0; i < RING_SIZE; i++) { array[i].val.store(nullptr, std::memory_order_relaxed); array[i].idx.store(i, std::memory_order_relaxed); } head.store(0, std::memory_order_relaxed); tail.store(0, std::memory_order_relaxed); next.store(nullptr, std::memory_order_relaxed); } }; alignas(128) std::atomic head; alignas(128) std::atomic tail; static const int MAX_THREADS = 128; const int maxThreads; HazardPointers hp {1, maxThreads}; const int kHpTail = 0; const int kHpHead = 0; /* * Private methods */ int is_empty(T* v) { return (v == nullptr); } uint64_t node_index(uint64_t i) { return (i & ~(1ull << 63)); } uint64_t set_unsafe(uint64_t i) { return (i | (1ull << 63)); } uint64_t node_unsafe(uint64_t i) { return (i & (1ull << 63)); } inline uint64_t tail_index(uint64_t t) { return (t & ~(1ull << 63)); } int crq_is_closed(uint64_t t) { return (t & (1ull << 63)) != 0; } void fixState(Node *lhead) { while (1) { uint64_t t = lhead->tail.fetch_add(0); uint64_t h = lhead->head.fetch_add(0); // TODO: is it ok or not to cast "t" to int64_t ? if (lhead->tail.load() != (int64_t)t) continue; if (h > t) { int64_t tmp = t; if (lhead->tail.compare_exchange_strong(tmp, h)) break; continue; } break; } } int close_crq(Node *rq, const uint64_t tailticket, const int tries) { if (tries < 10) { int64_t tmp = tailticket + 1; return rq->tail.compare_exchange_strong(tmp, (tailticket + 1)|(1ull<<63)); } else { return BIT_TEST_AND_SET(&rq->tail, 63); } } public: LCRQueue(int maxThreads=MAX_THREADS) : maxThreads{maxThreads} { // Shared object init Node *sentinel = new Node; head.store(sentinel, std::memory_order_relaxed); tail.store(sentinel, std::memory_order_relaxed); } ~LCRQueue() { while (dequeue(0) != nullptr); // Drain the queue delete head.load(); // Delete the last node } static std::string className() { return "LCRQueue"; } void enqueue(T* item, const int tid) { int try_close = 0; while (true) { Node* ltail = hp.protectPtr(kHpTail, tail.load(), tid); if (ltail != tail.load()) continue; Node *lnext = ltail->next.load(); if (lnext != nullptr) { // Help advance the tail tail.compare_exchange_strong(ltail, lnext); continue; } uint64_t tailticket = ltail->tail.fetch_add(1); if (crq_is_closed(tailticket)) { Node* newNode = new Node(); // Solo enqueue (superfluous?) 
newNode->tail.store(1, std::memory_order_relaxed); newNode->array[0].val.store(item, std::memory_order_relaxed); newNode->array[0].idx.store(0, std::memory_order_relaxed); Node* nullnode = nullptr; if (ltail->next.compare_exchange_strong(nullnode, newNode)) {// Insert new ring tail.compare_exchange_strong(ltail, newNode); // Advance the tail hp.clear(tid); return; } delete newNode; continue; } Cell* cell = <ail->array[tailticket & (RING_SIZE-1)]; uint64_t idx = cell->idx.load(); if (cell->val.load() == nullptr) { if (node_index(idx) <= tailticket) { // TODO: is the missing cast before "t" ok or not to add? if ((!node_unsafe(idx) || ltail->head.load() < (int64_t)tailticket)) { if (CAS2((void**)cell, nullptr, idx, item, tailticket)) { hp.clear(tid); return; } } } } if (((int64_t)(tailticket - ltail->head.load()) >= (int64_t)RING_SIZE) && close_crq(ltail, tailticket, ++try_close)) continue; } } T* dequeue(const int tid) { while (true) { Node* lhead = hp.protectPtr(kHpHead, head.load(), tid); if (lhead != head.load()) continue; uint64_t headticket = lhead->head.fetch_add(1); Cell* cell = &lhead->array[headticket & (RING_SIZE-1)]; int r = 0; uint64_t tt = 0; while (true) { uint64_t cell_idx = cell->idx.load(); uint64_t unsafe = node_unsafe(cell_idx); uint64_t idx = node_index(cell_idx); T* val = cell->val.load(); if (idx > headticket) break; if (val != nullptr) { if (idx == headticket) { if (CAS2((void**)cell, val, cell_idx, nullptr, unsafe | (headticket + RING_SIZE))) { hp.clear(tid); return val; } } else { if (CAS2((void**)cell, val, cell_idx, val, set_unsafe(idx))) break; } } else { if ((r & ((1ull << 10) - 1)) == 0) tt = lhead->tail.load(); // Optimization: try to bail quickly if queue is closed. int crq_closed = crq_is_closed(tt); uint64_t t = tail_index(tt); if (unsafe) { // Nothing to do, move along if (CAS2((void**)cell, val, cell_idx, val, unsafe | (headticket + RING_SIZE))) break; } else if (t < headticket + 1 || r > 200000 || crq_closed) { if (CAS2((void**)cell, val, idx, val, headticket + RING_SIZE)) { if (r > 200000 && tt > RING_SIZE) BIT_TEST_AND_SET(&lhead->tail, 63); break; } } else { ++r; } } } if (tail_index(lhead->tail.load()) <= headticket + 1) { fixState(lhead); // try to return empty Node* lnext = lhead->next.load(); if (lnext == nullptr) { hp.clear(tid); return nullptr; // Queue is empty } if (tail_index(lhead->tail) <= headticket + 1) { if (head.compare_exchange_strong(lhead, lnext)) hp.retire(lhead, tid); } } } } }; #endif /* _LCRQ_QUEUE_HP_H_ */ ================================================ FILE: datastructures/queues/MichaelScottQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _MICHAEL_SCOTT_QUEUE_HP_H_ #define _MICHAEL_SCOTT_QUEUE_HP_H_ #include #include #include "HazardPointers.hpp" /** *

 * Michael-Scott Queue

 *
 * enqueue algorithm: MS enqueue
 * dequeue algorithm: MS dequeue
 * Consistency: Linearizable
 * enqueue() progress: lock-free
 * dequeue() progress: lock-free
 * Memory Reclamation: Hazard Pointers (lock-free)
 *
 * Maged Michael and Michael Scott's Queue with Hazard Pointers
 *

 * Lock-Free Linked List as described in Maged Michael and Michael Scott's paper:
 * {@link http://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf}
 *
 * Simple, Fast, and Practical Non-Blocking and Blocking Concurrent Queue Algorithms
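 *
 * Dequeue protects both the current head and its successor with hazard
 * pointers before swinging the head; a sketch of the pattern used below:
 *
 *   Node* node = hp.protect(kHpHead, head, tid);        // protect head
 *   Node* lnext = hp.protect(kHpNext, node->next, tid); // protect successor
 *   if (casHead(node, lnext)) hp.retire(node, tid);     // now safe to retire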

* The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory * Reclamation for Lock-Free objects" and it is available here: * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf * */ template class MichaelScottQueue { private: struct Node { T* item; std::atomic next; Node(T* userItem) : item{userItem}, next{nullptr} { } bool casNext(Node *cmp, Node *val) { return next.compare_exchange_strong(cmp, val); } }; bool casTail(Node *cmp, Node *val) { return tail.compare_exchange_strong(cmp, val); } bool casHead(Node *cmp, Node *val) { return head.compare_exchange_strong(cmp, val); } // Pointers to head and tail of the list alignas(128) std::atomic head; alignas(128) std::atomic tail; static const int MAX_THREADS = 128; const int maxThreads; // We need two hazard pointers for dequeue() HazardPointers hp {2, maxThreads}; const int kHpTail = 0; const int kHpHead = 0; const int kHpNext = 1; public: MichaelScottQueue(int maxThreads=MAX_THREADS) : maxThreads{maxThreads} { Node* sentinelNode = new Node(nullptr); head.store(sentinelNode, std::memory_order_relaxed); tail.store(sentinelNode, std::memory_order_relaxed); } ~MichaelScottQueue() { while (dequeue(0) != nullptr); // Drain the queue delete head.load(); // Delete the last node } static std::string className() { return "MichaelScottQueue"; } void enqueue(T* item, const int tid) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); Node* newNode = new Node(item); while (true) { Node* ltail = hp.protectPtr(kHpTail, tail, tid); if (ltail == tail.load()) { Node* lnext = ltail->next.load(); if (lnext == nullptr) { // It seems this is the last node, so add the newNode here // and try to move the tail to the newNode if (ltail->casNext(nullptr, newNode)) { casTail(ltail, newNode); hp.clear(tid); return; } } else { casTail(ltail, lnext); } } } } T* dequeue(const int tid) { Node* node = hp.protect(kHpHead, head, tid); while (node != tail.load()) { Node* lnext = hp.protect(kHpNext, node->next, tid); if (casHead(node, lnext)) { T* item = lnext->item; // Another thread may clean up lnext after we do hp.clear() hp.clear(tid); hp.retire(node, tid); return item; } node = hp.protect(kHpHead, head, tid); } hp.clear(tid); return nullptr; // Queue is empty } }; #endif /* _MICHAEL_SCOTT_QUEUE_HP_H_ */ ================================================ FILE: datastructures/queues/OFLFArrayLinkedListQueue.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OF_LF_ARRAY_LINKED_LIST_QUEUE_H_ #define _OF_LF_ARRAY_LINKED_LIST_QUEUE_H_ #include #include #include "stms/OneFileLF.hpp" /** *

 * An Array Linked List Queue using OneFile STM (Lock-Free)
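 *
 * Every operation is a OneFile transaction over tmtype fields; a minimal
 * sketch of the fast path of the enqueue() below:
 *
 *   oflf::updateTx([this,item] () -> bool {
 *       Node* ltail = tail;
 *       uint64_t ltailidx = ltail->tailidx;
 *       ltail->items[ltailidx] = item;   // the STM makes these writes atomic
 *       ++ltail->tailidx;
 *       return true;
 *   });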

* * TODO * * * enqueue algorithm: sequential implementation + MWC * dequeue algorithm: sequential implementation + MWC * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: Hazard Eras (integrated into MWC) * enqueue min ops: 2 DCAS + 1 CAS * dequeue min ops: 1 DCAS + 1 CAS */ template class OFLFArrayLinkedListQueue : public oflf::tmbase { private: /* struct cell { onefilelf::tmtype val; } __attribute__ ((aligned (128))); */ struct Node : oflf::tmbase { static const int ITEM_NUM = 1024; // TODO: use a larger ring buffer size here, 1024 for example oflf::tmtype headidx {0}; //cell items[ITEM_NUM]; oflf::tmtype items[ITEM_NUM]; oflf::tmtype tailidx {0}; oflf::tmtype next {nullptr}; Node(T* item) { items[0] = item; tailidx = 1; headidx = 0; for (int i = 1; i < ITEM_NUM; i++) items[i] = nullptr; } }; oflf::tmtype head {nullptr}; oflf::tmtype tail {nullptr}; public: OFLFArrayLinkedListQueue(unsigned int maxThreads=0) { Node* sentinelNode = new Node(nullptr); sentinelNode->tailidx = 0; head = sentinelNode; tail = sentinelNode; } ~OFLFArrayLinkedListQueue() { while (dequeue(0) != nullptr); // Drain the queue Node* lhead = head; delete lhead; } static std::string className() { return "OF-LF-ArrayLinkedListQueue"; } /* * Progress Condition: lock-free * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return oflf::updateTx([this,item] () -> bool { Node* ltail = tail; uint64_t ltailidx = ltail->tailidx; if (ltailidx < Node::ITEM_NUM) { ltail->items[ltailidx] = item; ++ltail->tailidx; return true; } Node* newNode = oflf::tmNew(item); tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: lock-free */ T* dequeue(const int tid=0) { return oflf::updateTx([this] () -> T* { Node* lhead = head; uint64_t lheadidx = lhead->headidx; // Check if queue is empty if (lhead == tail && lheadidx == tail->tailidx) return nullptr; if (lheadidx < Node::ITEM_NUM) { ++lhead->headidx; return lhead->items[lheadidx]; } lhead = lhead->next; oflf::tmDelete(head); head = lhead; ++lhead->headidx; return lhead->items[0]; }); } }; #endif /* _OF_LF_ARRAY_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/OFLFArrayQueue.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OFLF_STM_ARRAY_QUEUE_H_ #define _OFLF_STM_ARRAY_QUEUE_H_ #include #include #include "stms/OneFileLF.hpp" /** *

 * An Array Queue
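 *
 * Unlike the other queues in this folder, this one is bounded: enqueue()
 * returns false once MAX_ITEMS entries are occupied. Illustrative check
 * (names are for illustration only):
 *
 *   OFLFArrayQueue<int> q;
 *   int x = 7;
 *   if (!q.enqueue(&x)) { }   // false means the ring buffer is full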

* */ template class OFLFArrayQueue : public oflf::tmbase { private: static const int MAX_ITEMS = 2048; oflf::tmtype headidx {0}; oflf::tmtype items[MAX_ITEMS]; oflf::tmtype tailidx {0}; public: OFLFArrayQueue(unsigned int maxThreads=0) { oflf::updateTx([this] () { for (int i = 0; i < MAX_ITEMS; i++) items[i] = nullptr; return true; }); } ~OFLFArrayQueue() { } static std::string className() { return "OF-LF-ArrayQueue"; } /* * Progress Condition: blocking * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return oflf::updateTx([this,item] () -> bool { if (tailidx >= headidx+MAX_ITEMS) return false; // queue is full items[tailidx % MAX_ITEMS] = item; ++tailidx; return true; }); } /* * Progress Condition: blocking */ T* dequeue(const int tid=0) { return oflf::updateTx([this] () -> T* { if (tailidx == headidx) return nullptr; // queue is empty T* item = items[headidx % MAX_ITEMS]; ++headidx; return item; }); } }; #endif /* _OF_LF_STM_ARRAY_QUEUE_H_ */ ================================================ FILE: datastructures/queues/OFLFLinkedListQueue.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OF_LF_LINKED_LIST_QUEUE_H_ #define _OF_LF_LINKED_LIST_QUEUE_H_ #include #include #include "stms/OneFileLF.hpp" /** *

 * A Linked List queue using OneFile STM (Lock-Free)
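 *
 * Note that enqueue() below allocates the node with oflf::tmNew() before
 * opening the transaction ("less overhead", per the inline comment), so the
 * transaction body only links it in. Sketch, assuming tmNew takes the node
 * type as template argument:
 *
 *   Node* newNode = oflf::tmNew<Node>(item);   // outside the transaction
 *   oflf::updateTx([this,newNode] () -> bool {
 *       tail->next = newNode;
 *       tail = newNode;
 *       return true;
 *   });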

* * enqueue algorithm: sequential implementation + OFLF * dequeue algorithm: sequential implementation + OFLF * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: lock-free Hazard Eras (integrated into OFLF) * enqueue min ops: 2 DCAS + 1 CAS * dequeue min ops: 1 DCAS + 1 CAS */ template class OFLFLinkedListQueue : public oflf::tmbase { private: struct Node : oflf::tmbase { T* item; oflf::tmtype next {nullptr}; Node(T* userItem) : item{userItem} { } }; oflf::tmtype head {nullptr}; oflf::tmtype tail {nullptr}; public: OFLFLinkedListQueue(unsigned int maxThreads=0) { Node* sentinelNode = oflf::tmNew(nullptr); head = sentinelNode; tail = sentinelNode; } ~OFLFLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue Node* lhead = head; oflf::tmDelete(lhead); } static std::string className() { return "OF-LF-LinkedListQueue"; } /* * Progress Condition: lock-free * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); Node* newNode = oflf::tmNew(item); // Let's allocate outside the transaction, less overhead return oflf::updateTx([this,newNode] () -> bool { tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: lock-free */ T* dequeue(const int tid=0) { return oflf::updateTx([this] () -> T* { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; oflf::tmDelete(lhead); return head->item; }); } }; #endif /* _OF_LF_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/OFWFArrayLinkedListQueue.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OF_WF_ARRAY_LINKED_LIST_QUEUE_H_ #define _OF_WF_ARRAY_LINKED_LIST_QUEUE_H_ #include #include #include "stms/OneFileWF.hpp" /** *

 * An Array Linked List Queue using OneFile STM (Wait-Free)
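 *
 * Same structure as OFLFArrayLinkedListQueue, but on the wait-free OneFile
 * STM (namespace ofwf). A sketch of the dequeue() fast path below, for the
 * case where the current node is not yet exhausted:
 *
 *   return ofwf::updateTx([this] () -> T* {
 *       Node* lhead = head;
 *       uint64_t lheadidx = lhead->headidx;
 *       if (lhead == tail && lheadidx == tail->tailidx) return nullptr; // empty
 *       ++lhead->headidx;               // fast path (lheadidx < ITEM_NUM);
 *       return lhead->items[lheadidx];  // the full code also advances nodes
 *   });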

* * TODO * * * enqueue algorithm: sequential implementation + MWC * dequeue algorithm: sequential implementation + MWC * Consistency: Linearizable * enqueue() progress: lock-free * dequeue() progress: lock-free * Memory Reclamation: Hazard Eras (integrated into MWC) * enqueue min ops: 2 DCAS + 1 CAS * dequeue min ops: 1 DCAS + 1 CAS */ template class OFWFArrayLinkedListQueue : public ofwf::tmbase { private: struct Node : ofwf::tmbase { static const int ITEM_NUM = 1024; ofwf::tmtype headidx {0}; ofwf::tmtype items[ITEM_NUM]; ofwf::tmtype tailidx {0}; ofwf::tmtype next {nullptr}; Node(T* item) { items[0] = item; tailidx = 1; headidx = 0; for (int i = 1; i < ITEM_NUM; i++) items[i] = nullptr; } }; ofwf::tmtype head {nullptr}; ofwf::tmtype tail {nullptr}; public: OFWFArrayLinkedListQueue(unsigned int maxThreads=0) { Node* sentinelNode = new Node(nullptr); sentinelNode->tailidx = 0; head = sentinelNode; tail = sentinelNode; } ~OFWFArrayLinkedListQueue() { while (dequeue(0) != nullptr); // Drain the queue Node* lhead = head; delete lhead; } static std::string className() { return "OF-WF-ArrayLinkedListQueue"; } /* * Progress Condition: lock-free * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return ofwf::updateTx([this,item] () -> bool { Node* ltail = tail; uint64_t ltailidx = ltail->tailidx; if (ltailidx < Node::ITEM_NUM) { ltail->items[ltailidx] = item; ++ltail->tailidx; return true; } Node* newNode = ofwf::tmNew(item); tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: lock-free */ T* dequeue(const int tid=0) { return ofwf::updateTx([this] () -> T* { Node* lhead = head; uint64_t lheadidx = lhead->headidx; // Check if queue is empty if (lhead == tail && lheadidx == tail->tailidx) return nullptr; if (lheadidx < Node::ITEM_NUM) { ++lhead->headidx; return lhead->items[lheadidx]; } lhead = lhead->next; ofwf::tmDelete(head); head = lhead; ++lhead->headidx; return lhead->items[0]; }); } }; #endif /* _OF_WF_ARRAY_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/OFWFLinkedListQueue.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _OF_WF_LINKED_LIST_QUEUE_H_ #define _OF_WF_LINKED_LIST_QUEUE_H_ #include #include #include "stms/OneFileWF.hpp" /** *

 * A Linked List queue using OneFile STM (Wait-Free)

* * enqueue algorithm: sequential implementation + OFWF * dequeue algorithm: sequential implementation + OFWF * Consistency: Linearizable * enqueue() progress: wait-free * dequeue() progress: wait-free * Memory Reclamation: wait-free Hazard Eras (integrated into OFWF) * enqueue min ops: 3 DCAS + 1 CAS * dequeue min ops: 2 DCAS + 1 CAS */ template class OFWFLinkedListQueue : public ofwf::tmbase { private: struct Node : ofwf::tmbase { T* item; ofwf::tmtype next; Node(T* userItem) : item{userItem}, next{nullptr} { } }; ofwf::tmtype head {nullptr}; ofwf::tmtype tail {nullptr}; public: OFWFLinkedListQueue(unsigned int maxThreads=0) { Node* sentinelNode = ofwf::tmNew(nullptr); head = sentinelNode; tail = sentinelNode; } ~OFWFLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue Node* lhead = head; ofwf::tmDelete(lhead); } static std::string className() { return "OF-WF-LinkedListQueue"; } /* * Progress Condition: wait-free bounded * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); Node* newNode = ofwf::tmNew(item); // Let's allocate outside the transaction, less overhead return ofwf::updateTx([this,newNode] () -> bool { tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: wait-free bounded */ T* dequeue(const int tid=0) { return (T*)ofwf::updateTx([this] () -> T* { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; ofwf::tmDelete(lhead); return head->item; }); } }; #endif /* _OF_WF_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/README.md ================================================ # Queues # This folder contains multiple multi-producer-multi-consumer queue implementations, all of them with integrated memory reclamation having the same progress condition: - FAAArrayQueue: Memory unbounded, lock-free, one array per node, hazard pointers http://... - LCRQueue: Memory unbounded, lock-free, one array per node, hazard pointers, can re-use entries in some situations http:// - OFLFLinkedListqueue: Memory unbounded, lock-free, one entry per node, hazard eras Uses OneFile STM (Lock-Free) - OFWFLinkedListqueue: Memory unbounded, wait-free bounded, one entry per node, hazard eras Uses OneFile STM (Wait-Free) - SimQueue: Memory unbounded, wait-free bounded, one entry per node, modified hazard pointers http:// - TurnQueue: Memory unbounded, wait-free bounded, one entry per node, hazard pointers http:// ================================================ FILE: datastructures/queues/SimQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _CR_SIM_QUEUE_HP_H_ #define _CR_SIM_QUEUE_HP_H_ #include #include #include "HazardPointersSimQueue.hpp" /** *

 * Sim Queue

 *
 * Based on the SimQueue (FK queue)
 * http://thalis.cs.uoi.gr/tech_reports/publications/TR2011-01.pdf
 *
 * enqueue algorithm: P-Sim
 * dequeue algorithm: P-Sim
 * Consistency: Linearizable
 * enqueue() progress: wait-free bounded O(N_threads)
 * dequeue() progress: wait-free bounded O(N_threads)
 * Memory Reclamation: Hazard Pointers with custom scanner for Nodes. EnqState and DeqState re-usage.
 *
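 *
 * The shared state is swung with a single 64-bit CAS on a packed
 * (sequence, index) word, using the pointer_t union defined below:
 *
 *   pointer_t myPointer;
 *   myPointer.u.seq = lpointer.u.seq + 1;  // 48-bit sequence, avoids ABA
 *   myPointer.u.index = myIndex;           // 16-bit slot into enqReused[]
 *   enqPointer.compare_exchange_strong(lpointer, myPointer);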

* The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory * Reclamation for Lock-Free objects" and it is available here: * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf * */ template class SimQueue { private: static const int MAX_THREADS = 128; struct Node { T* item; std::atomic next {nullptr}; Node(T* item) : item{item} { } }; struct EnqState { std::atomic tail {nullptr}; // link_a std::atomic nextNode {nullptr}; // link_b std::atomic nextTail {nullptr}; // ptr std::atomic applied[MAX_THREADS]; EnqState() { for(int i=0; i < MAX_THREADS; i++){ applied[i].store(false, std::memory_order_relaxed); } } }; struct DeqState { std::atomic head {nullptr}; std::atomic items[MAX_THREADS]; std::atomic applied[MAX_THREADS]; DeqState() { for(int i=0; i < MAX_THREADS; i++){ applied[i].store(false, std::memory_order_relaxed); items[i].store(nullptr, std::memory_order_relaxed); } } }; typedef union pointer_t { struct StructData{ int64_t seq : 48; int64_t index: 16; } u; // struct_data int64_t raw; // raw_data } pointer_t; const int maxThreads; alignas(128) std::atomic enqPointer; alignas(128) std::atomic deqPointer; // Enqueue requests alignas(128) std::atomic items[MAX_THREADS]; // Always access relaxed alignas(128) std::atomic enqueuers[MAX_THREADS]; // Re-usable EnqState instances alignas(128) EnqState enqReused[MAX_THREADS*2]; // Dequeue requests alignas(128) std::atomic dequeuers[MAX_THREADS]; // Re-usable DeqState instances alignas(128) DeqState deqReused[MAX_THREADS*2]; alignas(128) Node* pool[MAX_THREADS][MAX_THREADS]; // Passed to Hazard Pointers std::function find = [this](Node* ptr) { pointer_t lpointer = enqPointer.load(); if (enqReused[lpointer.u.index].tail.load() == ptr) return true; /* lpointer = deqPointer.load(); if (deqReused[lpointer.u.index].head.load() == ptr) return true; */ return false; }; HazardPointersSimQueue hp {find, 1, maxThreads}; const int kHpTail = 0; const int kHpNode = 0; Node* sentinel = new Node(nullptr); public: SimQueue(int maxThreads=MAX_THREADS) : maxThreads(maxThreads) { for (int i = 0; i < maxThreads; i++) { enqueuers[i].store(false, std::memory_order_relaxed); dequeuers[i].store(false, std::memory_order_relaxed); for(int j=0;jtail.load(), tid); Node* lnext = lstate->nextNode.load(); // No need for HP because we don't dereference it Node* lnextTail = lstate->nextTail.load(); // No need for HP if (lpointer.raw != enqPointer.load().raw) continue; // Advance the tail if needed if (ltail->next.load() != lnext) { ltail->next.store(lnext, std::memory_order_release); } // Check if my request has been done if (lstate->applied[tid].load() == newrequest) { if (lpointer.raw == enqPointer.load().raw) break; } // Help opened enqueue requests, starting from zero Node* first = nullptr; Node* node = nullptr; const int myIndex = (lpointer.u.index == 2*tid) ? 
2*tid+1 : 2*tid ; EnqState* const myState = &enqReused[myIndex]; int numNodes = 0; for (int j = 0; j < maxThreads; j++) { // Check if it is an open request const bool enqj = enqueuers[j].load(); myState->applied[j].store(enqj, std::memory_order_relaxed); if (enqj == lstate->applied[j].load()) continue; Node* prev = node; node = pool[tid][numNodes++]; node->item = items[j].load(std::memory_order_relaxed); if (first == nullptr) { first = node; } else { prev->next.store(node, std::memory_order_relaxed); } if (lpointer.raw != enqPointer.load().raw) break; } // Try to apply the new sublist if (lpointer.raw != enqPointer.load().raw) continue; node->next.store(nullptr, std::memory_order_relaxed); myState->tail.store(lnextTail, std::memory_order_relaxed); myState->nextNode.store(first, std::memory_order_relaxed); myState->nextTail.store(node, std::memory_order_relaxed); pointer_t myPointer; myPointer.u.seq = lpointer.u.seq + 1; myPointer.u.index = myIndex; if (enqPointer.compare_exchange_strong(lpointer, myPointer)) { for (int k = 0; k < numNodes; k++) { // Refill pool pool[tid][k] = new Node(nullptr); } } } hp.clear(tid); } /** * Progress condition: wait-free bounded * * We use just one HP index, but it was though to get there. */ T* dequeue(const int tid) { // Publish dequeue request bool newrequest = !dequeuers[tid].load(std::memory_order_relaxed); dequeuers[tid].store(newrequest); for (int iter = 0; iter < 2; iter++) { pointer_t lpointer = deqPointer.load(); DeqState* lstate = &deqReused[lpointer.u.index]; // Check if my request has been done if (lstate->applied[tid].load() == newrequest) { if (lpointer.raw == deqPointer.load().raw) break; } // Help opened dequeue requests, starting from turn+1 Node* newHead = hp.protectPtr(kHpNode, lstate->head, tid); if (lpointer.raw != deqPointer.load().raw) continue; const int myIndex = (lpointer.u.index == 2*tid) ? 2*tid+1 : 2*tid ; DeqState* const myState = &deqReused[myIndex]; Node* node = newHead; for (int j = 0; j < maxThreads; j++) { // Check if it is an open request const bool applied = lstate->applied[j].load(); if (dequeuers[j].load() == applied) { myState->items[j].store(lstate->items[j], std::memory_order_relaxed); myState->applied[j].store(applied, std::memory_order_relaxed); continue; } myState->applied[j].store(!applied, std::memory_order_relaxed); if (node->next.load() == nullptr) { myState->items[j].store(nullptr,std::memory_order_relaxed); } else { node = hp.protectPtr(kHpNode, node->next, tid); if (lpointer.raw != deqPointer.load().raw) break; myState->items[j].store(node->item, std::memory_order_relaxed); newHead = node; } } if (lpointer.raw != deqPointer.load().raw) continue; pointer_t newDeqIndex; newDeqIndex.u.seq = lpointer.u.seq + 1; newDeqIndex.u.index = myIndex; myState->head.store(newHead, std::memory_order_relaxed); node = lstate->head; if (deqPointer.compare_exchange_strong(lpointer, newDeqIndex)) { while (node != newHead) { Node* next = node->next.load(); hp.retire(node,tid); node = next; } break; } } hp.clear(tid); return deqReused[deqPointer.load().u.index].items[tid].load(); } }; #endif /* _SIM_QUEUE_HP_H_ */ ================================================ FILE: datastructures/queues/TinySTMArrayLinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _TINY_STM_ARRAY_LINKED_LIST_QUEUE_H_ #define _TINY_STM_ARRAY_LINKED_LIST_QUEUE_H_ #include #include #include "stms/TinySTM.hpp" /** *

 * An Array Linked List Queue using Tiny STM
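 *
 * With TinySTM even the constructor and destructor run their work inside a
 * transaction; a sketch of the constructor below, assuming tmNew takes the
 * node type as template argument:
 *
 *   tinystm::updateTx([this] () {
 *       Node* sentinelNode = tinystm::tmNew<Node>(nullptr);
 *       sentinelNode->tailidx = 0;
 *       head = sentinelNode;
 *       tail = sentinelNode;
 *       return true;
 *   });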

*/ template class TinySTMArrayLinkedListQueue { private: struct Node : tinystm::tmbase { static const int ITEM_NUM = 1024; tinystm::tmtype headidx {0}; tinystm::tmtype items[ITEM_NUM]; tinystm::tmtype tailidx {0}; tinystm::tmtype next {nullptr}; Node(T* item) { items[0] = item; tailidx = 1; headidx = 0; for (int i = 1; i < ITEM_NUM; i++) items[i] = nullptr; } }; tinystm::tmtype head {nullptr}; tinystm::tmtype tail {nullptr}; public: TinySTMArrayLinkedListQueue(unsigned int maxThreads=0) { tinystm::updateTx([this] () { Node* sentinelNode = tinystm::tmNew(nullptr); sentinelNode->tailidx = 0; head = sentinelNode; tail = sentinelNode; return true; }); } ~TinySTMArrayLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue tinystm::updateTx([this] () { Node* lhead = head; tinystm::tmDelete(lhead); return true; }); } static std::string className() { return "TinySTM-ArrayLinkedListQueue"; } /* * Progress Condition: blocking * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return tinystm::updateTx([this,item] () -> bool { Node* ltail = tail; uint64_t ltailidx = ltail->tailidx; if (ltailidx < Node::ITEM_NUM) { ltail->items[ltailidx] = item; ++ltail->tailidx; return true; } Node* newNode = tinystm::tmNew(item); tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: blocking */ T* dequeue(const int tid=0) { return tinystm::updateTx([this] () -> T* { Node* lhead = head; uint64_t lheadidx = lhead->headidx; // Check if queue is empty if (lhead == tail && lheadidx == tail->tailidx) return nullptr; if (lheadidx < Node::ITEM_NUM) { ++lhead->headidx; return lhead->items[lheadidx]; } lhead = lhead->next; tinystm::tmDelete(head.load()); head = lhead; ++lhead->headidx; return lhead->items[0]; }); } }; #endif /* _TINY_STM_ARRAY_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/TinySTMLinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _TINY_STM_LINKED_LIST_QUEUE_H_ #define _TINY_STM_LINKED_LIST_QUEUE_H_ #include #include #include "stms/TinySTM.hpp" /** *

 * A Linked List queue using Tiny STM

*/ template class TinySTMLinkedListQueue : public tinystm::tmbase { private: struct Node : tinystm::tmbase { T* item; tinystm::tmtype next {nullptr}; Node(T* userItem) : item{userItem} { } }; tinystm::tmtype head {nullptr}; tinystm::tmtype tail {nullptr}; public: TinySTMLinkedListQueue(unsigned int maxThreads=0) { tinystm::updateTx([this] () { Node* sentinelNode = tinystm::tmNew(nullptr); head = sentinelNode; tail = sentinelNode; return true; }); } ~TinySTMLinkedListQueue() { while (dequeue() != nullptr); // Drain the queue tinystm::updateTx([this] () { Node* lhead = head; tinystm::tmDelete(lhead); return true; }); } static std::string className() { return "TinySTM-LinkedListQueue"; } /* * Progress Condition: blocking * Always returns true */ bool enqueue(T* item, const int tid=0) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); return tinystm::updateTx([this,item] () -> bool { Node* newNode = tinystm::tmNew(item); tail->next = newNode; tail = newNode; return true; }); } /* * Progress Condition: blocking */ T* dequeue(const int tid=0) { return tinystm::updateTx([this] () -> T* { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; tinystm::tmDelete(lhead); return head->item; }); } }; #endif /* _TINY_STM_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/queues/TurnQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _TURN_QUEUE_HP_H_ #define _TURN_QUEUE_HP_H_ #include #include #include "HazardPointers.hpp" /** *

 * Turn Queue

 *
 * A concurrent wait-free queue that is Multi-Producer-Multi-Consumer and does
 * its own wait-free memory reclamation.
 * Based on the paper "A Wait-Free Queue with Wait-Free Memory Reclamation"
 * https://github.com/pramalhe/ConcurrencyFreaks/tree/master/papers/crturnqueue-2016.pdf
 *

 * Enqueue algorithm: CR Turn enqueue
 * Dequeue algorithm: CR Turn dequeue
 * Consistency: Linearizable
 * enqueue() progress: wait-free bounded O(N_threads)
 * dequeue() progress: wait-free bounded O(N_threads)
 * Memory Reclamation: Hazard Pointers (wait-free)
 *
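 *
 * Dequeues are served in turns: each node records in deqTid which thread's
 * request it satisfied, and searchNext() below scans for the next open
 * request starting just after that turn. Sketched from searchNext():
 *
 *   const int turn = lhead->deqTid.load();
 *   for (int idx = turn+1; idx < turn+maxThreads+1; idx++) {
 *       const int idDeq = idx % maxThreads;   // round-robin over threads
 *       if (deqself[idDeq].load() != deqhelp[idDeq].load()) continue;
 *       if (lnext->deqTid.load() == IDX_NONE) lnext->casDeqTid(IDX_NONE, idDeq);
 *       break;
 *   }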

* The paper on Hazard Pointers is named "Hazard Pointers: Safe Memory * Reclamation for Lock-Free objects" and it is available here: * http://web.cecs.pdx.edu/~walpole/class/cs510/papers/11.pdf * * @author Andreia Correia * @author Pedro Ramalhete */ template class TurnQueue { private: struct Node { T* item; const int enqTid; std::atomic deqTid; std::atomic next; Node(T* item, int tid) : item{item}, enqTid{tid}, deqTid{IDX_NONE}, next{nullptr} { } bool casDeqTid(int cmp, int val) { return deqTid.compare_exchange_strong(cmp, val); } }; static const int IDX_NONE = -1; static const int MAX_THREADS = 128; const int maxThreads; // Pointers to head and tail of the list alignas(128) std::atomic head; alignas(128) std::atomic tail; // Enqueue requests alignas(128) std::atomic enqueuers[MAX_THREADS]; // Dequeue requests alignas(128) std::atomic deqself[MAX_THREADS]; alignas(128) std::atomic deqhelp[MAX_THREADS]; HazardPointers hp {3, maxThreads}; // We need three hazard pointers const int kHpTail = 0; const int kHpHead = 0; const int kHpNext = 1; const int kHpDeq = 2; Node* sentinelNode = new Node(nullptr, 0); /** * Called only from dequeue() * * Search for the next request to dequeue and assign it to lnext.deqTid * It is only a request to dequeue if deqself[i] equals deqhelp[i]. */ inline int searchNext(Node* lhead, Node* lnext) { const int turn = lhead->deqTid.load(); for (int idx=turn+1; idx < turn+maxThreads+1; idx++) { const int idDeq = idx%maxThreads; if (deqself[idDeq].load() != deqhelp[idDeq].load()) continue; if (lnext->deqTid.load() == IDX_NONE) lnext->casDeqTid(IDX_NONE, idDeq); break; } return lnext->deqTid.load(); } /** * Called only from dequeue() * * If the ldeqTid is not our own, we must use an HP to protect against * deqhelp[ldeqTid] being retired-deleted-newed-reenqueued. 
*/ inline void casDeqAndHead(Node* lhead, Node* lnext, const int tid) { const int ldeqTid = lnext->deqTid.load(); if (ldeqTid == tid) { deqhelp[ldeqTid].store(lnext, std::memory_order_release); } else { Node* ldeqhelp = hp.protectPtr(kHpDeq, deqhelp[ldeqTid].load(), tid); if (ldeqhelp != lnext && lhead == head.load()) { deqhelp[ldeqTid].compare_exchange_strong(ldeqhelp, lnext); // Assign next to request } } head.compare_exchange_strong(lhead, lnext); } /** * Called only from dequeue() * * Giveup procedure, for when there are no nodes left to dequeue */ inline void giveUp(Node* myReq, const int tid) { Node* lhead = head.load(); if (deqhelp[tid].load() != myReq || lhead == tail.load()) return; hp.protectPtr(kHpHead, lhead, tid); if (lhead != head.load()) return; Node* lnext = hp.protectPtr(kHpNext, lhead->next.load(), tid); if (lhead != head.load()) return; if (searchNext(lhead, lnext) == IDX_NONE) lnext->casDeqTid(IDX_NONE, tid); casDeqAndHead(lhead, lnext, tid); } public: TurnQueue(int maxThreads=MAX_THREADS) : maxThreads(maxThreads) { head.store(sentinelNode, std::memory_order_relaxed); tail.store(sentinelNode, std::memory_order_relaxed); for (int i = 0; i < maxThreads; i++) { enqueuers[i].store(nullptr, std::memory_order_relaxed); // deqself[i] != deqhelp[i] means that isRequest=false deqself[i].store(new Node(nullptr, 0), std::memory_order_relaxed); deqhelp[i].store(new Node(nullptr, 0), std::memory_order_relaxed); } } ~TurnQueue() { delete sentinelNode; while (dequeue(0) != nullptr); // Drain the queue for (int i=0; i < maxThreads; i++) delete deqself[i].load(); for (int i=0; i < maxThreads; i++) delete deqhelp[i].load(); } static std::string className() { return "TurnQueue"; } /** * Steps when uncontended: * 1. Add node to enqueuers[] * 2. Insert node in tail.next using a CAS * 3. Advance tail to tail.next * 4. Remove node from enqueuers[] * * @param tid The tid must be a UNIQUE index for each thread, in the range 0 to maxThreads-1 */ void enqueue(T* item, const int tid) { if (item == nullptr) throw std::invalid_argument("item can not be nullptr"); Node* myNode = new Node(item,tid); enqueuers[tid].store(myNode); for (int i = 0; i < maxThreads; i++) { if (enqueuers[tid].load() == nullptr) { hp.clear(tid); return; // Some thread did all the steps } Node* ltail = hp.protectPtr(kHpTail, tail.load(), tid); if (ltail != tail.load()) continue; // If the tail advanced maxThreads times, then my node has been enqueued if (enqueuers[ltail->enqTid].load() == ltail) { // Help a thread do step 4 Node* tmp = ltail; enqueuers[ltail->enqTid].compare_exchange_strong(tmp, nullptr); } for (int j = 1; j < maxThreads+1; j++) { // Help a thread do step 2 Node* nodeToHelp = enqueuers[(j + ltail->enqTid) % maxThreads].load(); if (nodeToHelp == nullptr) continue; Node* nodenull = nullptr; ltail->next.compare_exchange_strong(nodenull, nodeToHelp); break; } Node* lnext = ltail->next.load(); if (lnext != nullptr) tail.compare_exchange_strong(ltail, lnext); // Help a thread do step 3 } enqueuers[tid].store(nullptr, std::memory_order_release); // Do step 4, just in case it's not done hp.clear(tid); } /** * Steps when uncontended: * 1. Publish request to dequeue in dequeuers[tid]; * 2. CAS node->deqTid from IDX_START to tid; * 3. Set dequeuers[tid] to the newly owned node; * 4. Advance the head with casHead(); * * We must protect either head or tail with HP before doing the check for * empty queue, otherwise we may get into retired-deleted-newed-reenqueued. 
* * @param tid: The tid must be a UNIQUE index for each thread, in the range 0 to maxThreads-1 */ T* dequeue(const int tid) { Node* prReq = deqself[tid].load(); // Previous request Node* myReq = deqhelp[tid].load(); deqself[tid].store(myReq); // Step 1 for (int i=0; i < maxThreads; i++) { if (deqhelp[tid].load() != myReq) break; // No need for HP Node* lhead = hp.protectPtr(kHpHead, head.load(), tid); if (lhead != head.load()) continue; if (lhead == tail.load()) { // Give up deqself[tid].store(prReq); // Rollback request to dequeue giveUp(myReq, tid); if (deqhelp[tid].load() != myReq) { deqself[tid].store(myReq, std::memory_order_relaxed); break; } hp.clear(tid); return nullptr; } Node* lnext = hp.protectPtr(kHpNext, lhead->next.load(), tid); if (lhead != head.load()) continue; if (searchNext(lhead, lnext) != IDX_NONE) casDeqAndHead(lhead, lnext, tid); } Node* myNode = deqhelp[tid].load(); Node* lhead = hp.protectPtr(kHpHead, head.load(), tid); // Do step 4 if needed if (lhead == head.load() && myNode == lhead->next.load()) head.compare_exchange_strong(lhead, myNode); hp.clear(tid); hp.retire(prReq, tid); return myNode->item; } }; #endif /* _CR_TURN_QUEUE_HP_H_ */ ================================================ FILE: datastructures/sequential/HashSet.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2018, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _UC_HASH_SET_H_ #define _UC_HASH_SET_H_ #include #include #include // TODO: change CKey* to CKey& // This is a wrapper to std::set, which should be a Red-Black tree template class HashSet { private: std::unordered_set set; public: static std::string className() { return "HashSet"; } bool add(CKey key) { if (set.find(key) == set.end()) { set.insert(key); // TODO: can we improve this so we don't have to lookup twice? 
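            // (One possible answer, offered as an untested sketch:
            //  std::unordered_set::insert() returns a pair whose .second
            //  reports whether insertion took place, so
            //  "return set.insert(key).second;" would do a single lookup.)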
return true; } return false; } bool remove(CKey key) { auto iter = set.find(key); if (iter == set.end()) return false; set.erase(iter); return true; } bool contains(CKey key) { if (set.find(key) == set.end()) return false; return true; // TODO: optimize this } bool iterateAll(std::function itfun) { for (auto it = set.begin(); it != set.end(); ++it) { CKey key = *it; if (!itfun(&key)) return false; } return true; } }; #endif /* _UC_HASH_SET_H_ */ ================================================ FILE: datastructures/sequential/LinkedListQueue.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _SEQUENTIAL_LINKED_LIST_QUEUE_H_ #define _SEQUENTIAL_LINKED_LIST_QUEUE_H_ /** *

A sequential implementation of a Linked List Queue

* * This is meant to be used by the Universal Constructs * * @author Pedro Ramalhete * @author Andreia Correia */ template class LinkedListQueue { private: struct Node { T* item; Node* next {nullptr}; Node(T* userItem) : item{userItem} { } }; Node* head {nullptr}; Node* tail {nullptr}; public: LinkedListQueue(unsigned int maxThreads=0) { Node* sentinelNode = new Node(nullptr); head = sentinelNode; tail = sentinelNode; } // Universal Constructs need a copy constructor on the underlying data structure LinkedListQueue(const LinkedListQueue& other) { head = new Node(nullptr); Node* node = head; Node* onode = other.head->next; while (onode != nullptr) { node->next = new Node(onode->item); node = node->next; onode = onode->next; } tail = node; } ~LinkedListQueue() { while (dequeue(0) != nullptr); // Drain the queue Node* lhead = head; delete lhead; } static std::string className() { return "LinkedListQueue"; } bool enqueue(T* item, const int tid=0) { if (item == nullptr) return false; Node* newNode = new Node(item); tail->next = newNode; tail = newNode; return true; } T* dequeue(const int tid=0) { Node* lhead = head; if (lhead == tail) return nullptr; head = lhead->next; delete lhead; return head->item; } }; #endif /* _SEQUENTIAL_LINKED_LIST_QUEUE_H_ */ ================================================ FILE: datastructures/sequential/LinkedListSet.hpp ================================================ #ifndef _SEQUENTIAL_LINKED_LIST_SET_H_ #define _SEQUENTIAL_LINKED_LIST_SET_H_ #include /** *

A sequential implementation of a Linked List Set

* * This is meant to be used by the Universal Constructs * */ template class LinkedListSet { private: struct Node { K key; Node* next{nullptr}; Node(const K& key) : key{key}, next{nullptr} { } Node(){ } }; Node* head {nullptr}; Node* tail {nullptr}; public: LinkedListSet() { Node* lhead = new Node(); Node* ltail = new Node(); head = lhead; head->next = ltail; tail = ltail; } // Universal Constructs need a copy constructor on the underlying data structure LinkedListSet(const LinkedListSet& other) { head = new Node(); Node* node = head; Node* onode = other.head->next; while (onode != other.tail) { node->next = new Node(onode->key); node = node->next; onode = onode->next; } tail = new Node(); node->next = tail; } ~LinkedListSet() { // Delete all the nodes in the list Node* prev = head; Node* node = prev->next; while (node != tail) { delete prev; prev = node; node = node->next; } delete prev; delete tail; } static std::string className() { return "LinkedListSet"; } /* * Adds a node with a key, returns false if the key is already in the set */ bool add(const K& key) { Node *prev, *node; find(key, prev, node); bool retval = !(node != tail && key == node->key); if (!retval) return retval; Node* newNode = new Node(key); prev->next = newNode; newNode->next = node; return retval; } /* * Removes a node with an key, returns false if the key is not in the set */ bool remove(const K& key) { Node *prev, *node; find(key, prev, node); bool retval = (node != tail && key == node->key); if (!retval) return retval; prev->next = node->next; delete node; return retval; } /* * Returns true if it finds a node with a matching key */ bool contains(const K& key) { Node *prev, *node; find(key, prev, node); return (node != tail && key == node->key); } void find(const K& key, Node*& prev, Node*& node) { for (prev = head; (node = prev->next) != tail; prev = node){ if ( !(node->key < key) ) break; } } // Used only for benchmarks bool addAll(K** keys, const int size) { bool retval = false; for (int i = 0; i < size; i++) { Node *prev, *node; find(*keys[i], prev, node); retval = !(node != tail && *keys[i] == node->key); if (retval) { Node* newNode = new Node(*keys[i]); prev->next = newNode; newNode->next = node; } } return true; } }; #endif /* _SEQUENTIAL_LINKED_LIST_SET_H_ */ ================================================ FILE: datastructures/sequential/RedBlackBST.hpp ================================================ #ifndef _RED_BLACK_BST_H_ #define _RED_BLACK_BST_H_ #include #include #include // Single-threaded implementation of a Red-Black Tree Map //http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/RedBlackBST.java template class RedBlackBST { struct Node { K* key; V* val; Node* left {nullptr}; Node* right {nullptr}; bool color; // color of parent link int size; // subtree count Node(K* key, V* val, bool color, int size) : key{key}, val{val}, color{color}, size{size} {} }; Node *root {nullptr}; // root of the BST static const bool RED = true; static const bool BLACK = false; public: /** * Initializes an empty symbol table. */ RedBlackBST(unsigned int maxThreads=128) { } /*************************************************************************** * Node helper methods. ***************************************************************************/ // is node x red; false if x is null ? 
bool isRed(Node* x) { if (x == nullptr) return false; return x->color == RED; } // number of node in subtree rooted at x; 0 if x is null int size(Node* x) { if (x == nullptr) return 0; return x->size; } /** * Returns the number of key-value pairs in this symbol table. * @return the number of key-value pairs in this symbol table */ int size() { return size(root); } /** * Is this symbol table empty? * @return {@code true} if this symbol table is empty and {@code false} otherwise */ bool isEmpty() { return root == nullptr; } /*************************************************************************** * Standard BST search-> ***************************************************************************/ /** * Returns the value associated with the given key. * @param key the key * @return the value associated with the given key if the key is in the symbol table * and {@code null} if the key is not in the symbol table * @throws IllegalArgumentException if {@code key} is {@code null} */ V* get(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); return get(root, key); } // value associated with the given key in subtree rooted at x; null if no such key V* get(Node* x, K* key) { while (x != nullptr) { if (*key < *x->key) x = x->left; else if (*x->key < *key) x = x->right; else return x->val; } return nullptr; } /** * Does this symbol table contain the given key? * @param key the key * @return {@code true} if this symbol table contains {@code key} and * {@code false} otherwise * @throws IllegalArgumentException if {@code key} is {@code null} */ bool contains(K* key) { return get(key) != nullptr; } /*************************************************************************** * Red-black tree insertion. ***************************************************************************/ /** * Inserts the specified key-value pair into the symbol table, overwriting the old * value with the new value if the symbol table already contains the specified key. * Deletes the specified key (and its associated value) from this symbol table * if the specified value is {@code null}. * * @param key the key * @param val the value * @throws IllegalArgumentException if {@code key} is {@code null} */ void put(K* key, V* val) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (val == nullptr) { deleteKey(key); return; } root = put(root, key, val); root->color = BLACK; // assert check(); } // insert the key-value pair in the subtree rooted at h Node* put(Node* h, K* key, V* val) { if (h == nullptr) return new Node(key, val, RED, 1); if (*key < *h->key) h->left = put(h->left, key, val); else if (*h->key < *key) h->right = put(h->right, key, val); else h->val = val; // fix-up any right-leaning links if (isRed(h->right) && !isRed(h->left)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Red-black tree deletion. ***************************************************************************/ /** * Removes the smallest key and associated value from the symbol table. 
* @throws NoSuchElementException if the symbol table is empty */ void deleteMin() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = RED; root = deleteMin(root); if (!isEmpty()) root->color = BLACK; // assert check(); } // delete the key-value pair with the minimum key rooted at h Node* deleteMin(Node* h) { if (h->left == nullptr) return nullptr; if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h); h->left = deleteMin(h->left); return balance(h); } /** * Removes the largest key and associated value from the symbol table. * @throws NoSuchElementException if the symbol table is empty */ void deleteMax() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = RED; root = deleteMax(root); if (!isEmpty()) root->color = BLACK; // assert check(); } // delete the key-value pair with the maximum key rooted at h Node* deleteMax(Node* h) { if (isRed(h->left)) h = rotateRight(h); if (h->right == nullptr) return nullptr; if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h); h->right = deleteMax(h->right); return balance(h); } /** * Removes the specified key and its associated value from this symbol table * (if the key is in this symbol table). * * @param key the key * @throws IllegalArgumentException if {@code key} is {@code null} */ void deleteKey(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (!contains(key)) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = RED; root = deleteKey(root, key); if (!isEmpty()) root->color = BLACK; // assert check(); } // delete the key-value pair with the given key rooted at h Node* deleteKey(Node* h, K* key) { // assert get(h, key) != null; if (*key < *h->key) { if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h); h->left = deleteKey(h->left, key); } else { if (isRed(h->left)) h = rotateRight(h); if (*key == *h->key && (h->right == nullptr)) return nullptr; if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h); if (*key == *h->key) { Node* x = min(h->right); h->key = x->key; h->val = x->val; // h->val = get(h->right, min(h->right).key); // h->key = min(h->right).key; h->right = deleteMin(h->right); } else h->right = deleteKey(h->right, key); } return balance(h); } /*************************************************************************** * Red-black tree helper functions. 
***************************************************************************/ // make a left-leaning link lean to the right Node* rotateRight(Node* h) { // assert (h != null) && isRed(h->left); Node* x = h->left; h->left = x->right; x->right = h; x->color = x->right->color; x->right->color = RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // make a right-leaning link lean to the left Node* rotateLeft(Node* h) { // assert (h != null) && isRed(h->right); Node* x = h->right; h->right = x->left; x->left = h; x->color = x->left->color; x->left->color = RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // flip the colors of a node and its two children void flipColors(Node* h) { // h must have opposite color of its two children // assert (h != null) && (h->left != null) && (h->right != null); // assert (!isRed(h) && isRed(h->left) && isRed(h->right)) // || (isRed(h) && !isRed(h->left) && !isRed(h->right)); h->color = !h->color; h->left->color = !h->left->color; h->right->color = !h->right->color; } // Assuming that h is red and both h->left and h->left.left // are black, make h->left or one of its children red. Node* moveRedLeft(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->left) && !isRed(h->left.left); flipColors(h); if (isRed(h->right->left)) { h->right = rotateRight(h->right); h = rotateLeft(h); flipColors(h); } return h; } // Assuming that h is red and both h->right and h->right.left // are black, make h->right or one of its children red. Node* moveRedRight(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->right) && !isRed(h->right.left); flipColors(h); if (isRed(h->left->left)) { h = rotateRight(h); flipColors(h); } return h; } // restore red-black tree invariant Node* balance(Node* h) { // assert (h != null); if (isRed(h->right)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Utility functions. ***************************************************************************/ /** * Returns the height of the BST (for debugging). * @return the height of the BST (a 1-node tree has height 0) */ int height() { return height(root); } int height(Node* x) { if (x == nullptr) return -1; return 1 + std::max(height(x->left), height(x->right)); } /*************************************************************************** * Ordered symbol table methods. ***************************************************************************/ /** * Returns the smallest key in the symbol table. * @return the smallest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* min() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); return min(root).key; } // the smallest key in subtree rooted at x; null if no such key Node* min(Node* x) { // assert x != null; if (x->left == nullptr) return x; else return min(x->left); } /** * Returns the largest key in the symbol table. 
* @return the largest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* max() { if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); return max(root).key; } // the largest key in the subtree rooted at x; null if no such key Node* max(Node* x) { // assert x != null; if (x->right == nullptr) return x; else return max(x->right); } /** * Returns the largest key in the symbol table less than or equal to {@code key}. * @param key the key * @return the largest key in the symbol table less than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* floor(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); Node* x = floor(root, key); if (x == nullptr) return nullptr; else return x->key; } // the largest key in the subtree rooted at x less than or equal to the given key Node* floor(Node* x, K* key) { if (x == nullptr) return nullptr; if (*key == *x->key) return x; if (*key < *x->key) return floor(x->left, key); Node* t = floor(x->right, key); if (t != nullptr) return t; else return x; } /** * Returns the smallest key in the symbol table greater than or equal to {@code key}. * @param key the key * @return the smallest key in the symbol table greater than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* ceiling(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); if (isEmpty()) throw std::invalid_argument("item can not be nullptr"); Node* x = ceiling(root, key); if (x == nullptr) return nullptr; else return x->key; } // the smallest key in the subtree rooted at x greater than or equal to the given key Node* ceiling(Node* x, K* key) { if (x == nullptr) return nullptr; int cmp = key.compareTo(x->key); if (*key == *x->key) return x; if (*x->key < *key) return ceiling(x->right, key); Node* t = ceiling(x->left, key); if (t != nullptr) return t; else return x; } /** * Return the kth smallest key in the symbol table. * @param k the order statistic * @return the {@code k}th smallest key in the symbol table * @throws IllegalArgumentException unless {@code k} is between 0 and * n1 */ K* select(int k) { if (k < 0 || k >= size()) { throw std::invalid_argument("item can not be nullptr"); } Node x = select(root, k); return x->key; } // the key of rank k in the subtree rooted at x Node* select(Node* x, int k) { // assert x != null; // assert k >= 0 && k < size(x); int t = size(x->left); if (t > k) return select(x->left, k); else if (t < k) return select(x->right, k-t-1); else return x; } /** * Return the number of keys in the symbol table strictly less than {@code key}. 
* @param key the key * @return the number of keys in the symbol table strictly less than {@code key} * @throws IllegalArgumentException if {@code key} is {@code null} */ int rank(K* key) { if (key == nullptr) throw std::invalid_argument("item can not be nullptr"); return rank(key, root); } // number of keys less than key in the subtree rooted at x int rank(K* key, Node* x) { if (x == nullptr) return 0; if (*key < *x->key) return rank(key, x->left); else if (*x->key < *key) return 1 + size(x->left) + rank(key, x->right); else return size(x->left); } /*************************************************************************** * Range count and range search-> ***************************************************************************/ /** * Returns the number of keys in the symbol table in the given range. * * @param lo minimum endpoint * @param hi maximum endpoint * @return the number of keys in the sybol table between {@code lo} * (inclusive) and {@code hi} (inclusive) * @throws IllegalArgumentException if either {@code lo} or {@code hi} * is {@code null} */ int size(K* lo, K* hi) { if (lo == nullptr) throw std::invalid_argument("item can not be nullptr"); if (hi == nullptr) throw std::invalid_argument("item can not be nullptr"); if (*lo < *hi) return 0; if (contains(hi)) return rank(hi) - rank(lo) + 1; else return rank(hi) - rank(lo); } /*************************************************************************** * Check integrity of red-black tree data structure. ***************************************************************************/ bool check() { if (!isBST()) std::cout << "Not in symmetric order\n"; if (!isSizeConsistent()) std::cout << "Subtree counts not consistent\n"; //if (!isRankConsistent()) std::cout << "Ranks not consistent\n"; if (!is23()) std::cout << "Not a 2-3 tree\n"; if (!isBalanced()) std::cout << "Not balanced\n"; return isBST() && isSizeConsistent() && is23() && isBalanced(); } // does this binary tree satisfy symmetric order? // Note: this test also ensures that data structure is a binary tree since order is strict bool isBST() { return isBST(root, nullptr, nullptr); } // is the tree rooted at x a BST with all keys strictly between min and max // (if min or max is null, treat as empty constraint) // Credit: Bob Dondero's elegant solution bool isBST(Node* x, K* min, K* max) { if (x == nullptr) return true; if (min != nullptr && x->key.compareTo(min) <= 0) return false; if (max != nullptr && x->key.compareTo(max) >= 0) return false; return isBST(x->left, min, x->key) && isBST(x->right, x->key, max); } // are the size fields correct? bool isSizeConsistent() { return isSizeConsistent(root); } bool isSizeConsistent(Node* x) { if (x == nullptr) return true; if (x->size != size(x->left) + size(x->right) + 1) return false; return isSizeConsistent(x->left) && isSizeConsistent(x->right); } /* // check that ranks are consistent bool isRankConsistent() { for (int i = 0; i < size(); i++) if (i != rank(select(i))) return false; for (K* key : keys()) if (key.compareTo(select(rank(key))) != 0) return false; return true; } */ // Does the tree have no red right links, and at most one (left) // red links in a row on any path? bool is23() { return is23(root); } bool is23(Node* x) { if (x == nullptr) return true; if (isRed(x->right)) return false; if (x != root && isRed(x) && isRed(x->left)) return false; return is23(x->left) && is23(x->right); } // do all paths from root to leaf have same number of black edges? 
bool isBalanced() { int black = 0; // number of black links on path from root to min Node x = root; while (x != nullptr) { if (!isRed(x)) black++; x = x->left; } return isBalanced(root, black); } // does every path from the root to a leaf have the given number of black links? bool isBalanced(Node* x, int black) { if (x == nullptr) return black == 0; if (!isRed(x)) black--; return isBalanced(x->left, black) && isBalanced(x->right, black); } // Set methods bool add(K* key, const int tid) { if (contains(key)) return false; put(key,key); return true; } bool remove(K* key, const int tid) { if (!contains(key)) return false; deleteKey(key); return true; } inline bool contains(K* key, const int tid) { return contains(key); } std::string className() { return "RedBlackBST"; } }; #endif // _RED_BLACK_BST_H_ ================================================ FILE: datastructures/sequential/SortedArraySet.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _SORTEDARRAYSET_H_ #define _SORTEDARRAYSET_H_ #include // TODO: Test this for correctness /** * This is storing the pointers to the T instances, not the actual T instances. 
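* Note that lookup() compares through the pointers, e.g. *key < *(vec[pos]),
* so T must define operator< and operator== for these comparisons to work.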
*/ template class SortedArraySet { private: unsigned max_size = 32; T** vec; // TODO: change this to T if we change the API from T* to T& unsigned size = 0; static const int NOT_FOUND = 0; //std::atomic flag {false}; // For de debugging int lookup(T* key) { // Cover the special case of an empty array if (size== 0) return NOT_FOUND; int minPos = 0; int maxPos = size-1; //std::cout << "vec[0] = " << vec[0] << "\n"; // Special cases for first and last items if (*key < *(vec[0])) return NOT_FOUND; if (*key == *(vec[0])) return 0; if (*key == *(vec[maxPos])) return maxPos; if (*(vec[maxPos]) < *key) return maxPos+1; while (true) { int pos = (maxPos-minPos)/2 + minPos; if (*key < *(vec[pos])) { maxPos = pos; } else if (*key == *(vec[pos])) { return pos; } else { minPos = pos; } if (maxPos-minPos <= 1) { return maxPos; } } } public: SortedArraySet() { vec = new T*[max_size]; } ~SortedArraySet() { delete[] vec; } // We need a copy constructor to be able to use it in CXMutation SortedArraySet(const SortedArraySet& fromssv) { vec = new T*[fromssv.max_size]; max_size = fromssv.max_size; size = fromssv.size; for(unsigned i=0;iseq<<" "<tid<<" vex "<seq<<" "<tid<<" vex "<seq<< " " << vec[index]->tid<<"\n"; //assert(false); //flag.store(false); return false; } if(size+1==max_size){ T** newvec = new T*[2*max_size]; for(unsigned i=0;i=index+1;i--){ vec[i]=vec[i-1]; } } vec[index] = key; } size++; //flag.store(false); return true; } bool contains(T* key) { //if (flag.load()) std::cout << "contains() ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOR\n"; unsigned index = lookup(key); if (index == size) { return false; } return *key == *(vec[index]); } bool print() { // For debug purposes for(unsigned i=0;i BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _SORTED_VECTOR_SET_H_ #define _SORTED_VECTOR_SET_H_ #include #include // TODO: Test this for correctness /** * This is storing the pointers to the T instances, not the actual T instances. 
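* As in SortedArraySet, lookup() dereferences the stored pointers for its
* binary search, so T must define operator< and operator==.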
*/ template class SortedVectorSet { private: std::vector vec; // TODO: change this to T if we change the API from T* to T& static const int NOT_FOUND = 0; //std::atomic flag {false}; // For de debugging int lookup(T* key) { // Cover the special case of an empty array if (vec.size()== 0) return NOT_FOUND; int minPos = 0; int maxPos = vec.size()-1; // Special cases for first and last items if (*key < *(vec[0])) return NOT_FOUND; if (*key == *(vec[0])) return 0; if (*key == *(vec[maxPos])) return maxPos; if (*(vec[maxPos]) < *key) return maxPos+1; while (true) { int pos = (maxPos-minPos)/2 + minPos; if (*key < *(vec[pos])) { maxPos = pos; } else if (*key == *(vec[pos])) { return pos; } else { minPos = pos; } if (maxPos-minPos <= 1) { return maxPos; } } } public: SortedVectorSet() { } // We need a copy constructor to be able to use it in CXMutation SortedVectorSet(const SortedVectorSet& from) { vec = from.vec; // Do a copy of the vector } static std::string className() { return "SortedVectorSet"; } /** * When the curr.key is seen to be null it means we reached the tail node */ bool remove(T* key) { //if (flag.load()) std::cout << "remove() ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOR\n"; //flag.store(true); unsigned index = lookup(key); if (index == vec.size()) { //std::cout<<"remove key "<seq<<" "<tid<<" vex "<seq<<" "<tid<<" vex "<seq<< " " << vec[index]->tid<<"\n"; //assert(false); //flag.store(false); return false; } vec.insert(vec.begin()+index, key); //flag.store(false); return true; } bool contains(T* key) { //if (flag.load()) std::cout << "contains() ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOR\n"; unsigned index = lookup(key); if (index == vec.size()) { return false; } return *key == *(vec[index]); } bool print() { // For debug purposes for (T* p : vec) std:: cout << p << ","; std::cout << "\n"; return true; } }; #endif /* _SORTED_VECTOR_SET_H_ */ ================================================ FILE: datastructures/sequential/TreeSet.hpp ================================================ /****************************************************************************** * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _CX_TREE_SET_H_ #define _CX_TREE_SET_H_ #include #include #include //#include "../datastructures/sequential/RedBlackBST.hpp" // TODO: change CKey* to CKey& // This is a wrapper to std::set, which should be a Red-Black tree template class TreeSet { private: std::set set; // Use this instead if we want to have control over the Red-Black tree //RedBlackBST set; public: static std::string className() { return "TreeSet"; } bool add(CKey key) { if (set.find(key) == set.end()) { set.insert(key); // TODO: can we improve this so we don't have to lookup twice? return true; } return false; } bool remove(CKey key) { auto iter = set.find(key); if (iter == set.end()) return false; set.erase(iter); return true; } bool contains(CKey key) { if (set.find(key) == set.end()) return false; return true; // TODO: optimize this } bool iterateAll(std::function itfunc) { for (auto it = set.begin(); it != set.end(); ++it) { CKey key = *it; if (!itfunc(&key)) return false; } return true; } /* bool add(CKey* key) { return set.add(key, 0); } bool remove(CKey* key) { return set.remove(key, 0); } bool contains(CKey* key) { return set.contains(key, 0); } */ }; #endif /* _TREE_SET_H_ */ ================================================ FILE: datastructures/treemaps/ESTMRedBlackTree.hpp ================================================ #ifndef _ESTM_RED_BLACK_BST_H_ #define _ESTM_RED_BLACK_BST_H_ #include #include #include #include "stms/ESTM.hpp" // Adapted from Java to C++ from the original at http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/RedBlackBST.java template class ESTMRedBlackTree { const int64_t COLOR_RED = 0; const int64_t COLOR_BLACK = 1; struct Node { estm::tmtype key; estm::tmtype val; estm::tmtype left {nullptr}; estm::tmtype right {nullptr}; estm::tmtype color; // color of parent link estm::tmtype size; // subtree count Node(const K& key, const V& val, int64_t color, int64_t size) : key{key}, val{val}, color{color}, size{size} {} }; estm::tmtype root {nullptr}; // root of the BST inline void assignAndFreeIfNull(estm::tmtype& z, Node* w) { Node* tofree = z; z = w; if (w == nullptr) estm::tmDelete(tofree); } public: /** * Initializes an empty symbol table. */ ESTMRedBlackTree(int maxThreads=0){ } ~ESTMRedBlackTree() { // The transaction log is not enough to delete everything if there are too many, so we delete 1000 per transaction for (int i = 0; i < 1000; i++) { estm::updateTx([&] () { if (root == nullptr) return true; deleteMin(); return true; }); } } /*************************************************************************** * Node helper methods. ***************************************************************************/ // is node x red; false if x is null ? 
bool isRed(Node* x) { if (x == nullptr) return false; return x->color == COLOR_RED; } // number of node in subtree rooted at x; 0 if x is null int size(Node* x) { if (x == nullptr) return 0; return x->size; } /** * Returns the number of key-value pairs in this symbol table. * @return the number of key-value pairs in this symbol table */ int size() { return size(root); } /** * Is this symbol table empty? * @return {@code true} if this symbol table is empty and {@code false} otherwise */ bool isEmpty() { return root == nullptr; } /*************************************************************************** * Standard BST search-> ***************************************************************************/ /** * Returns the value associated with the given key. * @param key the key * @return the value associated with the given key if the key is in the symbol table * and {@code null} if the key is not in the symbol table * @throws IllegalArgumentException if {@code key} is {@code null} */ bool innerGet(K& key, V& oldValue, const bool saveOldValue) { bool found = get(root, key); if (!found) return false; //if (saveOldValue) oldValue = *val; // Copy of V return true; } // value associated with the given key in subtree rooted at x; null if no such key bool get(Node* x, K& key) { while (x != nullptr) { if (key < x->key) x = x->left; else if (x->key < key) x = x->right; else return true; } return false; } /** * Does this symbol table contain the given key? * @param key the key * @return {@code true} if this symbol table contains {@code key} and * {@code false} otherwise * @throws IllegalArgumentException if {@code key} is {@code null} */ bool containsKey(const K& key) { return get(key) != nullptr; } /*************************************************************************** * Red-black tree insertion. ***************************************************************************/ /** * Inserts the specified key-value pair into the symbol table, overwriting the old * value with the new value if the symbol table already contains the specified key. * Deletes the specified key (and its associated value) from this symbol table * if the specified value is {@code null}. * * @param key the key * @param val the value * @throws IllegalArgumentException if {@code key} is {@code null} */ bool innerPut(const K& key, const V& value) { bool ret = false; root = put(root, key, value, ret); root->color = COLOR_BLACK; return ret; } // insert the key-value pair in the subtree rooted at h Node* put(Node* h, const K& key, const V& val, bool& ret) { if (h == nullptr) { ret = true; return estm::tmNew(key, val, COLOR_RED, 1); } if (key < h->key) h->left = put(h->left, key, val, ret); else if (h->key < key) h->right = put(h->right, key, val, ret); else h->val = val; // fix-up any right-leaning links if (isRed(h->right) && !isRed(h->left)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Red-black tree deletion. ***************************************************************************/ /** * Removes the smallest key and associated value from the symbol table. 
* @throws NoSuchElementException if the symbol table is empty */ void deleteMin() { if (isEmpty()) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; assignAndFreeIfNull(root, deleteMin(root)); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the minimum key rooted at h Node* deleteMin(Node* h) { if (h->left == nullptr) return nullptr; if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h); assignAndFreeIfNull(h->left, deleteMin(h->left)); return balance(h); } /** * Removes the largest key and associated value from the symbol table. * @throws NoSuchElementException if the symbol table is empty */ void deleteMax() { if (isEmpty()) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; root = deleteMax(root); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the maximum key rooted at h Node* deleteMax(Node* h) { if (isRed(h->left)) h = rotateRight(h); if (h->right == nullptr) return nullptr; if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h); h->right = deleteMax(h->right); return balance(h); } /** * Removes the specified key and its associated value from this symbol table * (if the key is in this symbol table). * * @param key the key */ void innerRemove(const K& key) { // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; assignAndFreeIfNull(root, deleteKey(root, key)); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the given key rooted at h Node* deleteKey(Node* h, const K& key) { // assert get(h, key) != null; if (key < h->key) { if (!isRed(h->left) && !isRed(h->left->left)) { h = moveRedLeft(h); } assignAndFreeIfNull(h->left, deleteKey(h->left, key)); } else { if (isRed(h->left)) { h = rotateRight(h); } if (key == h->key && (h->right == nullptr)) { return nullptr; } if (!isRed(h->right) && !isRed(h->right->left)) { h = moveRedRight(h); } if (key == h->key) { Node* x = min(h->right); h->key = x->key; h->val = x->val; // h->val = get(h->right, min(h->right).key); // h->key = min(h->right).key; assignAndFreeIfNull(h->right, deleteMin(h->right)); } else { assignAndFreeIfNull(h->right, deleteKey(h->right, key)); } } return balance(h); } /*************************************************************************** * Red-black tree helper functions. 
***************************************************************************/ // make a left-leaning link lean to the right Node* rotateRight(Node* h) { // assert (h != null) && isRed(h->left); Node* x = h->left; h->left = x->right; x->right = h; x->color = x->right->color; x->right->color = COLOR_RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // make a right-leaning link lean to the left Node* rotateLeft(Node* h) { // assert (h != null) && isRed(h->right); Node* x = h->right; h->right = x->left; x->left = h; x->color = x->left->color; x->left->color = COLOR_RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // flip the colors of a node and its two children void flipColors(Node* h) { // h must have opposite color of its two children // assert (h != null) && (h->left != null) && (h->right != null); // assert (!isRed(h) && isRed(h->left) && isRed(h->right)) // || (isRed(h) && !isRed(h->left) && !isRed(h->right)); h->color = !h->color; h->left->color = !h->left->color; h->right->color = !h->right->color; } // Assuming that h is red and both h->left and h->left.left // are black, make h->left or one of its children red. Node* moveRedLeft(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->left) && !isRed(h->left.left); flipColors(h); if (isRed(h->right->left)) { h->right = rotateRight(h->right); h = rotateLeft(h); flipColors(h); } return h; } // Assuming that h is red and both h->right and h->right.left // are black, make h->right or one of its children red. Node* moveRedRight(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->right) && !isRed(h->right.left); flipColors(h); if (isRed(h->left->left)) { h = rotateRight(h); flipColors(h); } return h; } // restore red-black tree invariant Node* balance(Node* h) { // assert (h != null); if (isRed(h->right)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Utility functions. ***************************************************************************/ /** * Returns the height of the BST (for debugging). * @return the height of the BST (a 1-node tree has height 0) */ int height() { return height(root); } int height(Node* x) { if (x == nullptr) return -1; return 1 + std::max(height(x->left), height(x->right)); } /*************************************************************************** * Ordered symbol table methods. ***************************************************************************/ /** * Returns the smallest key in the symbol table. * @return the smallest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* min() { if (isEmpty()) return nullptr; return min(root).key; } // the smallest key in subtree rooted at x; null if no such key Node* min(Node* x) { // assert x != null; if (x->left == nullptr) return x; else return min(x->left); } /** * Returns the largest key in the symbol table. 
* @return the largest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* max() { if (isEmpty()) return nullptr; return max(root).key; } // the largest key in the subtree rooted at x; null if no such key Node* max(Node* x) { // assert x != null; if (x->right == nullptr) return x; else return max(x->right); } /** * Returns the largest key in the symbol table less than or equal to {@code key}. * @param key the key * @return the largest key in the symbol table less than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* floor(const K& key) { if (key == nullptr) return nullptr; if (isEmpty()) return nullptr; Node* x = floor(root, key); if (x == nullptr) return nullptr; else return x->key; } // the largest key in the subtree rooted at x less than or equal to the given key Node* floor(Node* x, const K& key) { if (x == nullptr) return nullptr; if (key == x->key) return x; if (key < x->key) return floor(x->left, key); Node* t = floor(x->right, key); if (t != nullptr) return t; else return x; } /** * Returns the smallest key in the symbol table greater than or equal to {@code key}. * @param key the key * @return the smallest key in the symbol table greater than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* ceiling(const K& key) { if (key == nullptr) return nullptr; if (isEmpty()) return nullptr; Node* x = ceiling(root, key); if (x == nullptr) return nullptr; else return x->key; } // the smallest key in the subtree rooted at x greater than or equal to the given key Node* ceiling(Node* x, const K& key) { if (x == nullptr) return nullptr; if (key == x->key) return x; if (x->key < key) return ceiling(x->right, key); Node* t = ceiling(x->left, key); if (t != nullptr) return t; else return x; } /** * Return the kth smallest key in the symbol table. * @param k the order statistic * @return the {@code k}th smallest key in the symbol table * @throws IllegalArgumentException unless {@code k} is between 0 and * n1 */ K* select(int k) { if (k < 0 || k >= size()) { return nullptr; } Node x = select(root, k); return x->key; } // the key of rank k in the subtree rooted at x Node* select(Node* x, int k) { // assert x != null; // assert k >= 0 && k < size(x); int t = size(x->left); if (t > k) return select(x->left, k); else if (t < k) return select(x->right, k-t-1); else return x; } /** * Return the number of keys in the symbol table strictly less than {@code key}. * @param key the key * @return the number of keys in the symbol table strictly less than {@code key} * @throws IllegalArgumentException if {@code key} is {@code null} */ int rank(const K& key) { if (key == nullptr) return -1; return rank(key, root); } // number of keys less than key in the subtree rooted at x int rank(const K& key, Node* x) { if (x == nullptr) return 0; if (key < x->key) return rank(key, x->left); else if (x->key < key) return 1 + size(x->left) + rank(key, x->right); else return size(x->left); } /*************************************************************************** * Range count and range search-> ***************************************************************************/ /** * Returns the number of keys in the symbol table in the given range. 
* * @param lo minimum endpoint * @param hi maximum endpoint * @return the number of keys in the sybol table between {@code lo} * (inclusive) and {@code hi} (inclusive) * @throws IllegalArgumentException if either {@code lo} or {@code hi} * is {@code null} */ int size(const K& lo, const K& hi) { if (lo == nullptr) return 0; if (hi == nullptr) return 0; if (hi < lo) return 0; if (containsKey(hi)) return rank(hi) - rank(lo) + 1; else return rank(hi) - rank(lo); } /*************************************************************************** * Check integrity of red-black tree data structure. ***************************************************************************/ bool check() { if (!isBST()) std::cout << "Not in symmetric order\n"; if (!isSizeConsistent()) std::cout << "Subtree counts not consistent\n"; //if (!isRankConsistent()) std::cout << "Ranks not consistent\n"; if (!is23()) std::cout << "Not a 2-3 tree\n"; if (!isBalanced()) std::cout << "Not balanced\n"; return isBST() && isSizeConsistent() && is23() && isBalanced(); } // does this binary tree satisfy symmetric order? // Note: this test also ensures that data structure is a binary tree since order is strict bool isBST() { return isBST(root, nullptr, nullptr); } // is the tree rooted at x a BST with all keys strictly between min and max // (if min or max is null, treat as empty constraint) // Credit: Bob Dondero's elegant solution bool isBST(Node* x, K* min, K* max) { if (x == nullptr) return true; // TODO: port these two lines //if (min != nullptr && x->key.compareTo(min) <= 0) return false; //if (max != nullptr && x->key.compareTo(max) >= 0) return false; return isBST(x->left, min, x->key) && isBST(x->right, x->key, max); } // are the size fields correct? bool isSizeConsistent() { return isSizeConsistent(root); } bool isSizeConsistent(Node* x) { if (x == nullptr) return true; if (x->size != size(x->left) + size(x->right) + 1) return false; return isSizeConsistent(x->left) && isSizeConsistent(x->right); } /* // check that ranks are consistent bool isRankConsistent() { for (int i = 0; i < size(); i++) if (i != rank(select(i))) return false; for (K* key : keys()) if (key.compareTo(select(rank(key))) != 0) return false; return true; } */ // Does the tree have no red right links, and at most one (left) // red links in a row on any path? bool is23() { return is23(root); } bool is23(Node* x) { if (x == nullptr) return true; if (isRed(x->right)) return false; if (x != root && isRed(x) && isRed(x->left)) return false; return is23(x->left) && is23(x->right); } // do all paths from root to leaf have same number of black edges? bool isBalanced() { int black = 0; // number of black links on path from root to min Node x = root; while (x != nullptr) { if (!isRed(x)) black++; x = x->left; } return isBalanced(root, black); } // does every path from the root to a leaf have the given number of black links? 
bool isBalanced(Node* x, int black) { if (x == nullptr) return black == 0; if (!isRed(x)) black--; return isBalanced(x->left, black) && isBalanced(x->right, black); } // Inserts a key only if it's not already present bool add(const K& key, const int tid=0) { bool retval = false; estm::updateTx([&] () { retval = innerPut(key,key); }); return retval; } // Returns true only if the key was present bool remove(K& key, const int tid=0) { bool retval = false; estm::updateTx([&] () { V notused; retval = innerGet(key,notused,false); if (retval) innerRemove(key); }); return retval; } bool contains(K& key, const int tid=0) { bool retval = false; estm::readTx([&] () { V notused; retval = innerGet(key,notused,false); }); return retval; } // This is not fully transactionally but it's ok because we use it only on initialization. // We could make it fully transactionally, but we would have to increase the size of allocation/store logs. void addAll(K** keys, int size, const int tid=0) { for (int i = 0; i < size; i++) add(*keys[i], tid); } static std::string className() { return estm::ESTM::className() + "-RedBlackTree"; } }; #endif // _ESTM_RED_BLACK_BST_H_ ================================================ FILE: datastructures/treemaps/HazardEras.hpp ================================================ /****************************************************************************** * Copyright (c) 2016-2017, Pedro Ramalhete, Andreia Correia * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Concurrency Freaks nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** */ #ifndef _HAZARD_ERAS_H_ #define _HAZARD_ERAS_H_ #include #include #include #include /* *

Hazard Eras

* This is a light-weight implementation of hazard eras, where each thread has a
* thread-local list of retired objects.
*
* This is based on the paper "Hazard Eras - Non-Blocking Memory Reclamation"
* by Pedro Ramalhete and Andreia Correia:
* https://github.com/pramalhe/ConcurrencyFreaks/blob/master/papers/hazarderas-2017.pdf
*
* The type T is used for the objects/nodes and it must have the uint64_t
* members newEra and delEra.
*
* The threshold R (named 'R' in the paper) is zero.
*
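* A minimal usage sketch (hypothetical caller code; Node must expose the
* uint64_t fields newEra and delEra):
*   HazardEras<Node> he {1, maxThreads};
*   Node* node = he.get_protected(0, atomicNodePtr, tid); // pin before reading
*   he.clear(tid);                                        // release the eras
*   he.retire(unlinkedNode, tid);                         // defer the delete
*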

* @author Pedro Ramalhete * @author Andreia Correia */ template class HazardEras { private: static const uint64_t NONE = 0; static const int HE_MAX_THREADS = 128; static const int MAX_HES = 5; // This is named 'K' in the HP paper static const int CLPAD = 128/sizeof(std::atomic); static const int HE_THRESHOLD_R = 0; // This is named 'R' in the HP paper const int maxHEs; const int maxThreads; alignas(128) std::atomic eraClock {1}; alignas(128) std::atomic* he[HE_MAX_THREADS]; // It's not nice that we have a lot of empty vectors, but we need padding to avoid false sharing alignas(128) std::vector retiredList[HE_MAX_THREADS*CLPAD]; public: HazardEras(int maxHEs=MAX_HES, int maxThreads=HE_MAX_THREADS) : maxHEs{maxHEs}, maxThreads{maxThreads} { for (int it = 0; it < HE_MAX_THREADS; it++) { he[it] = new std::atomic[CLPAD*2]; // We allocate four cache lines to allow for many hps and without false sharing retiredList[it*CLPAD].reserve(maxThreads*maxHEs); for (int ihe = 0; ihe < MAX_HES; ihe++) { he[it][ihe].store(NONE, std::memory_order_relaxed); } } static_assert(std::is_same::value, "T::newEra must be uint64_t"); static_assert(std::is_same::value, "T::delEra must be uint64_t"); } ~HazardEras() { for (int it = 0; it < HE_MAX_THREADS; it++) { delete[] he[it]; // Clear the current retired nodes for (unsigned iret = 0; iret < retiredList[it*CLPAD].size(); iret++) { delete retiredList[it*CLPAD][iret]; } } } inline uint64_t getEra() { return eraClock.load(); } /** * Progress Condition: wait-free bounded (by maxHEs) */ inline void clear(const int tid) { for (int ihe = 0; ihe < maxHEs; ihe++) { he[tid][ihe].store(NONE, std::memory_order_release); } } /** * Progress Condition: lock-free */ inline T* get_protected(int index, const std::atomic& atom, const int tid) { auto prevEra = he[tid][index].load(std::memory_order_relaxed); while (true) { T* ptr = atom.load(); auto era = eraClock.load(std::memory_order_acquire); if (era == prevEra) return ptr; he[tid][index].store(era); prevEra = era; } } inline void protectEraRelease(int index, int other, const int tid) { auto era = he[tid][other].load(std::memory_order_relaxed); if (he[tid][index].load(std::memory_order_relaxed) == era) return; he[tid][index].store(era, std::memory_order_release); } /* * Does a single iteration. Must be integrated into the algorithm that's using HE. * In other words, we must re-check if era has changed * * Progress Condition: wait-free population oblivious */ inline T* protectPtr(int index, const std::atomic& atom, uint64_t& prevEra, const int tid) { T* ptr = atom.load(std::memory_order_acquire); auto era = eraClock.load(); if (prevEra != era) { prevEra = era; he[tid][index].store(era, std::memory_order_relaxed); std::atomic_thread_fence(std::memory_order_seq_cst); } return ptr; } /** * Retire an object (node) * Progress Condition: wait-free bounded * * Doing rlist.erase() is not the most efficient way to remove entries from a std::vector, but ok... 
     */
    void retire(T* ptr, const int mytid) {
        auto currEra = eraClock.load();
        ptr->delEra = currEra;
        auto& rlist = retiredList[mytid*CLPAD];
        rlist.push_back(ptr);
        if (eraClock == currEra) eraClock.fetch_add(1);
        for (unsigned iret = 0; iret < rlist.size();) {
            auto obj = rlist[iret];
            if (canDelete(obj, mytid)) {
                rlist.erase(rlist.begin() + iret);
                delete obj;
                continue;
            }
            iret++;
        }
    }

private:
    bool canDelete(T* obj, const int mytid) {
        for (int tid = 0; tid < maxThreads; tid++) {
            for (int ihe = 0; ihe < maxHEs; ihe++) {
                const auto era = he[tid][ihe].load(std::memory_order_acquire);
                if (era == NONE || era < obj->newEra || era > obj->delEra) continue;
                return false;
            }
        }
        return true;
    }
};

#endif /* _HAZARD_ERAS_H_ */


================================================
FILE: datastructures/treemaps/NatarajanTreeHE.hpp
================================================
/*

Copyright 2017 University of Rochester

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Adapted from https://github.com/roghnin/Interval-Based-Reclamation/blob/master/src/rideables/NatarajanTree.hpp

Due to the usage of <optional>, this needs C++17 to compile

Pedro: I've adapted this for our benchmarks but the adaptation may contain errors,
please do not use this code in production!

*/

#ifndef _NATARAJAN_TREE_HAZARD_ERAS_H_
#define _NATARAJAN_TREE_HAZARD_ERAS_H_

#include <iostream>
#include <atomic>
#include <algorithm>
#include <map>
#include <optional>
#include "common/HazardEras.hpp"

template <class K, class V>
class NatarajanTreeHE {
private:
    const int MAX_THREADS = 128;

    /* structs*/
    struct Node {
        int level;
        K key;
        V val;
        std::atomic<Node*> left;
        std::atomic<Node*> right;
        uint64_t newEra {0};  // TODO: put he.getEra() here
        uint64_t delEra;

        Node(uint64_t newEra) : newEra{newEra} {};
        Node(uint64_t newEra, K k, V v, Node* l, Node* r,int lev):level(lev),key(k),val(v),left(l),right(r),newEra{newEra} {};
        Node(uint64_t newEra, K k, V v, Node* l, Node* r):level(-1),key(k),val(v),left(l),right(r),newEra{newEra} {};
    };

    struct SeekRecord{
        Node* ancestor;
        Node* successor;
        Node* parent;
        Node* leaf;
    };

    /* variables */
    HazardEras<Node> he {5, MAX_THREADS};
    K infK{};
    V defltV{};
    Node* r;
    Node* s;
    SeekRecord* records;
    const size_t GET_POINTER_BITS = 0xfffffffffffffffc;//for machine 64-bit or less.
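    /*
     * Note on GET_POINTER_BITS (illustrative, values hypothetical): the two
     * lowest bits of a child pointer carry the Natarajan-Mittal edge marks,
     * which is safe because Node addresses are at least 4-byte aligned, so
     * those bits are zero in any real address. Bit 0 is the "flag" (the leaf
     * below this edge is logically deleted) and bit 1 is the "tag" (the edge
     * must not be modified). Round-trip through the helpers defined below:
     *
     *     Node* p = ...;                          // aligned address, low bits 00
     *     Node* m = mixPtrFlgTg(p, true, false);  // (size_t)p | 0x1
     *     getPtr(m) == p; getFlg(m) == true; getTg(m) == false;
     */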
    /* helper functions */
    //flag and tags helpers
    inline Node* getPtr(Node* mptr){
        return (Node*) ((size_t)mptr & GET_POINTER_BITS);
    }
    inline bool getFlg(Node* mptr){
        return (bool)((size_t)mptr & 1);
    }
    inline bool getTg(Node* mptr){
        return (bool)((size_t)mptr & 2);
    }
    inline Node* mixPtrFlgTg(Node* ptr, bool flg, bool tg){
        return (Node*) ((size_t)ptr | flg | ((size_t)tg<<1));
    }
    //node comparison
    inline bool isInf(Node* n){
        return getInfLevel(n)!=-1;
    }
    inline int getInfLevel(Node* n){
        //0 for inf0, 1 for inf1, 2 for inf2, -1 for general val
        n=getPtr(n);
        return n->level;
    }
    inline bool nodeLess(Node* n1, Node* n2){
        n1=getPtr(n1);
        n2=getPtr(n2);
        int i1=getInfLevel(n1);
        int i2=getInfLevel(n2);
        return i1<i2 || (i1==-1 && i2==-1 && n1->key<n2->key);
    }
    inline bool nodeEqual(Node* n1, Node* n2){
        n1=getPtr(n1);
        n2=getPtr(n2);
        int i1=getInfLevel(n1);
        int i2=getInfLevel(n2);
        if(i1==-1&&i2==-1) return n1->key==n2->key;
        else return i1==i2;
    }
    inline bool nodeLessEqual(Node* n1, Node* n2){
        return !nodeLess(n2,n1);
    }

    /* private interfaces */
    void seek(K key, int tid);
    bool cleanup(K key, int tid);
    void doRangeQuery(Node& k1, Node& k2, int tid, Node* root, std::map<K,V>& res);

public:
    NatarajanTreeHE(const int maxThreads=0) {
        r = new Node(he.getEra(), infK,defltV,nullptr,nullptr,2);
        s = new Node(he.getEra(), infK,defltV,nullptr,nullptr,1);
        r->right = new Node(he.getEra(), infK,defltV,nullptr,nullptr,2);
        r->left = s;
        s->right = new Node(he.getEra(), infK,defltV,nullptr,nullptr,1);
        s->left = new Node(he.getEra(), infK,defltV,nullptr,nullptr,0);
        records = new SeekRecord[MAX_THREADS]{};
    };
    ~NatarajanTreeHE(){};

    static std::string className() { return "NatarajanTreeHE"; }

    std::optional<V> get(K key, int tid);
    std::optional<V> put(K key, V val, int tid);
    bool insert(K key, V val, int tid);
    std::optional<V> innerRemove(K key, int tid);
    std::optional<V> replace(K key, V val, int tid);
    std::map<K,V> rangeQuery(K key1, K key2, int& len, int tid);
    // Used only by our tree benchmarks
    bool add(K key, int tid);
    bool remove(K key, int tid);
    bool contains(K key, int tid);
    void addAll(K** keys, const int size, const int tid);
};

//-------Definition----------
template <class K, class V>
void NatarajanTreeHE<K,V>::seek(K key, int tid){
    /* initialize the seek record using sentinel nodes */
    Node keyNode{he.getEra(),key,defltV,nullptr,nullptr};//node to be compared
    SeekRecord* seekRecord = &(records[tid]);
    seekRecord->ancestor = r;
    seekRecord->successor = he.get_protected(1, r->left, tid);
    seekRecord->parent = he.get_protected(2, r->left, tid);
    seekRecord->leaf = getPtr(he.get_protected(3, s->left, tid));

    /* initialize other variables used in the traversal */
    Node* parentField = he.get_protected(3, seekRecord->parent->left, tid);
    Node* currentField = he.get_protected(4, seekRecord->leaf->left,tid);
    Node* current = getPtr(currentField);

    /* traverse the tree */
    while(current!=nullptr){
        /* check if the edge from the current parent node is tagged */
        if(!getTg(parentField)){
            /*
             * found an untagged edge in the access path;
             * advance ancestor and successor pointers.
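             * (protectEraRelease(dst, src, tid) simply copies the era already
             * published in slot 'src' into slot 'dst', so each node keeps an
             * active reservation while it changes role during this hand-off)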
             */
            seekRecord->ancestor=seekRecord->parent;
            he.protectEraRelease(0, 1, tid);
            seekRecord->successor=seekRecord->leaf;
            he.protectEraRelease(1, 3, tid);
        }

        /* advance parent and leaf pointers */
        seekRecord->parent = seekRecord->leaf;
        he.protectEraRelease(2, 3, tid);
        seekRecord->leaf = current;
        he.protectEraRelease(3, 4, tid);

        /* update other variables used in traversal */
        parentField=currentField;
        if(nodeLess(&keyNode,current)){
            currentField = he.get_protected(4, current->left, tid);
        }
        else{
            currentField = he.get_protected(4, current->right, tid);
        }
        current=getPtr(currentField);
    }
    /* traversal complete */
    return;
}

template <class K, class V>
bool NatarajanTreeHE<K,V>::cleanup(K key, int tid){
    Node keyNode{he.getEra(),key,defltV,nullptr,nullptr};//node to be compared
    bool res=false;

    /* retrieve addresses stored in seek record */
    SeekRecord* seekRecord=&(records[tid]);
    Node* ancestor=getPtr(seekRecord->ancestor);
    Node* successor=getPtr(seekRecord->successor);
    Node* parent=getPtr(seekRecord->parent);
    Node* leaf=getPtr(seekRecord->leaf);

    std::atomic<Node*>* successorAddr=nullptr;
    std::atomic<Node*>* childAddr=nullptr;
    std::atomic<Node*>* siblingAddr=nullptr;

    /* obtain address of field of ancestor node that will be modified */
    if(nodeLess(&keyNode,ancestor))
        successorAddr=&(ancestor->left);
    else
        successorAddr=&(ancestor->right);

    /* obtain addresses of child fields of parent node */
    if(nodeLess(&keyNode,parent)){
        childAddr=&(parent->left);
        siblingAddr=&(parent->right);
    }
    else{
        childAddr=&(parent->right);
        siblingAddr=&(parent->left);
    }

    Node* tmpChild=childAddr->load(std::memory_order_acquire);
    if(!getFlg(tmpChild)){
        /* the leaf is not flagged, thus sibling node should be flagged */
        tmpChild=siblingAddr->load(std::memory_order_acquire);
        /* switch the sibling address */
        siblingAddr=childAddr;
    }

    /* use TAS to tag sibling edge */
    while(true){
        Node* untagged=siblingAddr->load(std::memory_order_acquire);
        Node* tagged=mixPtrFlgTg(getPtr(untagged),getFlg(untagged),true);
        if(siblingAddr->compare_exchange_strong(untagged,tagged,std::memory_order_acq_rel)){
            break;
        }
    }

    /* read the flag and address fields */
    Node* tmpSibling=siblingAddr->load(std::memory_order_acquire);

    /* make the sibling node a direct child of the ancestor node */
    res=successorAddr->compare_exchange_strong(successor,
        mixPtrFlgTg(getPtr(tmpSibling),getFlg(tmpSibling),false),
        std::memory_order_acq_rel);

    if(res==true){
        he.retire(getPtr(tmpChild),tid);
        he.retire(successor,tid);
    }
    return res;
}

/* to test rangeQuery */
// template <>
// optional<int> NatarajanTree<int,int>::get(int key, int tid){
//     int len=0;
//     auto x = rangeQuery(key-500,key,len,tid);
//     Node keyNode{key,defltV,nullptr,nullptr};//node to be compared
//     optional<int> res={};
//     SeekRecord* seekRecord=&(records[tid].ui);
//     Node* leaf=nullptr;
//     seek(key,tid);
//     leaf=getPtr(seekRecord->leaf);
//     if(nodeEqual(&keyNode,leaf)){
//         res = leaf->val;
//     }
//     return res;
// }

template <class K, class V>
std::optional<V> NatarajanTreeHE<K,V>::get(K key, int tid){
    Node keyNode{he.getEra(),key,defltV,nullptr,nullptr};//node to be compared
    std::optional<V> res={};
    SeekRecord* seekRecord=&(records[tid]);
    Node* leaf=nullptr;
    seek(key,tid);
    leaf=getPtr(seekRecord->leaf);
    if(nodeEqual(&keyNode,leaf)){
        res = leaf->val;
    }
    he.clear(tid);
    return res;
}

template <class K, class V>
std::optional<V> NatarajanTreeHE<K,V>::put(K key, V val, int tid){
    std::optional<V> res={};
    SeekRecord* seekRecord=&(records[tid]);

    Node* newInternal=nullptr;
    Node* newLeaf = new Node(he.getEra(),key,val,nullptr,nullptr);//also to compare keys

    Node* parent=nullptr;
    Node* leaf=nullptr;
    std::atomic<Node*>* childAddr=nullptr;
    while(true){
        seek(key,tid);
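        // seek() has (re)populated records[tid] with the ancestor/successor/
        // parent/leaf path for this key; the CAS below may still fail due to a
        // concurrent delete, in which case we help it finish via cleanup() and retry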
        leaf=getPtr(seekRecord->leaf);
        parent=getPtr(seekRecord->parent);
        if(!nodeEqual(newLeaf,leaf)){//key does not exist
            /* obtain address of the child field to be modified */
            if(nodeLess(newLeaf,parent))
                childAddr=&(parent->left);
            else
                childAddr=&(parent->right);

            /* create left and right leaves of newInternal */
            Node* newLeft=nullptr;
            Node* newRight=nullptr;
            if(nodeLess(newLeaf,leaf)){
                newLeft=newLeaf;
                newRight=leaf;
            }
            else{
                newLeft=leaf;
                newRight=newLeaf;
            }

            /* create newInternal */
            if(isInf(leaf)){
                int lev=getInfLevel(leaf);
                newInternal = new Node(he.getEra(),infK,defltV,newLeft,newRight,lev);
            }
            else
                newInternal = new Node(he.getEra(),std::max(key,leaf->key),defltV,newLeft,newRight);

            /* try to add the new nodes to the tree */
            Node* tmpExpected=getPtr(leaf);
            if(childAddr->compare_exchange_strong(tmpExpected,getPtr(newInternal),std::memory_order_acq_rel)){
                res={};
                break;//insertion succeeds
            }
            else{//fails; help conflicting delete operation
                delete newInternal;
                Node* tmpChild=childAddr->load(std::memory_order_acquire);
                if(getPtr(tmpChild)==leaf && (getFlg(tmpChild)||getTg(tmpChild))){
                    /*
                     * address of the child has not changed
                     * and either the leaf node or its sibling
                     * has been flagged for deletion
                     */
                    cleanup(key,tid);
                }
            }
        }
        else{//key exists, update and return old
            res=leaf->val;
            if(nodeLess(newLeaf,parent))
                childAddr=&(parent->left);
            else
                childAddr=&(parent->right);
            if(childAddr->compare_exchange_strong(leaf,newLeaf,std::memory_order_acq_rel)){
                he.retire(leaf,tid);
                break;
            }
        }
    }
    he.clear(tid);
    return res;
}

template <class K, class V>
bool NatarajanTreeHE<K,V>::insert(K key, V val, int tid) {
    bool res=false;
    SeekRecord* seekRecord=&(records[tid]);

    Node* newInternal=nullptr;
    Node* newLeaf = new Node(he.getEra(),key,val,nullptr,nullptr);//also for comparing keys

    Node* parent=nullptr;
    Node* leaf=nullptr;
    std::atomic<Node*>* childAddr=nullptr;
    while(true){
        seek(key,tid);
        leaf=getPtr(seekRecord->leaf);
        parent=getPtr(seekRecord->parent);
        if(!nodeEqual(newLeaf,leaf)){//key does not exist
            /* obtain address of the child field to be modified */
            if(nodeLess(newLeaf,parent))
                childAddr=&(parent->left);
            else
                childAddr=&(parent->right);

            /* create left and right leaves of newInternal */
            Node* newLeft=nullptr;
            Node* newRight=nullptr;
            if(nodeLess(newLeaf,leaf)){
                newLeft=newLeaf;
                newRight=leaf;
            }
            else{
                newLeft=leaf;
                newRight=newLeaf;
            }

            /* create newInternal */
            if(isInf(leaf)){
                int lev=getInfLevel(leaf);
                newInternal = new Node(he.getEra(),infK,defltV,newLeft,newRight,lev);
            }
            else
                newInternal = new Node(he.getEra(),std::max(key,leaf->key),defltV,newLeft,newRight);

            /* try to add the new nodes to the tree */
            Node* tmpExpected=getPtr(leaf);
            if(childAddr->compare_exchange_strong(tmpExpected,getPtr(newInternal),std::memory_order_acq_rel)){
                res=true;
                break;//insertion succeeds
            }
            else{//fails; help conflicting delete operation
                delete newInternal;
                Node* tmpChild=childAddr->load(std::memory_order_acquire);
                if(getPtr(tmpChild)==leaf && (getFlg(tmpChild)||getTg(tmpChild))){
                    /*
                     * address of the child has not changed
                     * and either the leaf node or its sibling
                     * has been flagged for deletion
                     */
                    cleanup(key,tid);
                }
            }
        }
        else{//key exists, insertion fails
            delete newLeaf;
            res=false;
            break;
        }
    }
    he.clear(tid);
    return res;
}

template <class K, class V>
std::optional<V> NatarajanTreeHE<K,V>::innerRemove(K key, int tid){
    bool injecting = true;
    std::optional<V> res={};
    SeekRecord* seekRecord=&(records[tid]);

    Node keyNode{he.getEra(),key,defltV,nullptr,nullptr};//node to be compared

    Node* parent=nullptr;
    Node* leaf=nullptr;
    std::atomic<Node*>* childAddr=nullptr;
    while(true){
        seek(key,tid);
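        // removal is done in two phases, as in the Natarajan-Mittal algorithm:
        // "injection" flags the edge to the leaf (making the delete logically
        // visible), then "cleanup" physically unlinks the leaf and its parent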
        parent=getPtr(seekRecord->parent);

        /* obtain address of the child field to be modified */
        if(nodeLess(&keyNode,parent))
            childAddr=&(parent->left);
        else
            childAddr=&(parent->right);

        if(injecting){
            /* injection mode: check if the key exists */
            leaf=getPtr(seekRecord->leaf);
            if(!nodeEqual(leaf,&keyNode)){//does not exist
                res={};
                break;
            }

            /* inject the delete operation into the tree */
            Node* tmpExpected=getPtr(leaf);
            res=leaf->val;
            if(childAddr->compare_exchange_strong(tmpExpected,
                mixPtrFlgTg(tmpExpected,true,false), std::memory_order_acq_rel)){
                /* advance to cleanup mode to remove the leaf node */
                injecting=false;
                if(cleanup(key,tid)) break;
            }
            else{
                Node* tmpChild=childAddr->load(std::memory_order_acquire);
                if(getPtr(tmpChild)==leaf && (getFlg(tmpChild)||getTg(tmpChild))){
                    /*
                     * address of the child has not
                     * changed and either the leaf
                     * node or its sibling has been
                     * flagged for deletion
                     */
                    cleanup(key,tid);
                }
            }
        }
        else{
            /* cleanup mode: check if flagged node still exists */
            if(seekRecord->leaf!=leaf){
                /* leaf no longer in the tree */
                break;
            }
            else{
                /* leaf still in the tree; remove */
                if(cleanup(key,tid)) break;
            }
        }
    }
    he.clear(tid);
    return res;
}

template <class K, class V>
std::optional<V> NatarajanTreeHE<K,V>::replace(K key, V val, int tid){
    std::optional<V> res={};
    SeekRecord* seekRecord=&(records[tid]);

    Node* newInternal=nullptr;
    Node* newLeaf = new Node(he.getEra(),key,val,nullptr,nullptr);//also to compare keys

    Node* parent=nullptr;
    Node* leaf=nullptr;
    std::atomic<Node*>* childAddr=nullptr;
    while(true){
        seek(key,tid);
        parent=getPtr(seekRecord->parent);
        leaf=getPtr(seekRecord->leaf);
        if(!nodeEqual(newLeaf,leaf)){//key does not exist, replace fails
            delete newLeaf;
            res={};
            break;
        }
        else{//key exists, update and return old
            res=leaf->val;
            if(nodeLess(newLeaf,parent))
                childAddr=&(parent->left);
            else
                childAddr=&(parent->right);
            if(childAddr->compare_exchange_strong(leaf,newLeaf,std::memory_order_acq_rel)){
                he.retire(leaf,tid);
                break;
            }
        }
    }
    he.clear(tid);
    return res;
}

template <class K, class V>
std::map<K,V> NatarajanTreeHE<K,V>::rangeQuery(K key1, K key2, int& len, int tid){
    //NOT HP-like GC safe.
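    //(the recursion in doRangeQuery re-uses era indexes 2 and 3 for deeper
    // nodes while raw ancestor pointers stay on the stack, so a concurrently
    // retired ancestor could be reclaimed while still referenced; acceptable
    // for these benchmarks, not for production use)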
    if(key1>key2) return {};

    Node k1{he.getEra(),key1,defltV,nullptr,nullptr};//node to be compared
    Node k2{he.getEra(),key2,defltV,nullptr,nullptr};//node to be compared

    Node* leaf = getPtr(he.get_protected(0, s->left, tid));
    Node* current = getPtr(he.get_protected(1, leaf->left, tid));

    std::map<K,V> res;
    if(current!=nullptr)
        doRangeQuery(k1,k2,tid,current,res);
    len=res.size();
    return res;
}

template <class K, class V>
void NatarajanTreeHE<K,V>::doRangeQuery(Node& k1, Node& k2, int tid, Node* root, std::map<K,V>& res){
    Node* left = getPtr(he.get_protected(2, root->left, tid));
    Node* right = getPtr(he.get_protected(3, root->right, tid));
    if(left==nullptr&&right==nullptr){
        if(nodeLessEqual(&k1,root)&&nodeLessEqual(root,&k2)){
            res.emplace(root->key,root->val);
        }
        return;
    }
    if(left!=nullptr){
        if(nodeLess(&k1,root)){
            doRangeQuery(k1,k2,tid,left,res);
        }
    }
    if(right!=nullptr){
        if(nodeLessEqual(root,&k2)){
            doRangeQuery(k1,k2,tid,right,res);
        }
    }
    return;
}

// Wrappers for the "set" benchmarks
template <class K, class V>
bool NatarajanTreeHE<K,V>::add(K key, int tid) {
    return insert(key,key,tid);
}

template <class K, class V>
bool NatarajanTreeHE<K,V>::remove(K key, int tid) {
    return innerRemove(key,tid).has_value();
}

template <class K, class V>
bool NatarajanTreeHE<K,V>::contains(K key, int tid) {
    return get(key,tid).has_value();
}

// Not lock-free
template <class K, class V>
void NatarajanTreeHE<K,V>::addAll(K** keys, const int size, const int tid) {
    for (int i = 0; i < size; i++) add(*keys[i], tid);
}

#endif


================================================
FILE: datastructures/treemaps/OFLFRedBlackTree.hpp
================================================
#ifndef _OF_LF_RED_BLACK_BST_H_
#define _OF_LF_RED_BLACK_BST_H_

#include <iostream>
#include <string>
#include <algorithm>

#include "stms/OneFileLF.hpp"   // This header defines the macros for the STM being compiled

// Adapted from Java to C++ from the original at http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/RedBlackBST.java
template<typename K, typename V>
class OFLFRedBlackTree {

    const int64_t COLOR_RED = 0;
    const int64_t COLOR_BLACK = 1;

    struct Node : public oflf::tmbase {
        oflf::tmtype<K>       key;
        oflf::tmtype<V>       val;
        oflf::tmtype<Node*>   left {nullptr};
        oflf::tmtype<Node*>   right {nullptr};
        oflf::tmtype<int64_t> color;    // color of parent link
        oflf::tmtype<int64_t> size;     // subtree count
        Node(const K& key, const V& val, int64_t color, int64_t size) : key{key}, val{val}, color{color}, size{size} {}
    };

    oflf::tmtype<Node*> root {nullptr};   // root of the BST

    inline void assignAndFreeIfNull(oflf::tmtype<Node*>& z, Node* w) {
        Node* tofree = z;
        z = w;
        if (w == nullptr) oflf::tmDelete(tofree);
    }

public:
    /**
     * Initializes an empty symbol table.
     */
    OFLFRedBlackTree(int numThreads=0){ }

    ~OFLFRedBlackTree() {
        for (int i = 0; i < 10000; i++) {
            oflf::updateTx([&] () {
                if (root == nullptr) return;
                deleteMin();
            });
        }
    }

    /***************************************************************************
     *  Node helper methods.
     ***************************************************************************/
    // is node x red; false if x is null ?
    bool isRed(Node* x) {
        if (x == nullptr) return false;
        return x->color == COLOR_RED;
    }

    // number of nodes in the subtree rooted at x; 0 if x is null
    int size(Node* x) {
        if (x == nullptr) return 0;
        return x->size;
    }

    /**
     * Returns the number of key-value pairs in this symbol table.
     * @return the number of key-value pairs in this symbol table
     */
    int size() { return size(root); }

    /**
     * Is this symbol table empty?
     * @return {@code true} if this symbol table is empty and {@code false} otherwise
     */
    bool isEmpty() { return root == nullptr; }

    /***************************************************************************
     *  Standard BST search.
     ***************************************************************************/

    /**
     * Returns the value associated with the given key.
     * @param key the key
     * @return the value associated with the given key if the key is in the symbol table
     *     and {@code null} if the key is not in the symbol table
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool innerGet(K& key, V& oldValue, const bool saveOldValue) {
        bool found = get(root, key);
        if (!found) return false;
        //if (saveOldValue) oldValue = *val;  // Copy of V
        return true;
    }

    // value associated with the given key in subtree rooted at x; null if no such key
    bool get(Node* x, const K& key) {
        while (x != nullptr) {
            if (key < x->key) x = x->left;
            else if (x->key < key) x = x->right;
            else return true;
        }
        return false;
    }

    /**
     * Does this symbol table contain the given key?
     * @param key the key
     * @return {@code true} if this symbol table contains {@code key} and
     *     {@code false} otherwise
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool containsKey(const K& key) {
        return get(root, key);
    }

    /***************************************************************************
     *  Red-black tree insertion.
     ***************************************************************************/

    /**
     * Inserts the specified key-value pair into the symbol table, overwriting the old
     * value with the new value if the symbol table already contains the specified key.
     * Deletes the specified key (and its associated value) from this symbol table
     * if the specified value is {@code null}.
     *
     * @param key the key
     * @param val the value
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool innerPut(const K& key, const V& value) {
        bool ret = false;
        root = put(root, key, value, ret);
        root->color = COLOR_BLACK;
        return ret;
    }

    // insert the key-value pair in the subtree rooted at h
    Node* put(Node* h, const K& key, const V& val, bool& ret) {
        if (h == nullptr) {
            ret = true;
            return oflf::tmNew<Node>(key, val, COLOR_RED, 1);
        }
        if (key < h->key) h->left = put(h->left, key, val, ret);
        else if (h->key < key) h->right = put(h->right, key, val, ret);
        else h->val = val;
        // fix-up any right-leaning links
        if (isRed(h->right) && !isRed(h->left)) h = rotateLeft(h);
        if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h);
        if (isRed(h->left) && isRed(h->right)) flipColors(h);
        h->size = size(h->left) + size(h->right) + 1;
        return h;
    }

    /***************************************************************************
     *  Red-black tree deletion.
     ***************************************************************************/

    /**
     * Removes the smallest key and associated value from the symbol table.
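     * (the destructor relies on this method to drain the tree one element per
     * transaction, keeping each transaction's allocation/store log small)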
* @throws NoSuchElementException if the symbol table is empty */ void deleteMin() { if (isEmpty()) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; assignAndFreeIfNull(root, deleteMin(root)); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the minimum key rooted at h Node* deleteMin(Node* h) { if (h->left == nullptr) return nullptr; if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h); assignAndFreeIfNull(h->left, deleteMin(h->left)); return balance(h); } /** * Removes the largest key and associated value from the symbol table. * @throws NoSuchElementException if the symbol table is empty */ void deleteMax() { if (isEmpty()) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; root = deleteMax(root); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the maximum key rooted at h Node* deleteMax(Node* h) { if (isRed(h->left)) h = rotateRight(h); if (h->right == nullptr) return nullptr; if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h); h->right = deleteMax(h->right); return balance(h); } /** * Removes the specified key and its associated value from this symbol table * (if the key is in this symbol table). * * @param key the key */ void innerRemove(const K& key) { // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; assignAndFreeIfNull(root, deleteKey(root, key)); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the given key rooted at h Node* deleteKey(Node* h, const K& key) { // assert get(h, key) != null; if (key < h->key) { if (!isRed(h->left) && !isRed(h->left->left)) { h = moveRedLeft(h); } assignAndFreeIfNull(h->left, deleteKey(h->left, key)); } else { if (isRed(h->left)) { h = rotateRight(h); } if (key == h->key && (h->right == nullptr)) { return nullptr; } if (!isRed(h->right) && !isRed(h->right->left)) { h = moveRedRight(h); } if (key == h->key) { Node* x = min(h->right); h->key = x->key; h->val = x->val; // h->val = get(h->right, min(h->right).key); // h->key = min(h->right).key; assignAndFreeIfNull(h->right, deleteMin(h->right)); } else { assignAndFreeIfNull(h->right, deleteKey(h->right, key)); } } return balance(h); } /*************************************************************************** * Red-black tree helper functions. 
***************************************************************************/ // make a left-leaning link lean to the right Node* rotateRight(Node* h) { // assert (h != null) && isRed(h->left); Node* x = h->left; h->left = x->right; x->right = h; x->color = x->right->color; x->right->color = COLOR_RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // make a right-leaning link lean to the left Node* rotateLeft(Node* h) { // assert (h != null) && isRed(h->right); Node* x = h->right; h->right = x->left; x->left = h; x->color = x->left->color; x->left->color = COLOR_RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // flip the colors of a node and its two children void flipColors(Node* h) { // h must have opposite color of its two children // assert (h != null) && (h->left != null) && (h->right != null); // assert (!isRed(h) && isRed(h->left) && isRed(h->right)) // || (isRed(h) && !isRed(h->left) && !isRed(h->right)); h->color = !h->color; h->left->color = !h->left->color; h->right->color = !h->right->color; } // Assuming that h is red and both h->left and h->left.left // are black, make h->left or one of its children red. Node* moveRedLeft(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->left) && !isRed(h->left.left); flipColors(h); if (isRed(h->right->left)) { h->right = rotateRight(h->right); h = rotateLeft(h); flipColors(h); } return h; } // Assuming that h is red and both h->right and h->right.left // are black, make h->right or one of its children red. Node* moveRedRight(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->right) && !isRed(h->right.left); flipColors(h); if (isRed(h->left->left)) { h = rotateRight(h); flipColors(h); } return h; } // restore red-black tree invariant Node* balance(Node* h) { // assert (h != null); if (isRed(h->right)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Utility functions. ***************************************************************************/ /** * Returns the height of the BST (for debugging). * @return the height of the BST (a 1-node tree has height 0) */ int height() { return height(root); } int height(Node* x) { if (x == nullptr) return -1; return 1 + std::max(height(x->left), height(x->right)); } /*************************************************************************** * Ordered symbol table methods. ***************************************************************************/ /** * Returns the smallest key in the symbol table. * @return the smallest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* min() { if (isEmpty()) return nullptr; return min(root).key; } // the smallest key in subtree rooted at x; null if no such key Node* min(Node* x) { // assert x != null; if (x->left == nullptr) return x; else return min(x->left); } /** * Returns the largest key in the symbol table. 
     * @return the largest key in the symbol table
     * @throws NoSuchElementException if the symbol table is empty
     */
    K* max() {
        if (isEmpty()) return nullptr;
        return max(root).key;
    }

    // the largest key in the subtree rooted at x; null if no such key
    Node* max(Node* x) {
        // assert x != null;
        if (x->right == nullptr) return x;
        else                     return max(x->right);
    }

    /**
     * Returns the largest key in the symbol table less than or equal to {@code key}.
     * @param key the key
     * @return the largest key in the symbol table less than or equal to {@code key}
     * @throws NoSuchElementException if there is no such key
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    K* floor(const K& key) {
        if (key == nullptr) return nullptr;
        if (isEmpty()) return nullptr;
        Node* x = floor(root, key);
        if (x == nullptr) return nullptr;
        else              return x->key;
    }

    // the largest key in the subtree rooted at x less than or equal to the given key
    Node* floor(Node* x, const K& key) {
        if (x == nullptr) return nullptr;
        if (key == x->key) return x;
        if (key < x->key) return floor(x->left, key);
        Node* t = floor(x->right, key);
        if (t != nullptr) return t;
        else              return x;
    }

    /**
     * Returns the smallest key in the symbol table greater than or equal to {@code key}.
     * @param key the key
     * @return the smallest key in the symbol table greater than or equal to {@code key}
     * @throws NoSuchElementException if there is no such key
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    K* ceiling(const K& key) {
        if (key == nullptr) return nullptr;
        if (isEmpty()) return nullptr;
        Node* x = ceiling(root, key);
        if (x == nullptr) return nullptr;
        else              return x->key;
    }

    // the smallest key in the subtree rooted at x greater than or equal to the given key
    Node* ceiling(Node* x, const K& key) {
        if (x == nullptr) return nullptr;
        if (key == x->key) return x;
        if (x->key < key) return ceiling(x->right, key);
        Node* t = ceiling(x->left, key);
        if (t != nullptr) return t;
        else              return x;
    }

    /**
     * Return the kth smallest key in the symbol table.
     * @param k the order statistic
     * @return the {@code k}th smallest key in the symbol table
     * @throws IllegalArgumentException unless {@code k} is between 0 and n-1
     */
    K* select(int k) {
        if (k < 0 || k >= size()) {
            return nullptr;
        }
        Node* x = select(root, k);
        return x->key;
    }

    // the key of rank k in the subtree rooted at x
    Node* select(Node* x, int k) {
        // assert x != null;
        // assert k >= 0 && k < size(x);
        int t = size(x->left);
        if      (t > k) return select(x->left,  k);
        else if (t < k) return select(x->right, k-t-1);
        else            return x;
    }

    /**
     * Return the number of keys in the symbol table strictly less than {@code key}.
     * @param key the key
     * @return the number of keys in the symbol table strictly less than {@code key}
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    int rank(const K& key) {
        if (key == nullptr) return -1;
        return rank(key, root);
    }

    // number of keys less than key in the subtree rooted at x
    int rank(const K& key, Node* x) {
        if (x == nullptr) return 0;
        if      (key < x->key) return rank(key, x->left);
        else if (x->key < key) return 1 + size(x->left) + rank(key, x->right);
        else                   return size(x->left);
    }

    /***************************************************************************
     *  Range count and range search.
     ***************************************************************************/

    /**
     * Returns the number of keys in the symbol table in the given range.
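     * The count is computed from ranks: rank(hi) - rank(lo), plus one when hi
     * itself is present. For example, with keys {10, 20, 30}, size(10, 25)
     * returns rank(25) - rank(10) = 2 - 0 = 2, i.e. the keys 10 and 20.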
     *
     * @param lo minimum endpoint
     * @param hi maximum endpoint
     * @return the number of keys in the symbol table between {@code lo}
     *     (inclusive) and {@code hi} (inclusive)
     * @throws IllegalArgumentException if either {@code lo} or {@code hi}
     *     is {@code null}
     */
    int size(const K& lo, const K& hi) {
        if (lo == nullptr) return 0;
        if (hi == nullptr) return 0;
        if (hi < lo) return 0;
        if (containsKey(hi)) return rank(hi) - rank(lo) + 1;
        else                 return rank(hi) - rank(lo);
    }

    /***************************************************************************
     *  Check integrity of red-black tree data structure.
     ***************************************************************************/
    bool check() {
        if (!isBST())            std::cout << "Not in symmetric order\n";
        if (!isSizeConsistent()) std::cout << "Subtree counts not consistent\n";
        //if (!isRankConsistent()) std::cout << "Ranks not consistent\n";
        if (!is23())             std::cout << "Not a 2-3 tree\n";
        if (!isBalanced())       std::cout << "Not balanced\n";
        return isBST() && isSizeConsistent() && is23() && isBalanced();
    }

    // does this binary tree satisfy symmetric order?
    // Note: this test also ensures that data structure is a binary tree since order is strict
    bool isBST() {
        return isBST(root, nullptr, nullptr);
    }

    // is the tree rooted at x a BST with all keys strictly between min and max
    // (if min or max is null, treat as empty constraint)
    // Credit: Bob Dondero's elegant solution
    bool isBST(Node* x, K* min, K* max) {
        if (x == nullptr) return true;
        // TODO: port these two lines
        //if (min != nullptr && x->key.compareTo(min) <= 0) return false;
        //if (max != nullptr && x->key.compareTo(max) >= 0) return false;
        return isBST(x->left, min, x->key) && isBST(x->right, x->key, max);
    }

    // are the size fields correct?
    bool isSizeConsistent() { return isSizeConsistent(root); }
    bool isSizeConsistent(Node* x) {
        if (x == nullptr) return true;
        if (x->size != size(x->left) + size(x->right) + 1) return false;
        return isSizeConsistent(x->left) && isSizeConsistent(x->right);
    }

    /*
    // check that ranks are consistent
    bool isRankConsistent() {
        for (int i = 0; i < size(); i++)
            if (i != rank(select(i))) return false;
        for (K* key : keys())
            if (key.compareTo(select(rank(key))) != 0) return false;
        return true;
    }
    */

    // Does the tree have no red right links, and at most one (left)
    // red link in a row on any path?
    bool is23() { return is23(root); }
    bool is23(Node* x) {
        if (x == nullptr) return true;
        if (isRed(x->right)) return false;
        if (x != root && isRed(x) && isRed(x->left)) return false;
        return is23(x->left) && is23(x->right);
    }

    // do all paths from root to leaf have same number of black edges?
    bool isBalanced() {
        int black = 0;     // number of black links on path from root to min
        Node* x = root;
        while (x != nullptr) {
            if (!isRed(x)) black++;
            x = x->left;
        }
        return isBalanced(root, black);
    }

    // does every path from the root to a leaf have the given number of black links?
    bool isBalanced(Node* x, int black) {
        if (x == nullptr) return black == 0;
        if (!isRed(x)) black--;
        return isBalanced(x->left, black) && isBalanced(x->right, black);
    }

    // Inserts a key only if it's not already present
    bool add(K key, const int tid=0) {
        return oflf::updateTx([&] () { return innerPut(key,key); });
    }

    // Returns true only if the key was present
    bool remove(K key, const int tid=0) {
        return oflf::updateTx([&] () {
            V notused;
            bool retval = innerGet(key,notused,false);
            if (retval) innerRemove(key);
            return retval;
        });
    }

    bool contains(K key, const int tid=0) {
        return oflf::readTx([&] () {
            V notused;
            return innerGet(key,notused,false);
        });
    }

    // This is not fully transactional, but that's ok because we use it only on initialization.
    // We could make it fully transactional, but we would have to increase the size of the allocation/store logs.
    void addAll(K** keys, int size, const int tid=0) {
        for (int i = 0; i < size; i++) add(*keys[i], tid);
    }

    static std::string className() { return "OF-LF-RedBlackTree"; }
};

#endif   // _OF_LF_RED_BLACK_BST_H_


================================================
FILE: datastructures/treemaps/OFWFRedBlackTree.hpp
================================================
#ifndef _OF_WF_RED_BLACK_BST_H_
#define _OF_WF_RED_BLACK_BST_H_

#include <iostream>
#include <string>
#include <algorithm>

#include "stms/OneFileWF.hpp"   // This header defines the macros for the STM being compiled

// Adapted from Java to C++ from the original at http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/RedBlackBST.java
template<typename K, typename V>
class OFWFRedBlackTree {

    const int64_t COLOR_RED = 0;
    const int64_t COLOR_BLACK = 1;

    struct Node : public ofwf::tmbase {
        ofwf::tmtype<K>       key;
        ofwf::tmtype<V>       val;
        ofwf::tmtype<Node*>   left {nullptr};
        ofwf::tmtype<Node*>   right {nullptr};
        ofwf::tmtype<int64_t> color;    // color of parent link
        ofwf::tmtype<int64_t> size;     // subtree count
        Node(const K& key, const V& val, int64_t color, int64_t size) : key{key}, val{val}, color{color}, size{size} {}
    };

    ofwf::tmtype<Node*> root {nullptr};   // root of the BST

    inline void assignAndFreeIfNull(ofwf::tmtype<Node*>& z, Node* w) {
        Node* tofree = z;
        z = w;
        if (w == nullptr) ofwf::tmDelete(tofree);
    }

public:
    /**
     * Initializes an empty symbol table.
     */
    OFWFRedBlackTree(int numThreads=0){ }

    ~OFWFRedBlackTree() {
        for (int i = 0; i < 10000; i++) {
            ofwf::updateTx([&] () {
                if (root == nullptr) return;
                deleteMin();
            });
        }
    }

    /***************************************************************************
     *  Node helper methods.
     ***************************************************************************/
    // is node x red; false if x is null ?
    bool isRed(Node* x) {
        if (x == nullptr) return false;
        return x->color == COLOR_RED;
    }

    // number of nodes in the subtree rooted at x; 0 if x is null
    int size(Node* x) {
        if (x == nullptr) return 0;
        return x->size;
    }

    /**
     * Returns the number of key-value pairs in this symbol table.
     * @return the number of key-value pairs in this symbol table
     */
    int size() { return size(root); }

    /**
     * Is this symbol table empty?
     * @return {@code true} if this symbol table is empty and {@code false} otherwise
     */
    bool isEmpty() { return root == nullptr; }

    /***************************************************************************
     *  Standard BST search.
     ***************************************************************************/

    /**
     * Returns the value associated with the given key.
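     * (in this adaptation the method only reports whether the key is present;
     * the copy of the old value into 'oldValue' is commented out in the body)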
     * @param key the key
     * @return the value associated with the given key if the key is in the symbol table
     *     and {@code null} if the key is not in the symbol table
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool innerGet(K key, V& oldValue, const bool saveOldValue) {
        bool found = get(root, key);
        if (!found) return false;
        //if (saveOldValue) oldValue = *val;  // Copy of V
        return true;
    }

    // value associated with the given key in subtree rooted at x; null if no such key
    bool get(Node* x, const K& key) {
        while (x != nullptr) {
            if (key < x->key) x = x->left;
            else if (x->key < key) x = x->right;
            else return true;
        }
        return false;
    }

    /**
     * Does this symbol table contain the given key?
     * @param key the key
     * @return {@code true} if this symbol table contains {@code key} and
     *     {@code false} otherwise
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool containsKey(const K& key) {
        return get(root, key);
    }

    /***************************************************************************
     *  Red-black tree insertion.
     ***************************************************************************/

    /**
     * Inserts the specified key-value pair into the symbol table, overwriting the old
     * value with the new value if the symbol table already contains the specified key.
     * Deletes the specified key (and its associated value) from this symbol table
     * if the specified value is {@code null}.
     *
     * @param key the key
     * @param val the value
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool innerPut(const K& key, const V& value) {
        bool ret = false;
        root = put(root, key, value, ret);
        root->color = COLOR_BLACK;
        return ret;
    }

    // insert the key-value pair in the subtree rooted at h
    Node* put(Node* h, const K& key, const V& val, bool& ret) {
        if (h == nullptr) {
            ret = true;
            return ofwf::tmNew<Node>(key, val, COLOR_RED, 1);
        }
        if (key < h->key) h->left = put(h->left, key, val, ret);
        else if (h->key < key) h->right = put(h->right, key, val, ret);
        else h->val = val;
        // fix-up any right-leaning links
        if (isRed(h->right) && !isRed(h->left)) h = rotateLeft(h);
        if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h);
        if (isRed(h->left) && isRed(h->right)) flipColors(h);
        h->size = size(h->left) + size(h->right) + 1;
        return h;
    }

    /***************************************************************************
     *  Red-black tree deletion.
     ***************************************************************************/

    /**
     * Removes the smallest key and associated value from the symbol table.
     * @throws NoSuchElementException if the symbol table is empty
     */
    void deleteMin() {
        if (isEmpty()) return;
        // if both children of root are black, set root to red
        if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED;
        assignAndFreeIfNull(root, deleteMin(root));
        if (!isEmpty()) root->color = COLOR_BLACK;
        // assert check();
    }

    // delete the key-value pair with the minimum key rooted at h
    Node* deleteMin(Node* h) {
        if (h->left == nullptr) return nullptr;
        if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h);
        assignAndFreeIfNull(h->left, deleteMin(h->left));
        return balance(h);
    }

    /**
     * Removes the largest key and associated value from the symbol table.
* @throws NoSuchElementException if the symbol table is empty */ void deleteMax() { if (isEmpty()) return; // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; root = deleteMax(root); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the maximum key rooted at h Node* deleteMax(Node* h) { if (isRed(h->left)) h = rotateRight(h); if (h->right == nullptr) return nullptr; if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h); h->right = deleteMax(h->right); return balance(h); } /** * Removes the specified key and its associated value from this symbol table * (if the key is in this symbol table). * * @param key the key */ void innerRemove(K key) { // if both children of root are black, set root to red if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED; assignAndFreeIfNull(root, deleteKey(root, key)); if (!isEmpty()) root->color = COLOR_BLACK; // assert check(); } // delete the key-value pair with the given key rooted at h Node* deleteKey(Node* h, const K& key) { // assert get(h, key) != null; if (key < h->key) { if (!isRed(h->left) && !isRed(h->left->left)) { h = moveRedLeft(h); } assignAndFreeIfNull(h->left, deleteKey(h->left, key)); } else { if (isRed(h->left)) { h = rotateRight(h); } if (key == h->key && (h->right == nullptr)) { return nullptr; } if (!isRed(h->right) && !isRed(h->right->left)) { h = moveRedRight(h); } if (key == h->key) { Node* x = min(h->right); h->key = x->key; h->val = x->val; // h->val = get(h->right, min(h->right).key); // h->key = min(h->right).key; assignAndFreeIfNull(h->right, deleteMin(h->right)); } else { assignAndFreeIfNull(h->right, deleteKey(h->right, key)); } } return balance(h); } /*************************************************************************** * Red-black tree helper functions. ***************************************************************************/ // make a left-leaning link lean to the right Node* rotateRight(Node* h) { // assert (h != null) && isRed(h->left); Node* x = h->left; h->left = x->right; x->right = h; x->color = x->right->color; x->right->color = COLOR_RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // make a right-leaning link lean to the left Node* rotateLeft(Node* h) { // assert (h != null) && isRed(h->right); Node* x = h->right; h->right = x->left; x->left = h; x->color = x->left->color; x->left->color = COLOR_RED; x->size = h->size; h->size = size(h->left) + size(h->right) + 1; return x; } // flip the colors of a node and its two children void flipColors(Node* h) { // h must have opposite color of its two children // assert (h != null) && (h->left != null) && (h->right != null); // assert (!isRed(h) && isRed(h->left) && isRed(h->right)) // || (isRed(h) && !isRed(h->left) && !isRed(h->right)); h->color = !h->color; h->left->color = !h->left->color; h->right->color = !h->right->color; } // Assuming that h is red and both h->left and h->left.left // are black, make h->left or one of its children red. Node* moveRedLeft(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->left) && !isRed(h->left.left); flipColors(h); if (isRed(h->right->left)) { h->right = rotateRight(h->right); h = rotateLeft(h); flipColors(h); } return h; } // Assuming that h is red and both h->right and h->right.left // are black, make h->right or one of its children red. 
Node* moveRedRight(Node* h) { // assert (h != null); // assert isRed(h) && !isRed(h->right) && !isRed(h->right.left); flipColors(h); if (isRed(h->left->left)) { h = rotateRight(h); flipColors(h); } return h; } // restore red-black tree invariant Node* balance(Node* h) { // assert (h != null); if (isRed(h->right)) h = rotateLeft(h); if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h); if (isRed(h->left) && isRed(h->right)) flipColors(h); h->size = size(h->left) + size(h->right) + 1; return h; } /*************************************************************************** * Utility functions. ***************************************************************************/ /** * Returns the height of the BST (for debugging). * @return the height of the BST (a 1-node tree has height 0) */ int height() { return height(root); } int height(Node* x) { if (x == nullptr) return -1; return 1 + std::max(height(x->left), height(x->right)); } /*************************************************************************** * Ordered symbol table methods. ***************************************************************************/ /** * Returns the smallest key in the symbol table. * @return the smallest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* min() { if (isEmpty()) return nullptr; return min(root).key; } // the smallest key in subtree rooted at x; null if no such key Node* min(Node* x) { // assert x != null; if (x->left == nullptr) return x; else return min(x->left); } /** * Returns the largest key in the symbol table. * @return the largest key in the symbol table * @throws NoSuchElementException if the symbol table is empty */ K* max() { if (isEmpty()) return nullptr; return max(root).key; } // the largest key in the subtree rooted at x; null if no such key Node* max(Node* x) { // assert x != null; if (x->right == nullptr) return x; else return max(x->right); } /** * Returns the largest key in the symbol table less than or equal to {@code key}. * @param key the key * @return the largest key in the symbol table less than or equal to {@code key} * @throws NoSuchElementException if there is no such key * @throws IllegalArgumentException if {@code key} is {@code null} */ K* floor(const K& key) { if (key == nullptr) return nullptr; if (isEmpty()) return nullptr; Node* x = floor(root, key); if (x == nullptr) return nullptr; else return x->key; } // the largest key in the subtree rooted at x less than or equal to the given key Node* floor(Node* x, const K& key) { if (x == nullptr) return nullptr; if (key == x->key) return x; if (key < x->key) return floor(x->left, key); Node* t = floor(x->right, key); if (t != nullptr) return t; else return x; } /** * Returns the smallest key in the symbol table greater than or equal to {@code key}. 
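 * For example, with keys {10, 20, 30}: ceiling(15) yields 20, ceiling(20)
 * yields 20, and ceiling(35) yields null because no key is >= 35.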
     * @param key the key
     * @return the smallest key in the symbol table greater than or equal to {@code key}
     * @throws NoSuchElementException if there is no such key
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    K* ceiling(const K& key) {
        if (key == nullptr) return nullptr;
        if (isEmpty()) return nullptr;
        Node* x = ceiling(root, key);
        if (x == nullptr) return nullptr;
        else              return x->key;
    }

    // the smallest key in the subtree rooted at x greater than or equal to the given key
    Node* ceiling(Node* x, const K& key) {
        if (x == nullptr) return nullptr;
        if (key == x->key) return x;
        if (x->key < key) return ceiling(x->right, key);
        Node* t = ceiling(x->left, key);
        if (t != nullptr) return t;
        else              return x;
    }

    /**
     * Return the kth smallest key in the symbol table.
     * @param k the order statistic
     * @return the {@code k}th smallest key in the symbol table
     * @throws IllegalArgumentException unless {@code k} is between 0 and n-1
     */
    K* select(int k) {
        if (k < 0 || k >= size()) {
            return nullptr;
        }
        Node* x = select(root, k);
        return x->key;
    }

    // the key of rank k in the subtree rooted at x
    Node* select(Node* x, int k) {
        // assert x != null;
        // assert k >= 0 && k < size(x);
        int t = size(x->left);
        if      (t > k) return select(x->left,  k);
        else if (t < k) return select(x->right, k-t-1);
        else            return x;
    }

    /**
     * Return the number of keys in the symbol table strictly less than {@code key}.
     * @param key the key
     * @return the number of keys in the symbol table strictly less than {@code key}
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    int rank(const K& key) {
        if (key == nullptr) return -1;
        return rank(key, root);
    }

    // number of keys less than key in the subtree rooted at x
    int rank(const K& key, Node* x) {
        if (x == nullptr) return 0;
        if      (key < x->key) return rank(key, x->left);
        else if (x->key < key) return 1 + size(x->left) + rank(key, x->right);
        else                   return size(x->left);
    }

    /***************************************************************************
     *  Range count and range search.
     ***************************************************************************/

    /**
     * Returns the number of keys in the symbol table in the given range.
     *
     * @param lo minimum endpoint
     * @param hi maximum endpoint
     * @return the number of keys in the symbol table between {@code lo}
     *     (inclusive) and {@code hi} (inclusive)
     * @throws IllegalArgumentException if either {@code lo} or {@code hi}
     *     is {@code null}
     */
    int size(const K& lo, const K& hi) {
        if (lo == nullptr) return 0;
        if (hi == nullptr) return 0;
        if (hi < lo) return 0;
        if (containsKey(hi)) return rank(hi) - rank(lo) + 1;
        else                 return rank(hi) - rank(lo);
    }

    /***************************************************************************
     *  Check integrity of red-black tree data structure.
     ***************************************************************************/
    bool check() {
        if (!isBST())            std::cout << "Not in symmetric order\n";
        if (!isSizeConsistent()) std::cout << "Subtree counts not consistent\n";
        //if (!isRankConsistent()) std::cout << "Ranks not consistent\n";
        if (!is23())             std::cout << "Not a 2-3 tree\n";
        if (!isBalanced())       std::cout << "Not balanced\n";
        return isBST() && isSizeConsistent() && is23() && isBalanced();
    }

    // does this binary tree satisfy symmetric order?
    // Note: this test also ensures that data structure is a binary tree since order is strict
    bool isBST() {
        return isBST(root, nullptr, nullptr);
    }

    // is the tree rooted at x a BST with all keys strictly between min and max
    // (if min or max is null, treat as empty constraint)
    // Credit: Bob Dondero's elegant solution
    bool isBST(Node* x, K* min, K* max) {
        if (x == nullptr) return true;
        // TODO: port these two lines
        //if (min != nullptr && x->key.compareTo(min) <= 0) return false;
        //if (max != nullptr && x->key.compareTo(max) >= 0) return false;
        return isBST(x->left, min, x->key) && isBST(x->right, x->key, max);
    }

    // are the size fields correct?
    bool isSizeConsistent() { return isSizeConsistent(root); }
    bool isSizeConsistent(Node* x) {
        if (x == nullptr) return true;
        if (x->size != size(x->left) + size(x->right) + 1) return false;
        return isSizeConsistent(x->left) && isSizeConsistent(x->right);
    }

    /*
    // check that ranks are consistent
    bool isRankConsistent() {
        for (int i = 0; i < size(); i++)
            if (i != rank(select(i))) return false;
        for (K* key : keys())
            if (key.compareTo(select(rank(key))) != 0) return false;
        return true;
    }
    */

    // Does the tree have no red right links, and at most one (left)
    // red link in a row on any path?
    bool is23() { return is23(root); }
    bool is23(Node* x) {
        if (x == nullptr) return true;
        if (isRed(x->right)) return false;
        if (x != root && isRed(x) && isRed(x->left)) return false;
        return is23(x->left) && is23(x->right);
    }

    // do all paths from root to leaf have same number of black edges?
    bool isBalanced() {
        int black = 0;     // number of black links on path from root to min
        Node* x = root;
        while (x != nullptr) {
            if (!isRed(x)) black++;
            x = x->left;
        }
        return isBalanced(root, black);
    }

    // does every path from the root to a leaf have the given number of black links?
    bool isBalanced(Node* x, int black) {
        if (x == nullptr) return black == 0;
        if (!isRed(x)) black--;
        return isBalanced(x->left, black) && isBalanced(x->right, black);
    }

    // Inserts a key only if it's not already present
    bool add(K key, const int tid=0) {
        return ofwf::updateTx([=] () { return innerPut(key,key); });
    }

    // Returns true only if the key was present
    bool remove(K key, const int tid=0) {
        return ofwf::updateTx([=] () {
            V notused;
            bool retval = innerGet(key,notused,false);
            if (retval) innerRemove(key);
            return retval;
        });
    }

    bool contains(K key, const int tid=0) {
        return ofwf::readTx([=] () {
            V notused;
            return innerGet(key,notused,false);
        });
    }

    void addAll(K** keys, int size, const int tid=0) {
        for (int i = 0; i < size; i++) add(*keys[i], tid);
    }

    static std::string className() { return "OF-WF-RedBlackTree"; }
};

#endif   // _OF_WF_RED_BLACK_BST_H_


================================================
FILE: datastructures/treemaps/TinySTMRedBlackTree.hpp
================================================
#ifndef _TINY_STM_RED_BLACK_BST_H_
#define _TINY_STM_RED_BLACK_BST_H_

#include <iostream>
#include <string>
#include <algorithm>

#include "stms/TinySTM.hpp"

// Adapted from Java to C++ from the original at http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/RedBlackBST.java
template<typename K, typename V>
class TinySTMRedBlackTree {

    const int64_t COLOR_RED = 0;
    const int64_t COLOR_BLACK = 1;

    struct Node {
        tinystm::tmtype<K>       key;
        tinystm::tmtype<V>       val;
        tinystm::tmtype<Node*>   left {nullptr};
        tinystm::tmtype<Node*>   right {nullptr};
        tinystm::tmtype<int64_t> color;    // color of parent link
        tinystm::tmtype<int64_t> size;     // subtree count
        Node(const K& key, const V& val, int64_t color, int64_t size) : key{key}, val{val}, color{color}, size{size} {}
    };

    tinystm::tmtype<Node*> root {nullptr};   // root of the BST

    inline void assignAndFreeIfNull(tinystm::tmtype<Node*>& z, Node* w) {
        Node* tofree = z;
        z = w;
        if (w == nullptr) tinystm::tmDelete(tofree);
    }

public:
    /**
     * Initializes an empty symbol table.
     */
    TinySTMRedBlackTree(int maxThreads=0){ }

    ~TinySTMRedBlackTree() {
        // The transaction log is not large enough to delete everything in one go,
        // so we delete one key per transaction, for up to 1000 transactions
        for (int i = 0; i < 1000; i++) {
            tinystm::updateTx([&] () {
                if (root == nullptr) return true;
                deleteMin();
                return true;
            });
        }
    }

    /***************************************************************************
     *  Node helper methods.
     ***************************************************************************/
    // is node x red; false if x is null ?
    bool isRed(Node* x) {
        if (x == nullptr) return false;
        return x->color == COLOR_RED;
    }

    // number of nodes in the subtree rooted at x; 0 if x is null
    int size(Node* x) {
        if (x == nullptr) return 0;
        return x->size;
    }

    /**
     * Returns the number of key-value pairs in this symbol table.
     * @return the number of key-value pairs in this symbol table
     */
    int size() { return size(root); }

    /**
     * Is this symbol table empty?
     * @return {@code true} if this symbol table is empty and {@code false} otherwise
     */
    bool isEmpty() { return root == nullptr; }

    /***************************************************************************
     *  Standard BST search.
     ***************************************************************************/

    /**
     * Returns the value associated with the given key.
     * @param key the key
     * @return the value associated with the given key if the key is in the symbol table
     *     and {@code null} if the key is not in the symbol table
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool innerGet(K& key, V& oldValue, const bool saveOldValue) {
        bool found = get(root, key);
        if (!found) return false;
        //if (saveOldValue) oldValue = *val;  // Copy of V
        return true;
    }

    // value associated with the given key in subtree rooted at x; null if no such key
    bool get(Node* x, const K& key) {
        while (x != nullptr) {
            if (key < x->key) x = x->left;
            else if (x->key < key) x = x->right;
            else return true;
        }
        return false;
    }

    /**
     * Does this symbol table contain the given key?
     * @param key the key
     * @return {@code true} if this symbol table contains {@code key} and
     *     {@code false} otherwise
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool containsKey(const K& key) {
        return get(root, key);
    }

    /***************************************************************************
     *  Red-black tree insertion.
     ***************************************************************************/

    /**
     * Inserts the specified key-value pair into the symbol table, overwriting the old
     * value with the new value if the symbol table already contains the specified key.
     * Deletes the specified key (and its associated value) from this symbol table
     * if the specified value is {@code null}.
     *
     * @param key the key
     * @param val the value
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    bool innerPut(const K& key, const V& value) {
        bool ret = false;
        root = put(root, key, value, ret);
        root->color = COLOR_BLACK;
        return ret;
    }

    // insert the key-value pair in the subtree rooted at h
    Node* put(Node* h, const K& key, const V& val, bool& ret) {
        if (h == nullptr) {
            ret = true;
            return tinystm::tmNew<Node>(key, val, COLOR_RED, 1);
        }
        if (key < h->key) h->left = put(h->left, key, val, ret);
        else if (h->key < key) h->right = put(h->right, key, val, ret);
        else h->val = val;
        // fix-up any right-leaning links
        if (isRed(h->right) && !isRed(h->left)) h = rotateLeft(h);
        if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h);
        if (isRed(h->left) && isRed(h->right)) flipColors(h);
        h->size = size(h->left) + size(h->right) + 1;
        return h;
    }

    /***************************************************************************
     *  Red-black tree deletion.
     ***************************************************************************/

    /**
     * Removes the smallest key and associated value from the symbol table.
     * @throws NoSuchElementException if the symbol table is empty
     */
    void deleteMin() {
        if (isEmpty()) return;
        // if both children of root are black, set root to red
        if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED;
        assignAndFreeIfNull(root, deleteMin(root));
        if (!isEmpty()) root->color = COLOR_BLACK;
        // assert check();
    }

    // delete the key-value pair with the minimum key rooted at h
    Node* deleteMin(Node* h) {
        if (h->left == nullptr) return nullptr;
        if (!isRed(h->left) && !isRed(h->left->left)) h = moveRedLeft(h);
        assignAndFreeIfNull(h->left, deleteMin(h->left));
        return balance(h);
    }

    /**
     * Removes the largest key and associated value from the symbol table.
    /**
     * Removes the largest key and associated value from the symbol table.
     * @throws NoSuchElementException if the symbol table is empty
     */
    void deleteMax() {
        if (isEmpty()) return;
        // if both children of root are black, set root to red
        if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED;
        assignAndFreeIfNull(root, deleteMax(root));
        if (!isEmpty()) root->color = COLOR_BLACK;
        // assert check();
    }

    // delete the key-value pair with the maximum key rooted at h
    Node* deleteMax(Node* h) {
        if (isRed(h->left)) h = rotateRight(h);
        if (h->right == nullptr) return nullptr;
        if (!isRed(h->right) && !isRed(h->right->left)) h = moveRedRight(h);
        assignAndFreeIfNull(h->right, deleteMax(h->right));
        return balance(h);
    }

    /**
     * Removes the specified key and its associated value from this symbol table
     * (if the key is in this symbol table).
     *
     * @param key the key
     */
    void innerRemove(const K& key) {
        // if both children of root are black, set root to red
        if (!isRed(root->left) && !isRed(root->right)) root->color = COLOR_RED;
        assignAndFreeIfNull(root, deleteKey(root, key));
        if (!isEmpty()) root->color = COLOR_BLACK;
        // assert check();
    }

    // delete the key-value pair with the given key rooted at h
    Node* deleteKey(Node* h, const K& key) {
        // assert get(h, key) != null;
        if (key < h->key) {
            if (!isRed(h->left) && !isRed(h->left->left)) {
                h = moveRedLeft(h);
            }
            assignAndFreeIfNull(h->left, deleteKey(h->left, key));
        } else {
            if (isRed(h->left)) {
                h = rotateRight(h);
            }
            if (key == h->key && (h->right == nullptr)) {
                return nullptr;
            }
            if (!isRed(h->right) && !isRed(h->right->left)) {
                h = moveRedRight(h);
            }
            if (key == h->key) {
                Node* x = min(h->right);
                h->key = x->key;
                h->val = x->val;
                // h->val = get(h->right, min(h->right).key);
                // h->key = min(h->right).key;
                assignAndFreeIfNull(h->right, deleteMin(h->right));
            } else {
                assignAndFreeIfNull(h->right, deleteKey(h->right, key));
            }
        }
        return balance(h);
    }

    /***************************************************************************
     *  Red-black tree helper functions.
     ***************************************************************************/
    // make a left-leaning link lean to the right
    Node* rotateRight(Node* h) {
        // assert (h != null) && isRed(h->left);
        Node* x = h->left;
        h->left = x->right;
        x->right = h;
        x->color = x->right->color;
        x->right->color = COLOR_RED;
        x->size = h->size;
        h->size = size(h->left) + size(h->right) + 1;
        return x;
    }

    // make a right-leaning link lean to the left
    Node* rotateLeft(Node* h) {
        // assert (h != null) && isRed(h->right);
        Node* x = h->right;
        h->right = x->left;
        x->left = h;
        x->color = x->left->color;
        x->left->color = COLOR_RED;
        x->size = h->size;
        h->size = size(h->left) + size(h->right) + 1;
        return x;
    }

    // flip the colors of a node and its two children
    void flipColors(Node* h) {
        // h must have opposite color of its two children
        // assert (h != null) && (h->left != null) && (h->right != null);
        // assert (!isRed(h) && isRed(h->left) && isRed(h->right))
        //    || (isRed(h) && !isRed(h->left) && !isRed(h->right));
        h->color = !h->color;
        h->left->color = !h->left->color;
        h->right->color = !h->right->color;
    }

    // Assuming that h is red and both h->left and h->left->left
    // are black, make h->left or one of its children red.
    Node* moveRedLeft(Node* h) {
        // assert (h != null);
        // assert isRed(h) && !isRed(h->left) && !isRed(h->left->left);
        flipColors(h);
        if (isRed(h->right->left)) {
            h->right = rotateRight(h->right);
            h = rotateLeft(h);
            flipColors(h);
        }
        return h;
    }

    // Assuming that h is red and both h->right and h->right->left
    // are black, make h->right or one of its children red.
    Node* moveRedRight(Node* h) {
        // assert (h != null);
        // assert isRed(h) && !isRed(h->right) && !isRed(h->right->left);
        flipColors(h);
        if (isRed(h->left->left)) {
            h = rotateRight(h);
            flipColors(h);
        }
        return h;
    }

    // restore red-black tree invariant
    Node* balance(Node* h) {
        // assert (h != null);
        if (isRed(h->right)) h = rotateLeft(h);
        if (isRed(h->left) && isRed(h->left->left)) h = rotateRight(h);
        if (isRed(h->left) && isRed(h->right)) flipColors(h);
        h->size = size(h->left) + size(h->right) + 1;
        return h;
    }

    /***************************************************************************
     *  Utility functions.
     ***************************************************************************/
    /**
     * Returns the height of the BST (for debugging).
     * @return the height of the BST (a 1-node tree has height 0)
     */
    int height() { return height(root); }

    int height(Node* x) {
        if (x == nullptr) return -1;
        return 1 + std::max(height(x->left), height(x->right));
    }

    /***************************************************************************
     *  Ordered symbol table methods.
     ***************************************************************************/
    /**
     * Returns the smallest key in the symbol table.
     * @return the smallest key in the symbol table
     * @throws NoSuchElementException if the symbol table is empty
     */
    K* min() {
        if (isEmpty()) return nullptr;
        return min(root)->key;
    }

    // the smallest key in subtree rooted at x; null if no such key
    Node* min(Node* x) {
        // assert x != null;
        if (x->left == nullptr) return x;
        else return min(x->left);
    }

    /**
     * Returns the largest key in the symbol table.
     * @return the largest key in the symbol table
     * @throws NoSuchElementException if the symbol table is empty
     */
    K* max() {
        if (isEmpty()) return nullptr;
        return max(root)->key;
    }

    // the largest key in the subtree rooted at x; null if no such key
    Node* max(Node* x) {
        // assert x != null;
        if (x->right == nullptr) return x;
        else return max(x->right);
    }

    /**
     * Returns the largest key in the symbol table less than or equal to {@code key}.
     * @param key the key
     * @return the largest key in the symbol table less than or equal to {@code key}
     * @throws NoSuchElementException if there is no such key
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    K* floor(const K& key) {
        if (isEmpty()) return nullptr;
        Node* x = floor(root, key);
        if (x == nullptr) return nullptr;
        else return x->key;
    }

    // the largest key in the subtree rooted at x less than or equal to the given key
    Node* floor(Node* x, const K& key) {
        if (x == nullptr) return nullptr;
        if (key == x->key) return x;
        if (key < x->key) return floor(x->left, key);
        Node* t = floor(x->right, key);
        if (t != nullptr) return t;
        else return x;
    }
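    // [Editor's note] Worked example of the two bracketing queries on keys {10, 20, 30}:
    //   floor(25)   -> 20       (largest key <= 25)
    //   floor(5)    -> nullptr  (no key <= 5)
    //   ceiling(25) -> 30       (smallest key >= 25)
    //   ceiling(35) -> nullptr  (no key >= 35)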
    /**
     * Returns the smallest key in the symbol table greater than or equal to {@code key}.
     * @param key the key
     * @return the smallest key in the symbol table greater than or equal to {@code key}
     * @throws NoSuchElementException if there is no such key
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    K* ceiling(const K& key) {
        if (isEmpty()) return nullptr;
        Node* x = ceiling(root, key);
        if (x == nullptr) return nullptr;
        else return x->key;
    }

    // the smallest key in the subtree rooted at x greater than or equal to the given key
    Node* ceiling(Node* x, const K& key) {
        if (x == nullptr) return nullptr;
        if (key == x->key) return x;
        if (x->key < key) return ceiling(x->right, key);
        Node* t = ceiling(x->left, key);
        if (t != nullptr) return t;
        else return x;
    }

    /**
     * Return the kth smallest key in the symbol table.
     * @param k the order statistic
     * @return the {@code k}th smallest key in the symbol table
     * @throws IllegalArgumentException unless {@code k} is between 0 and n-1
     */
    K* select(int k) {
        if (k < 0 || k >= size()) {
            return nullptr;
        }
        Node* x = select(root, k);
        return x->key;
    }

    // the key of rank k in the subtree rooted at x
    Node* select(Node* x, int k) {
        // assert x != null;
        // assert k >= 0 && k < size(x);
        int t = size(x->left);
        if (t > k) return select(x->left, k);
        else if (t < k) return select(x->right, k-t-1);
        else return x;
    }

    /**
     * Return the number of keys in the symbol table strictly less than {@code key}.
     * @param key the key
     * @return the number of keys in the symbol table strictly less than {@code key}
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    int rank(const K& key) { return rank(key, root); }

    // number of keys less than key in the subtree rooted at x
    int rank(const K& key, Node* x) {
        if (x == nullptr) return 0;
        if (key < x->key) return rank(key, x->left);
        else if (x->key < key) return 1 + size(x->left) + rank(key, x->right);
        else return size(x->left);
    }

    /***************************************************************************
     *  Range count and range search.
     ***************************************************************************/
    /**
     * Returns the number of keys in the symbol table in the given range.
     *
     * @param lo minimum endpoint
     * @param hi maximum endpoint
     * @return the number of keys in the symbol table between {@code lo}
     *     (inclusive) and {@code hi} (inclusive)
     * @throws IllegalArgumentException if either {@code lo} or {@code hi}
     *     is {@code null}
     */
    int size(const K& lo, const K& hi) {
        if (hi < lo) return 0;
        if (containsKey(hi)) return rank(hi) - rank(lo) + 1;
        else return rank(hi) - rank(lo);
    }

    /***************************************************************************
     *  Check integrity of red-black tree data structure.
     ***************************************************************************/
    bool check() {
        if (!isBST()) std::cout << "Not in symmetric order\n";
        if (!isSizeConsistent()) std::cout << "Subtree counts not consistent\n";
        //if (!isRankConsistent()) std::cout << "Ranks not consistent\n";
        if (!is23()) std::cout << "Not a 2-3 tree\n";
        if (!isBalanced()) std::cout << "Not balanced\n";
        return isBST() && isSizeConsistent() && is23() && isBalanced();
    }
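    // [Editor's note] check() walks raw pointers with no synchronization, so on a shared
    // tree it should run inside a transaction. A minimal sketch, assuming readTx() forwards
    // the lambda's boolean result exactly as in contains() below (the name checkTx is
    // hypothetical, not part of the original file). Note that a retried transaction may
    // print its diagnostics more than once.
    bool checkTx() {
        return tinystm::readTx([&] () { return check(); });
    }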
    // does this binary tree satisfy symmetric order?
    // Note: this test also ensures that the data structure is a binary tree, since the order is strict
    bool isBST() { return isBST(root, nullptr, nullptr); }

    // is the tree rooted at x a BST with all keys strictly between min and max
    // (if min or max is null, treat as empty constraint)
    // Credit: Bob Dondero's elegant solution
    bool isBST(Node* x, K* min, K* max) {
        if (x == nullptr) return true;
        // TODO: port these two lines
        //if (min != nullptr && x->key.compareTo(min) <= 0) return false;
        //if (max != nullptr && x->key.compareTo(max) >= 0) return false;
        return isBST(x->left, min, x->key) && isBST(x->right, x->key, max);
    }

    // are the size fields correct?
    bool isSizeConsistent() { return isSizeConsistent(root); }

    bool isSizeConsistent(Node* x) {
        if (x == nullptr) return true;
        if (x->size != size(x->left) + size(x->right) + 1) return false;
        return isSizeConsistent(x->left) && isSizeConsistent(x->right);
    }

    /*
    // check that ranks are consistent
    bool isRankConsistent() {
        for (int i = 0; i < size(); i++)
            if (i != rank(select(i))) return false;
        for (K* key : keys())
            if (key.compareTo(select(rank(key))) != 0) return false;
        return true;
    }
    */

    // Does the tree have no red right links, and at most one (left)
    // red link in a row on any path?
    bool is23() { return is23(root); }

    bool is23(Node* x) {
        if (x == nullptr) return true;
        if (isRed(x->right)) return false;
        if (x != root && isRed(x) && isRed(x->left)) return false;
        return is23(x->left) && is23(x->right);
    }

    // do all paths from root to leaf have same number of black edges?
    bool isBalanced() {
        int black = 0;     // number of black links on path from root to min
        Node* x = root;
        while (x != nullptr) {
            if (!isRed(x)) black++;
            x = x->left;
        }
        return isBalanced(root, black);
    }

    // does every path from the root to a leaf have the given number of black links?
    bool isBalanced(Node* x, int black) {
        if (x == nullptr) return black == 0;
        if (!isRed(x)) black--;
        return isBalanced(x->left, black) && isBalanced(x->right, black);
    }

    // Inserts a key only if it's not already present
    bool add(K key, const int tid=0) {
        return tinystm::updateTx([&] () { return innerPut(key,key); });
    }

    // Returns true only if the key was present
    bool remove(K key, const int tid=0) {
        return tinystm::updateTx([&] () {
            V notused;
            bool retval = innerGet(key,notused,false);
            if (retval) innerRemove(key);
            return retval;
        });
    }

    bool contains(K key, const int tid=0) {
        return tinystm::readTx([&] () {
            V notused;
            return innerGet(key,notused,false);
        });
    }
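    // [Editor's note] updateTx() lets the inner operations above compose into larger atomic
    // steps. A minimal sketch (the name rename is hypothetical, not part of the original
    // file): atomically move oldKey to newKey, returning false if oldKey is absent.
    bool rename(K oldKey, K newKey) {
        return tinystm::updateTx([&] () {
            V notused;
            if (!innerGet(oldKey, notused, false)) return false;  // nothing to move
            innerRemove(oldKey);
            innerPut(newKey, newKey);  // same key-as-value convention as add() above
            return true;
        });
    }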
    // This is not fully transactional, but that's OK because we only use it during initialization.
    // We could make it fully transactional, but we would have to increase the size of the allocation/store logs.
    void addAll(K** keys, int size, const int tid=0) {
        for (int i = 0; i < size; i++) add(*keys[i], tid);
    }

    static std::string className() { return tinystm::TinySTM::className() + "-RedBlackTree"; }
};

#endif // _TINY_STM_RED_BLACK_BST_H_


================================================
FILE: datastructures/trevor_brown_abtree/Makefile
================================================
GPP = g++
FLAGS = -std=c++11 -mcx16 -O3 -g
FLAGS += -DPHYSICAL_PROCESSORS=48 -DMAX_TID_POW2=64
LDFLAGS += -I./common
LDFLAGS += -I./common/dcss
LDFLAGS += -I./common/atomic_ops
LDFLAGS += -I./common/descriptors
LDFLAGS += -I./common/recordmgr
LDFLAGS += -I./common/rq
LDFLAGS += -I./common/rq/snapcollector
LDFLAGS += -I./ds/brown_ext_abtree_lf
LDFLAGS += -lpthread

all: minimal_example

minimal_example:
	$(GPP) $(FLAGS) -o $@.out $@.cpp $(LDFLAGS)


================================================
FILE: datastructures/trevor_brown_abtree/TrevorBrownABTree.hpp
================================================
#ifndef _TREVOR_BROWN_AB_TREE_HP_H_
#define _TREVOR_BROWN_AB_TREE_HP_H_

#include <string>
#include
#include
#include "common/ThreadRegistry.hpp"
#include "ds/brown_ext_abtree_lf/brown_ext_abtree_lf_adapter.h"

/*
 * This is a wrapper around Trevor Brown's AB-Tree so that we can use it in our benchmarks
 * TODO: We've enabled Hazard Pointers as memory reclamation
 */
template<typename K>
class TrevorBrownABTree {
    static const int NODE_DEGREE = 16;
    const int ANY_KEY = 0;
    const int NUM_THREADS = 128;
    //ds_adapter>* tree;
    ds_adapter<K, void*>* tree;

public:
    TrevorBrownABTree(int numThreads) {
        //tree = new ds_adapter>(NUM_THREADS, ANY_KEY);
        tree = new ds_adapter<K, void*>(NUM_THREADS, ANY_KEY);
    }

    ~TrevorBrownABTree() { delete tree; }

    // Inserts a key only if it's not already present
    bool add(K key, const int tid=0) {
        int threadID = tl_tcico.tid;
        if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) {
            threadID = ThreadRegistry::getTID();
            tree->initThread(threadID);
        }
        return tree->insert(threadID, key, (void *) 1) != tree->getNoValue();
    }

    // Returns true only if the key was present
    bool remove(K key, const int tid=0) {
        int threadID = tl_tcico.tid;
        if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) {
            threadID = ThreadRegistry::getTID();
            tree->initThread(threadID);
        }
        return tree->erase(threadID, key) != tree->getNoValue();
    }

    bool contains(K key, const int tid=0) {
        int threadID = tl_tcico.tid;
        if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) {
            threadID = ThreadRegistry::getTID();
            tree->initThread(threadID);
        }
        return tree->contains(threadID, key);
    }
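    // [Editor's note] add(), remove() and contains() above all repeat the same lazy
    // thread-registration step. A minimal sketch of a factored-out helper, using only
    // names that already appear in this file (the helper name registeredTid is
    // hypothetical, not part of the original code):
    int registeredTid() {
        int threadID = tl_tcico.tid;
        if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) {
            threadID = ThreadRegistry::getTID();   // assign a tid to this thread once
            tree->initThread(threadID);            // per-thread init required by the AB-tree
        }
        return threadID;
    }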
    // This is not fully transactional, but that's OK because we only use it during initialization.
    // We could make it fully transactional, but we would have to increase the size of the allocation/store logs.
    void addAll(K** keys, int size, const int tid=0) {
        for (int i = 0; i < size; i++) add(*keys[i], tid);
    }

    static std::string className() { return "TrevorBrown-AB-Tree"; }
};

#endif // _TREVOR_BROWN_AB_TREE_HP_H_


================================================
FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/generalize-small.h
================================================
/* char_load */ #if defined(AO_HAVE_char_load_acquire) && !defined(AO_HAVE_char_load) # define AO_char_load(addr) AO_char_load_acquire(addr) # define AO_HAVE_char_load #endif #if defined(AO_HAVE_char_load_full) && !defined(AO_HAVE_char_load_acquire) # define AO_char_load_acquire(addr) AO_char_load_full(addr) # define AO_HAVE_char_load_acquire #endif #if defined(AO_HAVE_char_load_full) && !defined(AO_HAVE_char_load_read) # define AO_char_load_read(addr) AO_char_load_full(addr) # define AO_HAVE_char_load_read #endif #if !defined(AO_HAVE_char_load_acquire_read) && defined(AO_HAVE_char_load_acquire) # define AO_char_load_acquire_read(addr) AO_char_load_acquire(addr) # define AO_HAVE_char_load_acquire_read #endif #if defined(AO_HAVE_char_load) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_load_acquire) AO_INLINE unsigned char AO_char_load_acquire(const volatile unsigned char *addr) { unsigned char result = AO_char_load(addr); /* Acquire barrier would be useless, since the load could be delayed */ /* beyond it. */ AO_nop_full(); return result; } # define AO_HAVE_char_load_acquire #endif #if defined(AO_HAVE_char_load) && defined(AO_HAVE_nop_read) && \ !defined(AO_HAVE_char_load_read) AO_INLINE unsigned char AO_char_load_read(const volatile unsigned char *addr) { unsigned char result = AO_char_load(addr); /* Acquire barrier would be useless, since the load could be delayed */ /* beyond it.
*/ AO_nop_read(); return result; } # define AO_HAVE_char_load_read #endif #if defined(AO_HAVE_char_load_acquire) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_load_full) # define AO_char_load_full(addr) (AO_nop_full(), AO_char_load_acquire(addr)) # define AO_HAVE_char_load_full #endif #if !defined(AO_HAVE_char_load_acquire_read) && defined(AO_HAVE_char_load_read) # define AO_char_load_acquire_read(addr) AO_char_load_read(addr) # define AO_HAVE_char_load_acquire_read #endif #if defined(AO_HAVE_char_load_acquire_read) && !defined(AO_HAVE_char_load) # define AO_char_load(addr) AO_char_load_acquire_read(addr) # define AO_HAVE_char_load #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_char_load_acquire_read) # define AO_char_load_dd_acquire_read(addr) \ AO_char_load_acquire_read(addr) # define AO_HAVE_char_load_dd_acquire_read # endif #else # if defined(AO_HAVE_char_load) # define AO_char_load_dd_acquire_read(addr) \ AO_char_load(addr) # define AO_HAVE_char_load_dd_acquire_read # endif #endif /* char_store */ #if defined(AO_HAVE_char_store_release) && !defined(AO_HAVE_char_store) # define AO_char_store(addr, val) AO_char_store_release(addr,val) # define AO_HAVE_char_store #endif #if defined(AO_HAVE_char_store_full) && !defined(AO_HAVE_char_store_release) # define AO_char_store_release(addr,val) AO_char_store_full(addr,val) # define AO_HAVE_char_store_release #endif #if defined(AO_HAVE_char_store_full) && !defined(AO_HAVE_char_store_write) # define AO_char_store_write(addr,val) AO_char_store_full(addr,val) # define AO_HAVE_char_store_write #endif #if defined(AO_HAVE_char_store_release) && \ !defined(AO_HAVE_char_store_release_write) # define AO_char_store_release_write(addr, val) \ AO_char_store_release(addr,val) # define AO_HAVE_char_store_release_write #endif #if defined(AO_HAVE_char_store_write) && !defined(AO_HAVE_char_store) # define AO_char_store(addr, val) AO_char_store_write(addr,val) # define AO_HAVE_char_store #endif #if defined(AO_HAVE_char_store) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_store_release) # define AO_char_store_release(addr,val) \ (AO_nop_full(), AO_char_store(addr,val)) # define AO_HAVE_char_store_release #endif #if defined(AO_HAVE_nop_write) && defined(AO_HAVE_char_store) && \ !defined(AO_HAVE_char_store_write) # define AO_char_store_write(addr, val) \ (AO_nop_write(), AO_char_store(addr,val)) # define AO_HAVE_char_store_write #endif #if defined(AO_HAVE_char_store_write) && \ !defined(AO_HAVE_char_store_release_write) # define AO_char_store_release_write(addr, val) AO_char_store_write(addr,val) # define AO_HAVE_char_store_release_write #endif #if defined(AO_HAVE_char_store_release) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_store_full) # define AO_char_store_full(addr, val) \ (AO_char_store_release(addr, val), AO_nop_full()) # define AO_HAVE_char_store_full #endif /* char_fetch_and_add */ #if defined(AO_HAVE_char_compare_and_swap_full) && \ !defined(AO_HAVE_char_fetch_and_add_full) AO_INLINE AO_t AO_char_fetch_and_add_full(volatile unsigned char *addr, unsigned char incr) { unsigned char old; do { old = *addr; } while (!AO_char_compare_and_swap_full(addr, old, old+incr)); return old; } # define AO_HAVE_char_fetch_and_add_full #endif #if defined(AO_HAVE_char_compare_and_swap_acquire) && \ !defined(AO_HAVE_char_fetch_and_add_acquire) AO_INLINE AO_t AO_char_fetch_and_add_acquire(volatile unsigned char *addr, unsigned char incr) { unsigned char old; do { old = *addr; } while (!AO_char_compare_and_swap_acquire(addr, old, old+incr)); 
return old; } # define AO_HAVE_char_fetch_and_add_acquire #endif #if defined(AO_HAVE_char_compare_and_swap_release) && \ !defined(AO_HAVE_char_fetch_and_add_release) AO_INLINE AO_t AO_char_fetch_and_add_release(volatile unsigned char *addr, unsigned char incr) { unsigned char old; do { old = *addr; } while (!AO_char_compare_and_swap_release(addr, old, old+incr)); return old; } # define AO_HAVE_char_fetch_and_add_release #endif #if defined(AO_HAVE_char_fetch_and_add_full) # if !defined(AO_HAVE_char_fetch_and_add_release) # define AO_char_fetch_and_add_release(addr, val) \ AO_char_fetch_and_add_full(addr, val) # define AO_HAVE_char_fetch_and_add_release # endif # if !defined(AO_HAVE_char_fetch_and_add_acquire) # define AO_char_fetch_and_add_acquire(addr, val) \ AO_char_fetch_and_add_full(addr, val) # define AO_HAVE_char_fetch_and_add_acquire # endif # if !defined(AO_HAVE_char_fetch_and_add_write) # define AO_char_fetch_and_add_write(addr, val) \ AO_char_fetch_and_add_full(addr, val) # define AO_HAVE_char_fetch_and_add_write # endif # if !defined(AO_HAVE_char_fetch_and_add_read) # define AO_char_fetch_and_add_read(addr, val) \ AO_char_fetch_and_add_full(addr, val) # define AO_HAVE_char_fetch_and_add_read # endif #endif /* AO_HAVE_char_fetch_and_add_full */ #if !defined(AO_HAVE_char_fetch_and_add) && \ defined(AO_HAVE_char_fetch_and_add_release) # define AO_char_fetch_and_add(addr, val) \ AO_char_fetch_and_add_release(addr, val) # define AO_HAVE_char_fetch_and_add #endif #if !defined(AO_HAVE_char_fetch_and_add) && \ defined(AO_HAVE_char_fetch_and_add_acquire) # define AO_char_fetch_and_add(addr, val) \ AO_char_fetch_and_add_acquire(addr, val) # define AO_HAVE_char_fetch_and_add #endif #if !defined(AO_HAVE_char_fetch_and_add) && \ defined(AO_HAVE_char_fetch_and_add_write) # define AO_char_fetch_and_add(addr, val) \ AO_char_fetch_and_add_write(addr, val) # define AO_HAVE_char_fetch_and_add #endif #if !defined(AO_HAVE_char_fetch_and_add) && \ defined(AO_HAVE_char_fetch_and_add_read) # define AO_char_fetch_and_add(addr, val) \ AO_char_fetch_and_add_read(addr, val) # define AO_HAVE_char_fetch_and_add #endif #if defined(AO_HAVE_char_fetch_and_add_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_fetch_and_add_full) # define AO_char_fetch_and_add_full(addr, val) \ (AO_nop_full(), AO_char_fetch_and_add_acquire(addr, val)) #endif #if !defined(AO_HAVE_char_fetch_and_add_release_write) && \ defined(AO_HAVE_char_fetch_and_add_write) # define AO_char_fetch_and_add_release_write(addr, val) \ AO_char_fetch_and_add_write(addr, val) # define AO_HAVE_char_fetch_and_add_release_write #endif #if !defined(AO_HAVE_char_fetch_and_add_release_write) && \ defined(AO_HAVE_char_fetch_and_add_release) # define AO_char_fetch_and_add_release_write(addr, val) \ AO_char_fetch_and_add_release(addr, val) # define AO_HAVE_char_fetch_and_add_release_write #endif #if !defined(AO_HAVE_char_fetch_and_add_acquire_read) && \ defined(AO_HAVE_char_fetch_and_add_read) # define AO_char_fetch_and_add_acquire_read(addr, val) \ AO_char_fetch_and_add_read(addr, val) # define AO_HAVE_char_fetch_and_add_acquire_read #endif #if !defined(AO_HAVE_char_fetch_and_add_acquire_read) && \ defined(AO_HAVE_char_fetch_and_add_acquire) # define AO_char_fetch_and_add_acquire_read(addr, val) \ AO_char_fetch_and_add_acquire(addr, val) # define AO_HAVE_char_fetch_and_add_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_char_fetch_and_add_acquire_read) # define AO_char_fetch_and_add_dd_acquire_read(addr, val) \ 
AO_char_fetch_and_add_acquire_read(addr, val) # define AO_HAVE_char_fetch_and_add_dd_acquire_read # endif #else # if defined(AO_HAVE_char_fetch_and_add) # define AO_char_fetch_and_add_dd_acquire_read(addr, val) \ AO_char_fetch_and_add(addr, val) # define AO_HAVE_char_fetch_and_add_dd_acquire_read # endif #endif /* char_fetch_and_add1 */ #if defined(AO_HAVE_char_fetch_and_add_full) &&\ !defined(AO_HAVE_char_fetch_and_add1_full) # define AO_char_fetch_and_add1_full(addr) \ AO_char_fetch_and_add_full(addr,1) # define AO_HAVE_char_fetch_and_add1_full #endif #if defined(AO_HAVE_char_fetch_and_add_release) &&\ !defined(AO_HAVE_char_fetch_and_add1_release) # define AO_char_fetch_and_add1_release(addr) \ AO_char_fetch_and_add_release(addr,1) # define AO_HAVE_char_fetch_and_add1_release #endif #if defined(AO_HAVE_char_fetch_and_add_acquire) &&\ !defined(AO_HAVE_char_fetch_and_add1_acquire) # define AO_char_fetch_and_add1_acquire(addr) \ AO_char_fetch_and_add_acquire(addr,1) # define AO_HAVE_char_fetch_and_add1_acquire #endif #if defined(AO_HAVE_char_fetch_and_add_write) &&\ !defined(AO_HAVE_char_fetch_and_add1_write) # define AO_char_fetch_and_add1_write(addr) \ AO_char_fetch_and_add_write(addr,1) # define AO_HAVE_char_fetch_and_add1_write #endif #if defined(AO_HAVE_char_fetch_and_add_read) &&\ !defined(AO_HAVE_char_fetch_and_add1_read) # define AO_char_fetch_and_add1_read(addr) \ AO_char_fetch_and_add_read(addr,1) # define AO_HAVE_char_fetch_and_add1_read #endif #if defined(AO_HAVE_char_fetch_and_add_release_write) &&\ !defined(AO_HAVE_char_fetch_and_add1_release_write) # define AO_char_fetch_and_add1_release_write(addr) \ AO_char_fetch_and_add_release_write(addr,1) # define AO_HAVE_char_fetch_and_add1_release_write #endif #if defined(AO_HAVE_char_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_char_fetch_and_add1_acquire_read) # define AO_char_fetch_and_add1_acquire_read(addr) \ AO_char_fetch_and_add_acquire_read(addr,1) # define AO_HAVE_char_fetch_and_add1_acquire_read #endif #if defined(AO_HAVE_char_fetch_and_add) &&\ !defined(AO_HAVE_char_fetch_and_add1) # define AO_char_fetch_and_add1(addr) \ AO_char_fetch_and_add(addr,1) # define AO_HAVE_char_fetch_and_add1 #endif #if defined(AO_HAVE_char_fetch_and_add1_full) # if !defined(AO_HAVE_char_fetch_and_add1_release) # define AO_char_fetch_and_add1_release(addr) \ AO_char_fetch_and_add1_full(addr) # define AO_HAVE_char_fetch_and_add1_release # endif # if !defined(AO_HAVE_char_fetch_and_add1_acquire) # define AO_char_fetch_and_add1_acquire(addr) \ AO_char_fetch_and_add1_full(addr) # define AO_HAVE_char_fetch_and_add1_acquire # endif # if !defined(AO_HAVE_char_fetch_and_add1_write) # define AO_char_fetch_and_add1_write(addr) \ AO_char_fetch_and_add1_full(addr) # define AO_HAVE_char_fetch_and_add1_write # endif # if !defined(AO_HAVE_char_fetch_and_add1_read) # define AO_char_fetch_and_add1_read(addr) \ AO_char_fetch_and_add1_full(addr) # define AO_HAVE_char_fetch_and_add1_read # endif #endif /* AO_HAVE_char_fetch_and_add1_full */ #if !defined(AO_HAVE_char_fetch_and_add1) && \ defined(AO_HAVE_char_fetch_and_add1_release) # define AO_char_fetch_and_add1(addr) \ AO_char_fetch_and_add1_release(addr) # define AO_HAVE_char_fetch_and_add1 #endif #if !defined(AO_HAVE_char_fetch_and_add1) && \ defined(AO_HAVE_char_fetch_and_add1_acquire) # define AO_char_fetch_and_add1(addr) \ AO_char_fetch_and_add1_acquire(addr) # define AO_HAVE_char_fetch_and_add1 #endif #if !defined(AO_HAVE_char_fetch_and_add1) && \ defined(AO_HAVE_char_fetch_and_add1_write) # define 
AO_char_fetch_and_add1(addr) \ AO_char_fetch_and_add1_write(addr) # define AO_HAVE_char_fetch_and_add1 #endif #if !defined(AO_HAVE_char_fetch_and_add1) && \ defined(AO_HAVE_char_fetch_and_add1_read) # define AO_char_fetch_and_add1(addr) \ AO_char_fetch_and_add1_read(addr) # define AO_HAVE_char_fetch_and_add1 #endif #if defined(AO_HAVE_char_fetch_and_add1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_fetch_and_add1_full) # define AO_char_fetch_and_add1_full(addr) \ (AO_nop_full(), AO_char_fetch_and_add1_acquire(addr)) # define AO_HAVE_char_fetch_and_add1_full #endif #if !defined(AO_HAVE_char_fetch_and_add1_release_write) && \ defined(AO_HAVE_char_fetch_and_add1_write) # define AO_char_fetch_and_add1_release_write(addr) \ AO_char_fetch_and_add1_write(addr) # define AO_HAVE_char_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_char_fetch_and_add1_release_write) && \ defined(AO_HAVE_char_fetch_and_add1_release) # define AO_char_fetch_and_add1_release_write(addr) \ AO_char_fetch_and_add1_release(addr) # define AO_HAVE_char_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_char_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_char_fetch_and_add1_read) # define AO_char_fetch_and_add1_acquire_read(addr) \ AO_char_fetch_and_add1_read(addr) # define AO_HAVE_char_fetch_and_add1_acquire_read #endif #if !defined(AO_HAVE_char_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_char_fetch_and_add1_acquire) # define AO_char_fetch_and_add1_acquire_read(addr) \ AO_char_fetch_and_add1_acquire(addr) # define AO_HAVE_char_fetch_and_add1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_char_fetch_and_add1_acquire_read) # define AO_char_fetch_and_add1_dd_acquire_read(addr) \ AO_char_fetch_and_add1_acquire_read(addr) # define AO_HAVE_char_fetch_and_add1_dd_acquire_read # endif #else # if defined(AO_HAVE_char_fetch_and_add1) # define AO_char_fetch_and_add1_dd_acquire_read(addr) \ AO_char_fetch_and_add1(addr) # define AO_HAVE_char_fetch_and_add1_dd_acquire_read # endif #endif /* char_fetch_and_sub1 */ #if defined(AO_HAVE_char_fetch_and_add_full) &&\ !defined(AO_HAVE_char_fetch_and_sub1_full) # define AO_char_fetch_and_sub1_full(addr) \ AO_char_fetch_and_add_full(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1_full #endif #if defined(AO_HAVE_char_fetch_and_add_release) &&\ !defined(AO_HAVE_char_fetch_and_sub1_release) # define AO_char_fetch_and_sub1_release(addr) \ AO_char_fetch_and_add_release(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1_release #endif #if defined(AO_HAVE_char_fetch_and_add_acquire) &&\ !defined(AO_HAVE_char_fetch_and_sub1_acquire) # define AO_char_fetch_and_sub1_acquire(addr) \ AO_char_fetch_and_add_acquire(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1_acquire #endif #if defined(AO_HAVE_char_fetch_and_add_write) &&\ !defined(AO_HAVE_char_fetch_and_sub1_write) # define AO_char_fetch_and_sub1_write(addr) \ AO_char_fetch_and_add_write(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1_write #endif #if defined(AO_HAVE_char_fetch_and_add_read) &&\ !defined(AO_HAVE_char_fetch_and_sub1_read) # define AO_char_fetch_and_sub1_read(addr) \ AO_char_fetch_and_add_read(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1_read #endif #if defined(AO_HAVE_char_fetch_and_add_release_write) &&\ !defined(AO_HAVE_char_fetch_and_sub1_release_write) # define AO_char_fetch_and_sub1_release_write(addr) \ AO_char_fetch_and_add_release_write(addr,(unsigned char)(-1)) # define 
AO_HAVE_char_fetch_and_sub1_release_write #endif #if defined(AO_HAVE_char_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_char_fetch_and_sub1_acquire_read) # define AO_char_fetch_and_sub1_acquire_read(addr) \ AO_char_fetch_and_add_acquire_read(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1_acquire_read #endif #if defined(AO_HAVE_char_fetch_and_add) &&\ !defined(AO_HAVE_char_fetch_and_sub1) # define AO_char_fetch_and_sub1(addr) \ AO_char_fetch_and_add(addr,(unsigned char)(-1)) # define AO_HAVE_char_fetch_and_sub1 #endif #if defined(AO_HAVE_char_fetch_and_sub1_full) # if !defined(AO_HAVE_char_fetch_and_sub1_release) # define AO_char_fetch_and_sub1_release(addr) \ AO_char_fetch_and_sub1_full(addr) # define AO_HAVE_char_fetch_and_sub1_release # endif # if !defined(AO_HAVE_char_fetch_and_sub1_acquire) # define AO_char_fetch_and_sub1_acquire(addr) \ AO_char_fetch_and_sub1_full(addr) # define AO_HAVE_char_fetch_and_sub1_acquire # endif # if !defined(AO_HAVE_char_fetch_and_sub1_write) # define AO_char_fetch_and_sub1_write(addr) \ AO_char_fetch_and_sub1_full(addr) # define AO_HAVE_char_fetch_and_sub1_write # endif # if !defined(AO_HAVE_char_fetch_and_sub1_read) # define AO_char_fetch_and_sub1_read(addr) \ AO_char_fetch_and_sub1_full(addr) # define AO_HAVE_char_fetch_and_sub1_read # endif #endif /* AO_HAVE_char_fetch_and_sub1_full */ #if !defined(AO_HAVE_char_fetch_and_sub1) && \ defined(AO_HAVE_char_fetch_and_sub1_release) # define AO_char_fetch_and_sub1(addr) \ AO_char_fetch_and_sub1_release(addr) # define AO_HAVE_char_fetch_and_sub1 #endif #if !defined(AO_HAVE_char_fetch_and_sub1) && \ defined(AO_HAVE_char_fetch_and_sub1_acquire) # define AO_char_fetch_and_sub1(addr) \ AO_char_fetch_and_sub1_acquire(addr) # define AO_HAVE_char_fetch_and_sub1 #endif #if !defined(AO_HAVE_char_fetch_and_sub1) && \ defined(AO_HAVE_char_fetch_and_sub1_write) # define AO_char_fetch_and_sub1(addr) \ AO_char_fetch_and_sub1_write(addr) # define AO_HAVE_char_fetch_and_sub1 #endif #if !defined(AO_HAVE_char_fetch_and_sub1) && \ defined(AO_HAVE_char_fetch_and_sub1_read) # define AO_char_fetch_and_sub1(addr) \ AO_char_fetch_and_sub1_read(addr) # define AO_HAVE_char_fetch_and_sub1 #endif #if defined(AO_HAVE_char_fetch_and_sub1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_char_fetch_and_sub1_full) # define AO_char_fetch_and_sub1_full(addr) \ (AO_nop_full(), AO_char_fetch_and_sub1_acquire(addr)) # define AO_HAVE_char_fetch_and_sub1_full #endif #if !defined(AO_HAVE_char_fetch_and_sub1_release_write) && \ defined(AO_HAVE_char_fetch_and_sub1_write) # define AO_char_fetch_and_sub1_release_write(addr) \ AO_char_fetch_and_sub1_write(addr) # define AO_HAVE_char_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_char_fetch_and_sub1_release_write) && \ defined(AO_HAVE_char_fetch_and_sub1_release) # define AO_char_fetch_and_sub1_release_write(addr) \ AO_char_fetch_and_sub1_release(addr) # define AO_HAVE_char_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_char_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_char_fetch_and_sub1_read) # define AO_char_fetch_and_sub1_acquire_read(addr) \ AO_char_fetch_and_sub1_read(addr) # define AO_HAVE_char_fetch_and_sub1_acquire_read #endif #if !defined(AO_HAVE_char_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_char_fetch_and_sub1_acquire) # define AO_char_fetch_and_sub1_acquire_read(addr) \ AO_char_fetch_and_sub1_acquire(addr) # define AO_HAVE_char_fetch_and_sub1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if 
defined(AO_HAVE_char_fetch_and_sub1_acquire_read) # define AO_char_fetch_and_sub1_dd_acquire_read(addr) \ AO_char_fetch_and_sub1_acquire_read(addr) # define AO_HAVE_char_fetch_and_sub1_dd_acquire_read # endif #else # if defined(AO_HAVE_char_fetch_and_sub1) # define AO_char_fetch_and_sub1_dd_acquire_read(addr) \ AO_char_fetch_and_sub1(addr) # define AO_HAVE_char_fetch_and_sub1_dd_acquire_read # endif #endif /* short_load */ #if defined(AO_HAVE_short_load_acquire) && !defined(AO_HAVE_short_load) # define AO_short_load(addr) AO_short_load_acquire(addr) # define AO_HAVE_short_load #endif #if defined(AO_HAVE_short_load_full) && !defined(AO_HAVE_short_load_acquire) # define AO_short_load_acquire(addr) AO_short_load_full(addr) # define AO_HAVE_short_load_acquire #endif #if defined(AO_HAVE_short_load_full) && !defined(AO_HAVE_short_load_read) # define AO_short_load_read(addr) AO_short_load_full(addr) # define AO_HAVE_short_load_read #endif #if !defined(AO_HAVE_short_load_acquire_read) && defined(AO_HAVE_short_load_acquire) # define AO_short_load_acquire_read(addr) AO_short_load_acquire(addr) # define AO_HAVE_short_load_acquire_read #endif #if defined(AO_HAVE_short_load) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_load_acquire) AO_INLINE unsigned short AO_short_load_acquire(const volatile unsigned short *addr) { unsigned short result = AO_short_load(addr); /* Acquire barrier would be useless, since the load could be delayed */ /* beyond it. */ AO_nop_full(); return result; } # define AO_HAVE_short_load_acquire #endif #if defined(AO_HAVE_short_load) && defined(AO_HAVE_nop_read) && \ !defined(AO_HAVE_short_load_read) AO_INLINE unsigned short AO_short_load_read(const volatile unsigned short *addr) { unsigned short result = AO_short_load(addr); /* Acquire barrier would be useless, since the load could be delayed */ /* beyond it. 
*/ AO_nop_read(); return result; } # define AO_HAVE_short_load_read #endif #if defined(AO_HAVE_short_load_acquire) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_load_full) # define AO_short_load_full(addr) (AO_nop_full(), AO_short_load_acquire(addr)) # define AO_HAVE_short_load_full #endif #if !defined(AO_HAVE_short_load_acquire_read) && defined(AO_HAVE_short_load_read) # define AO_short_load_acquire_read(addr) AO_short_load_read(addr) # define AO_HAVE_short_load_acquire_read #endif #if defined(AO_HAVE_short_load_acquire_read) && !defined(AO_HAVE_short_load) # define AO_short_load(addr) AO_short_load_acquire_read(addr) # define AO_HAVE_short_load #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_short_load_acquire_read) # define AO_short_load_dd_acquire_read(addr) \ AO_short_load_acquire_read(addr) # define AO_HAVE_short_load_dd_acquire_read # endif #else # if defined(AO_HAVE_short_load) # define AO_short_load_dd_acquire_read(addr) \ AO_short_load(addr) # define AO_HAVE_short_load_dd_acquire_read # endif #endif /* short_store */ #if defined(AO_HAVE_short_store_release) && !defined(AO_HAVE_short_store) # define AO_short_store(addr, val) AO_short_store_release(addr,val) # define AO_HAVE_short_store #endif #if defined(AO_HAVE_short_store_full) && !defined(AO_HAVE_short_store_release) # define AO_short_store_release(addr,val) AO_short_store_full(addr,val) # define AO_HAVE_short_store_release #endif #if defined(AO_HAVE_short_store_full) && !defined(AO_HAVE_short_store_write) # define AO_short_store_write(addr,val) AO_short_store_full(addr,val) # define AO_HAVE_short_store_write #endif #if defined(AO_HAVE_short_store_release) && \ !defined(AO_HAVE_short_store_release_write) # define AO_short_store_release_write(addr, val) \ AO_short_store_release(addr,val) # define AO_HAVE_short_store_release_write #endif #if defined(AO_HAVE_short_store_write) && !defined(AO_HAVE_short_store) # define AO_short_store(addr, val) AO_short_store_write(addr,val) # define AO_HAVE_short_store #endif #if defined(AO_HAVE_short_store) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_store_release) # define AO_short_store_release(addr,val) \ (AO_nop_full(), AO_short_store(addr,val)) # define AO_HAVE_short_store_release #endif #if defined(AO_HAVE_nop_write) && defined(AO_HAVE_short_store) && \ !defined(AO_HAVE_short_store_write) # define AO_short_store_write(addr, val) \ (AO_nop_write(), AO_short_store(addr,val)) # define AO_HAVE_short_store_write #endif #if defined(AO_HAVE_short_store_write) && \ !defined(AO_HAVE_short_store_release_write) # define AO_short_store_release_write(addr, val) AO_short_store_write(addr,val) # define AO_HAVE_short_store_release_write #endif #if defined(AO_HAVE_short_store_release) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_store_full) # define AO_short_store_full(addr, val) \ (AO_short_store_release(addr, val), AO_nop_full()) # define AO_HAVE_short_store_full #endif /* short_fetch_and_add */ #if defined(AO_HAVE_short_compare_and_swap_full) && \ !defined(AO_HAVE_short_fetch_and_add_full) AO_INLINE AO_t AO_short_fetch_and_add_full(volatile unsigned short *addr, unsigned short incr) { unsigned short old; do { old = *addr; } while (!AO_short_compare_and_swap_full(addr, old, old+incr)); return old; } # define AO_HAVE_short_fetch_and_add_full #endif #if defined(AO_HAVE_short_compare_and_swap_acquire) && \ !defined(AO_HAVE_short_fetch_and_add_acquire) AO_INLINE AO_t AO_short_fetch_and_add_acquire(volatile unsigned short *addr, unsigned short incr) { unsigned short 
old; do { old = *addr; } while (!AO_short_compare_and_swap_acquire(addr, old, old+incr)); return old; } # define AO_HAVE_short_fetch_and_add_acquire #endif #if defined(AO_HAVE_short_compare_and_swap_release) && \ !defined(AO_HAVE_short_fetch_and_add_release) AO_INLINE AO_t AO_short_fetch_and_add_release(volatile unsigned short *addr, unsigned short incr) { unsigned short old; do { old = *addr; } while (!AO_short_compare_and_swap_release(addr, old, old+incr)); return old; } # define AO_HAVE_short_fetch_and_add_release #endif #if defined(AO_HAVE_short_fetch_and_add_full) # if !defined(AO_HAVE_short_fetch_and_add_release) # define AO_short_fetch_and_add_release(addr, val) \ AO_short_fetch_and_add_full(addr, val) # define AO_HAVE_short_fetch_and_add_release # endif # if !defined(AO_HAVE_short_fetch_and_add_acquire) # define AO_short_fetch_and_add_acquire(addr, val) \ AO_short_fetch_and_add_full(addr, val) # define AO_HAVE_short_fetch_and_add_acquire # endif # if !defined(AO_HAVE_short_fetch_and_add_write) # define AO_short_fetch_and_add_write(addr, val) \ AO_short_fetch_and_add_full(addr, val) # define AO_HAVE_short_fetch_and_add_write # endif # if !defined(AO_HAVE_short_fetch_and_add_read) # define AO_short_fetch_and_add_read(addr, val) \ AO_short_fetch_and_add_full(addr, val) # define AO_HAVE_short_fetch_and_add_read # endif #endif /* AO_HAVE_short_fetch_and_add_full */ #if !defined(AO_HAVE_short_fetch_and_add) && \ defined(AO_HAVE_short_fetch_and_add_release) # define AO_short_fetch_and_add(addr, val) \ AO_short_fetch_and_add_release(addr, val) # define AO_HAVE_short_fetch_and_add #endif #if !defined(AO_HAVE_short_fetch_and_add) && \ defined(AO_HAVE_short_fetch_and_add_acquire) # define AO_short_fetch_and_add(addr, val) \ AO_short_fetch_and_add_acquire(addr, val) # define AO_HAVE_short_fetch_and_add #endif #if !defined(AO_HAVE_short_fetch_and_add) && \ defined(AO_HAVE_short_fetch_and_add_write) # define AO_short_fetch_and_add(addr, val) \ AO_short_fetch_and_add_write(addr, val) # define AO_HAVE_short_fetch_and_add #endif #if !defined(AO_HAVE_short_fetch_and_add) && \ defined(AO_HAVE_short_fetch_and_add_read) # define AO_short_fetch_and_add(addr, val) \ AO_short_fetch_and_add_read(addr, val) # define AO_HAVE_short_fetch_and_add #endif #if defined(AO_HAVE_short_fetch_and_add_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_fetch_and_add_full) # define AO_short_fetch_and_add_full(addr, val) \ (AO_nop_full(), AO_short_fetch_and_add_acquire(addr, val)) #endif #if !defined(AO_HAVE_short_fetch_and_add_release_write) && \ defined(AO_HAVE_short_fetch_and_add_write) # define AO_short_fetch_and_add_release_write(addr, val) \ AO_short_fetch_and_add_write(addr, val) # define AO_HAVE_short_fetch_and_add_release_write #endif #if !defined(AO_HAVE_short_fetch_and_add_release_write) && \ defined(AO_HAVE_short_fetch_and_add_release) # define AO_short_fetch_and_add_release_write(addr, val) \ AO_short_fetch_and_add_release(addr, val) # define AO_HAVE_short_fetch_and_add_release_write #endif #if !defined(AO_HAVE_short_fetch_and_add_acquire_read) && \ defined(AO_HAVE_short_fetch_and_add_read) # define AO_short_fetch_and_add_acquire_read(addr, val) \ AO_short_fetch_and_add_read(addr, val) # define AO_HAVE_short_fetch_and_add_acquire_read #endif #if !defined(AO_HAVE_short_fetch_and_add_acquire_read) && \ defined(AO_HAVE_short_fetch_and_add_acquire) # define AO_short_fetch_and_add_acquire_read(addr, val) \ AO_short_fetch_and_add_acquire(addr, val) # define AO_HAVE_short_fetch_and_add_acquire_read 
#endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_short_fetch_and_add_acquire_read) # define AO_short_fetch_and_add_dd_acquire_read(addr, val) \ AO_short_fetch_and_add_acquire_read(addr, val) # define AO_HAVE_short_fetch_and_add_dd_acquire_read # endif #else # if defined(AO_HAVE_short_fetch_and_add) # define AO_short_fetch_and_add_dd_acquire_read(addr, val) \ AO_short_fetch_and_add(addr, val) # define AO_HAVE_short_fetch_and_add_dd_acquire_read # endif #endif /* short_fetch_and_add1 */ #if defined(AO_HAVE_short_fetch_and_add_full) &&\ !defined(AO_HAVE_short_fetch_and_add1_full) # define AO_short_fetch_and_add1_full(addr) \ AO_short_fetch_and_add_full(addr,1) # define AO_HAVE_short_fetch_and_add1_full #endif #if defined(AO_HAVE_short_fetch_and_add_release) &&\ !defined(AO_HAVE_short_fetch_and_add1_release) # define AO_short_fetch_and_add1_release(addr) \ AO_short_fetch_and_add_release(addr,1) # define AO_HAVE_short_fetch_and_add1_release #endif #if defined(AO_HAVE_short_fetch_and_add_acquire) &&\ !defined(AO_HAVE_short_fetch_and_add1_acquire) # define AO_short_fetch_and_add1_acquire(addr) \ AO_short_fetch_and_add_acquire(addr,1) # define AO_HAVE_short_fetch_and_add1_acquire #endif #if defined(AO_HAVE_short_fetch_and_add_write) &&\ !defined(AO_HAVE_short_fetch_and_add1_write) # define AO_short_fetch_and_add1_write(addr) \ AO_short_fetch_and_add_write(addr,1) # define AO_HAVE_short_fetch_and_add1_write #endif #if defined(AO_HAVE_short_fetch_and_add_read) &&\ !defined(AO_HAVE_short_fetch_and_add1_read) # define AO_short_fetch_and_add1_read(addr) \ AO_short_fetch_and_add_read(addr,1) # define AO_HAVE_short_fetch_and_add1_read #endif #if defined(AO_HAVE_short_fetch_and_add_release_write) &&\ !defined(AO_HAVE_short_fetch_and_add1_release_write) # define AO_short_fetch_and_add1_release_write(addr) \ AO_short_fetch_and_add_release_write(addr,1) # define AO_HAVE_short_fetch_and_add1_release_write #endif #if defined(AO_HAVE_short_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_short_fetch_and_add1_acquire_read) # define AO_short_fetch_and_add1_acquire_read(addr) \ AO_short_fetch_and_add_acquire_read(addr,1) # define AO_HAVE_short_fetch_and_add1_acquire_read #endif #if defined(AO_HAVE_short_fetch_and_add) &&\ !defined(AO_HAVE_short_fetch_and_add1) # define AO_short_fetch_and_add1(addr) \ AO_short_fetch_and_add(addr,1) # define AO_HAVE_short_fetch_and_add1 #endif #if defined(AO_HAVE_short_fetch_and_add1_full) # if !defined(AO_HAVE_short_fetch_and_add1_release) # define AO_short_fetch_and_add1_release(addr) \ AO_short_fetch_and_add1_full(addr) # define AO_HAVE_short_fetch_and_add1_release # endif # if !defined(AO_HAVE_short_fetch_and_add1_acquire) # define AO_short_fetch_and_add1_acquire(addr) \ AO_short_fetch_and_add1_full(addr) # define AO_HAVE_short_fetch_and_add1_acquire # endif # if !defined(AO_HAVE_short_fetch_and_add1_write) # define AO_short_fetch_and_add1_write(addr) \ AO_short_fetch_and_add1_full(addr) # define AO_HAVE_short_fetch_and_add1_write # endif # if !defined(AO_HAVE_short_fetch_and_add1_read) # define AO_short_fetch_and_add1_read(addr) \ AO_short_fetch_and_add1_full(addr) # define AO_HAVE_short_fetch_and_add1_read # endif #endif /* AO_HAVE_short_fetch_and_add1_full */ #if !defined(AO_HAVE_short_fetch_and_add1) && \ defined(AO_HAVE_short_fetch_and_add1_release) # define AO_short_fetch_and_add1(addr) \ AO_short_fetch_and_add1_release(addr) # define AO_HAVE_short_fetch_and_add1 #endif #if !defined(AO_HAVE_short_fetch_and_add1) && \ defined(AO_HAVE_short_fetch_and_add1_acquire) # 
define AO_short_fetch_and_add1(addr) \ AO_short_fetch_and_add1_acquire(addr) # define AO_HAVE_short_fetch_and_add1 #endif #if !defined(AO_HAVE_short_fetch_and_add1) && \ defined(AO_HAVE_short_fetch_and_add1_write) # define AO_short_fetch_and_add1(addr) \ AO_short_fetch_and_add1_write(addr) # define AO_HAVE_short_fetch_and_add1 #endif #if !defined(AO_HAVE_short_fetch_and_add1) && \ defined(AO_HAVE_short_fetch_and_add1_read) # define AO_short_fetch_and_add1(addr) \ AO_short_fetch_and_add1_read(addr) # define AO_HAVE_short_fetch_and_add1 #endif #if defined(AO_HAVE_short_fetch_and_add1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_fetch_and_add1_full) # define AO_short_fetch_and_add1_full(addr) \ (AO_nop_full(), AO_short_fetch_and_add1_acquire(addr)) # define AO_HAVE_short_fetch_and_add1_full #endif #if !defined(AO_HAVE_short_fetch_and_add1_release_write) && \ defined(AO_HAVE_short_fetch_and_add1_write) # define AO_short_fetch_and_add1_release_write(addr) \ AO_short_fetch_and_add1_write(addr) # define AO_HAVE_short_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_short_fetch_and_add1_release_write) && \ defined(AO_HAVE_short_fetch_and_add1_release) # define AO_short_fetch_and_add1_release_write(addr) \ AO_short_fetch_and_add1_release(addr) # define AO_HAVE_short_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_short_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_short_fetch_and_add1_read) # define AO_short_fetch_and_add1_acquire_read(addr) \ AO_short_fetch_and_add1_read(addr) # define AO_HAVE_short_fetch_and_add1_acquire_read #endif #if !defined(AO_HAVE_short_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_short_fetch_and_add1_acquire) # define AO_short_fetch_and_add1_acquire_read(addr) \ AO_short_fetch_and_add1_acquire(addr) # define AO_HAVE_short_fetch_and_add1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_short_fetch_and_add1_acquire_read) # define AO_short_fetch_and_add1_dd_acquire_read(addr) \ AO_short_fetch_and_add1_acquire_read(addr) # define AO_HAVE_short_fetch_and_add1_dd_acquire_read # endif #else # if defined(AO_HAVE_short_fetch_and_add1) # define AO_short_fetch_and_add1_dd_acquire_read(addr) \ AO_short_fetch_and_add1(addr) # define AO_HAVE_short_fetch_and_add1_dd_acquire_read # endif #endif /* short_fetch_and_sub1 */ #if defined(AO_HAVE_short_fetch_and_add_full) &&\ !defined(AO_HAVE_short_fetch_and_sub1_full) # define AO_short_fetch_and_sub1_full(addr) \ AO_short_fetch_and_add_full(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1_full #endif #if defined(AO_HAVE_short_fetch_and_add_release) &&\ !defined(AO_HAVE_short_fetch_and_sub1_release) # define AO_short_fetch_and_sub1_release(addr) \ AO_short_fetch_and_add_release(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1_release #endif #if defined(AO_HAVE_short_fetch_and_add_acquire) &&\ !defined(AO_HAVE_short_fetch_and_sub1_acquire) # define AO_short_fetch_and_sub1_acquire(addr) \ AO_short_fetch_and_add_acquire(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1_acquire #endif #if defined(AO_HAVE_short_fetch_and_add_write) &&\ !defined(AO_HAVE_short_fetch_and_sub1_write) # define AO_short_fetch_and_sub1_write(addr) \ AO_short_fetch_and_add_write(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1_write #endif #if defined(AO_HAVE_short_fetch_and_add_read) &&\ !defined(AO_HAVE_short_fetch_and_sub1_read) # define AO_short_fetch_and_sub1_read(addr) \ AO_short_fetch_and_add_read(addr,(unsigned short)(-1)) # define 
AO_HAVE_short_fetch_and_sub1_read #endif #if defined(AO_HAVE_short_fetch_and_add_release_write) &&\ !defined(AO_HAVE_short_fetch_and_sub1_release_write) # define AO_short_fetch_and_sub1_release_write(addr) \ AO_short_fetch_and_add_release_write(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1_release_write #endif #if defined(AO_HAVE_short_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_short_fetch_and_sub1_acquire_read) # define AO_short_fetch_and_sub1_acquire_read(addr) \ AO_short_fetch_and_add_acquire_read(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1_acquire_read #endif #if defined(AO_HAVE_short_fetch_and_add) &&\ !defined(AO_HAVE_short_fetch_and_sub1) # define AO_short_fetch_and_sub1(addr) \ AO_short_fetch_and_add(addr,(unsigned short)(-1)) # define AO_HAVE_short_fetch_and_sub1 #endif #if defined(AO_HAVE_short_fetch_and_sub1_full) # if !defined(AO_HAVE_short_fetch_and_sub1_release) # define AO_short_fetch_and_sub1_release(addr) \ AO_short_fetch_and_sub1_full(addr) # define AO_HAVE_short_fetch_and_sub1_release # endif # if !defined(AO_HAVE_short_fetch_and_sub1_acquire) # define AO_short_fetch_and_sub1_acquire(addr) \ AO_short_fetch_and_sub1_full(addr) # define AO_HAVE_short_fetch_and_sub1_acquire # endif # if !defined(AO_HAVE_short_fetch_and_sub1_write) # define AO_short_fetch_and_sub1_write(addr) \ AO_short_fetch_and_sub1_full(addr) # define AO_HAVE_short_fetch_and_sub1_write # endif # if !defined(AO_HAVE_short_fetch_and_sub1_read) # define AO_short_fetch_and_sub1_read(addr) \ AO_short_fetch_and_sub1_full(addr) # define AO_HAVE_short_fetch_and_sub1_read # endif #endif /* AO_HAVE_short_fetch_and_sub1_full */ #if !defined(AO_HAVE_short_fetch_and_sub1) && \ defined(AO_HAVE_short_fetch_and_sub1_release) # define AO_short_fetch_and_sub1(addr) \ AO_short_fetch_and_sub1_release(addr) # define AO_HAVE_short_fetch_and_sub1 #endif #if !defined(AO_HAVE_short_fetch_and_sub1) && \ defined(AO_HAVE_short_fetch_and_sub1_acquire) # define AO_short_fetch_and_sub1(addr) \ AO_short_fetch_and_sub1_acquire(addr) # define AO_HAVE_short_fetch_and_sub1 #endif #if !defined(AO_HAVE_short_fetch_and_sub1) && \ defined(AO_HAVE_short_fetch_and_sub1_write) # define AO_short_fetch_and_sub1(addr) \ AO_short_fetch_and_sub1_write(addr) # define AO_HAVE_short_fetch_and_sub1 #endif #if !defined(AO_HAVE_short_fetch_and_sub1) && \ defined(AO_HAVE_short_fetch_and_sub1_read) # define AO_short_fetch_and_sub1(addr) \ AO_short_fetch_and_sub1_read(addr) # define AO_HAVE_short_fetch_and_sub1 #endif #if defined(AO_HAVE_short_fetch_and_sub1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_short_fetch_and_sub1_full) # define AO_short_fetch_and_sub1_full(addr) \ (AO_nop_full(), AO_short_fetch_and_sub1_acquire(addr)) # define AO_HAVE_short_fetch_and_sub1_full #endif #if !defined(AO_HAVE_short_fetch_and_sub1_release_write) && \ defined(AO_HAVE_short_fetch_and_sub1_write) # define AO_short_fetch_and_sub1_release_write(addr) \ AO_short_fetch_and_sub1_write(addr) # define AO_HAVE_short_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_short_fetch_and_sub1_release_write) && \ defined(AO_HAVE_short_fetch_and_sub1_release) # define AO_short_fetch_and_sub1_release_write(addr) \ AO_short_fetch_and_sub1_release(addr) # define AO_HAVE_short_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_short_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_short_fetch_and_sub1_read) # define AO_short_fetch_and_sub1_acquire_read(addr) \ AO_short_fetch_and_sub1_read(addr) # define 
AO_HAVE_short_fetch_and_sub1_acquire_read #endif #if !defined(AO_HAVE_short_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_short_fetch_and_sub1_acquire) # define AO_short_fetch_and_sub1_acquire_read(addr) \ AO_short_fetch_and_sub1_acquire(addr) # define AO_HAVE_short_fetch_and_sub1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_short_fetch_and_sub1_acquire_read) # define AO_short_fetch_and_sub1_dd_acquire_read(addr) \ AO_short_fetch_and_sub1_acquire_read(addr) # define AO_HAVE_short_fetch_and_sub1_dd_acquire_read # endif #else # if defined(AO_HAVE_short_fetch_and_sub1) # define AO_short_fetch_and_sub1_dd_acquire_read(addr) \ AO_short_fetch_and_sub1(addr) # define AO_HAVE_short_fetch_and_sub1_dd_acquire_read # endif #endif /* int_load */ #if defined(AO_HAVE_int_load_acquire) && !defined(AO_HAVE_int_load) # define AO_int_load(addr) AO_int_load_acquire(addr) # define AO_HAVE_int_load #endif #if defined(AO_HAVE_int_load_full) && !defined(AO_HAVE_int_load_acquire) # define AO_int_load_acquire(addr) AO_int_load_full(addr) # define AO_HAVE_int_load_acquire #endif #if defined(AO_HAVE_int_load_full) && !defined(AO_HAVE_int_load_read) # define AO_int_load_read(addr) AO_int_load_full(addr) # define AO_HAVE_int_load_read #endif #if !defined(AO_HAVE_int_load_acquire_read) && defined(AO_HAVE_int_load_acquire) # define AO_int_load_acquire_read(addr) AO_int_load_acquire(addr) # define AO_HAVE_int_load_acquire_read #endif #if defined(AO_HAVE_int_load) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_load_acquire) AO_INLINE unsigned int AO_int_load_acquire(const volatile unsigned int *addr) { unsigned int result = AO_int_load(addr); /* Acquire barrier would be useless, since the load could be delayed */ /* beyond it. */ AO_nop_full(); return result; } # define AO_HAVE_int_load_acquire #endif #if defined(AO_HAVE_int_load) && defined(AO_HAVE_nop_read) && \ !defined(AO_HAVE_int_load_read) AO_INLINE unsigned int AO_int_load_read(const volatile unsigned int *addr) { unsigned int result = AO_int_load(addr); /* Acquire barrier would be useless, since the load could be delayed */ /* beyond it. 
*/ AO_nop_read(); return result; } # define AO_HAVE_int_load_read #endif #if defined(AO_HAVE_int_load_acquire) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_load_full) # define AO_int_load_full(addr) (AO_nop_full(), AO_int_load_acquire(addr)) # define AO_HAVE_int_load_full #endif #if !defined(AO_HAVE_int_load_acquire_read) && defined(AO_HAVE_int_load_read) # define AO_int_load_acquire_read(addr) AO_int_load_read(addr) # define AO_HAVE_int_load_acquire_read #endif #if defined(AO_HAVE_int_load_acquire_read) && !defined(AO_HAVE_int_load) # define AO_int_load(addr) AO_int_load_acquire_read(addr) # define AO_HAVE_int_load #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_int_load_acquire_read) # define AO_int_load_dd_acquire_read(addr) \ AO_int_load_acquire_read(addr) # define AO_HAVE_int_load_dd_acquire_read # endif #else # if defined(AO_HAVE_int_load) # define AO_int_load_dd_acquire_read(addr) \ AO_int_load(addr) # define AO_HAVE_int_load_dd_acquire_read # endif #endif /* int_store */ #if defined(AO_HAVE_int_store_release) && !defined(AO_HAVE_int_store) # define AO_int_store(addr, val) AO_int_store_release(addr,val) # define AO_HAVE_int_store #endif #if defined(AO_HAVE_int_store_full) && !defined(AO_HAVE_int_store_release) # define AO_int_store_release(addr,val) AO_int_store_full(addr,val) # define AO_HAVE_int_store_release #endif #if defined(AO_HAVE_int_store_full) && !defined(AO_HAVE_int_store_write) # define AO_int_store_write(addr,val) AO_int_store_full(addr,val) # define AO_HAVE_int_store_write #endif #if defined(AO_HAVE_int_store_release) && \ !defined(AO_HAVE_int_store_release_write) # define AO_int_store_release_write(addr, val) \ AO_int_store_release(addr,val) # define AO_HAVE_int_store_release_write #endif #if defined(AO_HAVE_int_store_write) && !defined(AO_HAVE_int_store) # define AO_int_store(addr, val) AO_int_store_write(addr,val) # define AO_HAVE_int_store #endif #if defined(AO_HAVE_int_store) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_store_release) # define AO_int_store_release(addr,val) \ (AO_nop_full(), AO_int_store(addr,val)) # define AO_HAVE_int_store_release #endif #if defined(AO_HAVE_nop_write) && defined(AO_HAVE_int_store) && \ !defined(AO_HAVE_int_store_write) # define AO_int_store_write(addr, val) \ (AO_nop_write(), AO_int_store(addr,val)) # define AO_HAVE_int_store_write #endif #if defined(AO_HAVE_int_store_write) && \ !defined(AO_HAVE_int_store_release_write) # define AO_int_store_release_write(addr, val) AO_int_store_write(addr,val) # define AO_HAVE_int_store_release_write #endif #if defined(AO_HAVE_int_store_release) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_store_full) # define AO_int_store_full(addr, val) \ (AO_int_store_release(addr, val), AO_nop_full()) # define AO_HAVE_int_store_full #endif /* int_fetch_and_add */ #if defined(AO_HAVE_int_compare_and_swap_full) && \ !defined(AO_HAVE_int_fetch_and_add_full) AO_INLINE AO_t AO_int_fetch_and_add_full(volatile unsigned int *addr, unsigned int incr) { unsigned int old; do { old = *addr; } while (!AO_int_compare_and_swap_full(addr, old, old+incr)); return old; } # define AO_HAVE_int_fetch_and_add_full #endif #if defined(AO_HAVE_int_compare_and_swap_acquire) && \ !defined(AO_HAVE_int_fetch_and_add_acquire) AO_INLINE AO_t AO_int_fetch_and_add_acquire(volatile unsigned int *addr, unsigned int incr) { unsigned int old; do { old = *addr; } while (!AO_int_compare_and_swap_acquire(addr, old, old+incr)); return old; } # define AO_HAVE_int_fetch_and_add_acquire #endif #if 
defined(AO_HAVE_int_compare_and_swap_release) && \ !defined(AO_HAVE_int_fetch_and_add_release) AO_INLINE AO_t AO_int_fetch_and_add_release(volatile unsigned int *addr, unsigned int incr) { unsigned int old; do { old = *addr; } while (!AO_int_compare_and_swap_release(addr, old, old+incr)); return old; } # define AO_HAVE_int_fetch_and_add_release #endif #if defined(AO_HAVE_int_fetch_and_add_full) # if !defined(AO_HAVE_int_fetch_and_add_release) # define AO_int_fetch_and_add_release(addr, val) \ AO_int_fetch_and_add_full(addr, val) # define AO_HAVE_int_fetch_and_add_release # endif # if !defined(AO_HAVE_int_fetch_and_add_acquire) # define AO_int_fetch_and_add_acquire(addr, val) \ AO_int_fetch_and_add_full(addr, val) # define AO_HAVE_int_fetch_and_add_acquire # endif # if !defined(AO_HAVE_int_fetch_and_add_write) # define AO_int_fetch_and_add_write(addr, val) \ AO_int_fetch_and_add_full(addr, val) # define AO_HAVE_int_fetch_and_add_write # endif # if !defined(AO_HAVE_int_fetch_and_add_read) # define AO_int_fetch_and_add_read(addr, val) \ AO_int_fetch_and_add_full(addr, val) # define AO_HAVE_int_fetch_and_add_read # endif #endif /* AO_HAVE_int_fetch_and_add_full */ #if !defined(AO_HAVE_int_fetch_and_add) && \ defined(AO_HAVE_int_fetch_and_add_release) # define AO_int_fetch_and_add(addr, val) \ AO_int_fetch_and_add_release(addr, val) # define AO_HAVE_int_fetch_and_add #endif #if !defined(AO_HAVE_int_fetch_and_add) && \ defined(AO_HAVE_int_fetch_and_add_acquire) # define AO_int_fetch_and_add(addr, val) \ AO_int_fetch_and_add_acquire(addr, val) # define AO_HAVE_int_fetch_and_add #endif #if !defined(AO_HAVE_int_fetch_and_add) && \ defined(AO_HAVE_int_fetch_and_add_write) # define AO_int_fetch_and_add(addr, val) \ AO_int_fetch_and_add_write(addr, val) # define AO_HAVE_int_fetch_and_add #endif #if !defined(AO_HAVE_int_fetch_and_add) && \ defined(AO_HAVE_int_fetch_and_add_read) # define AO_int_fetch_and_add(addr, val) \ AO_int_fetch_and_add_read(addr, val) # define AO_HAVE_int_fetch_and_add #endif #if defined(AO_HAVE_int_fetch_and_add_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_fetch_and_add_full) # define AO_int_fetch_and_add_full(addr, val) \ (AO_nop_full(), AO_int_fetch_and_add_acquire(addr, val)) #endif #if !defined(AO_HAVE_int_fetch_and_add_release_write) && \ defined(AO_HAVE_int_fetch_and_add_write) # define AO_int_fetch_and_add_release_write(addr, val) \ AO_int_fetch_and_add_write(addr, val) # define AO_HAVE_int_fetch_and_add_release_write #endif #if !defined(AO_HAVE_int_fetch_and_add_release_write) && \ defined(AO_HAVE_int_fetch_and_add_release) # define AO_int_fetch_and_add_release_write(addr, val) \ AO_int_fetch_and_add_release(addr, val) # define AO_HAVE_int_fetch_and_add_release_write #endif #if !defined(AO_HAVE_int_fetch_and_add_acquire_read) && \ defined(AO_HAVE_int_fetch_and_add_read) # define AO_int_fetch_and_add_acquire_read(addr, val) \ AO_int_fetch_and_add_read(addr, val) # define AO_HAVE_int_fetch_and_add_acquire_read #endif #if !defined(AO_HAVE_int_fetch_and_add_acquire_read) && \ defined(AO_HAVE_int_fetch_and_add_acquire) # define AO_int_fetch_and_add_acquire_read(addr, val) \ AO_int_fetch_and_add_acquire(addr, val) # define AO_HAVE_int_fetch_and_add_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_int_fetch_and_add_acquire_read) # define AO_int_fetch_and_add_dd_acquire_read(addr, val) \ AO_int_fetch_and_add_acquire_read(addr, val) # define AO_HAVE_int_fetch_and_add_dd_acquire_read # endif #else # if defined(AO_HAVE_int_fetch_and_add) # 
define AO_int_fetch_and_add_dd_acquire_read(addr, val) \ AO_int_fetch_and_add(addr, val) # define AO_HAVE_int_fetch_and_add_dd_acquire_read # endif #endif /* int_fetch_and_add1 */ #if defined(AO_HAVE_int_fetch_and_add_full) &&\ !defined(AO_HAVE_int_fetch_and_add1_full) # define AO_int_fetch_and_add1_full(addr) \ AO_int_fetch_and_add_full(addr,1) # define AO_HAVE_int_fetch_and_add1_full #endif #if defined(AO_HAVE_int_fetch_and_add_release) &&\ !defined(AO_HAVE_int_fetch_and_add1_release) # define AO_int_fetch_and_add1_release(addr) \ AO_int_fetch_and_add_release(addr,1) # define AO_HAVE_int_fetch_and_add1_release #endif #if defined(AO_HAVE_int_fetch_and_add_acquire) &&\ !defined(AO_HAVE_int_fetch_and_add1_acquire) # define AO_int_fetch_and_add1_acquire(addr) \ AO_int_fetch_and_add_acquire(addr,1) # define AO_HAVE_int_fetch_and_add1_acquire #endif #if defined(AO_HAVE_int_fetch_and_add_write) &&\ !defined(AO_HAVE_int_fetch_and_add1_write) # define AO_int_fetch_and_add1_write(addr) \ AO_int_fetch_and_add_write(addr,1) # define AO_HAVE_int_fetch_and_add1_write #endif #if defined(AO_HAVE_int_fetch_and_add_read) &&\ !defined(AO_HAVE_int_fetch_and_add1_read) # define AO_int_fetch_and_add1_read(addr) \ AO_int_fetch_and_add_read(addr,1) # define AO_HAVE_int_fetch_and_add1_read #endif #if defined(AO_HAVE_int_fetch_and_add_release_write) &&\ !defined(AO_HAVE_int_fetch_and_add1_release_write) # define AO_int_fetch_and_add1_release_write(addr) \ AO_int_fetch_and_add_release_write(addr,1) # define AO_HAVE_int_fetch_and_add1_release_write #endif #if defined(AO_HAVE_int_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_int_fetch_and_add1_acquire_read) # define AO_int_fetch_and_add1_acquire_read(addr) \ AO_int_fetch_and_add_acquire_read(addr,1) # define AO_HAVE_int_fetch_and_add1_acquire_read #endif #if defined(AO_HAVE_int_fetch_and_add) &&\ !defined(AO_HAVE_int_fetch_and_add1) # define AO_int_fetch_and_add1(addr) \ AO_int_fetch_and_add(addr,1) # define AO_HAVE_int_fetch_and_add1 #endif #if defined(AO_HAVE_int_fetch_and_add1_full) # if !defined(AO_HAVE_int_fetch_and_add1_release) # define AO_int_fetch_and_add1_release(addr) \ AO_int_fetch_and_add1_full(addr) # define AO_HAVE_int_fetch_and_add1_release # endif # if !defined(AO_HAVE_int_fetch_and_add1_acquire) # define AO_int_fetch_and_add1_acquire(addr) \ AO_int_fetch_and_add1_full(addr) # define AO_HAVE_int_fetch_and_add1_acquire # endif # if !defined(AO_HAVE_int_fetch_and_add1_write) # define AO_int_fetch_and_add1_write(addr) \ AO_int_fetch_and_add1_full(addr) # define AO_HAVE_int_fetch_and_add1_write # endif # if !defined(AO_HAVE_int_fetch_and_add1_read) # define AO_int_fetch_and_add1_read(addr) \ AO_int_fetch_and_add1_full(addr) # define AO_HAVE_int_fetch_and_add1_read # endif #endif /* AO_HAVE_int_fetch_and_add1_full */ #if !defined(AO_HAVE_int_fetch_and_add1) && \ defined(AO_HAVE_int_fetch_and_add1_release) # define AO_int_fetch_and_add1(addr) \ AO_int_fetch_and_add1_release(addr) # define AO_HAVE_int_fetch_and_add1 #endif #if !defined(AO_HAVE_int_fetch_and_add1) && \ defined(AO_HAVE_int_fetch_and_add1_acquire) # define AO_int_fetch_and_add1(addr) \ AO_int_fetch_and_add1_acquire(addr) # define AO_HAVE_int_fetch_and_add1 #endif #if !defined(AO_HAVE_int_fetch_and_add1) && \ defined(AO_HAVE_int_fetch_and_add1_write) # define AO_int_fetch_and_add1(addr) \ AO_int_fetch_and_add1_write(addr) # define AO_HAVE_int_fetch_and_add1 #endif #if !defined(AO_HAVE_int_fetch_and_add1) && \ defined(AO_HAVE_int_fetch_and_add1_read) # define AO_int_fetch_and_add1(addr) \ 
AO_int_fetch_and_add1_read(addr) # define AO_HAVE_int_fetch_and_add1 #endif #if defined(AO_HAVE_int_fetch_and_add1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_fetch_and_add1_full) # define AO_int_fetch_and_add1_full(addr) \ (AO_nop_full(), AO_int_fetch_and_add1_acquire(addr)) # define AO_HAVE_int_fetch_and_add1_full #endif #if !defined(AO_HAVE_int_fetch_and_add1_release_write) && \ defined(AO_HAVE_int_fetch_and_add1_write) # define AO_int_fetch_and_add1_release_write(addr) \ AO_int_fetch_and_add1_write(addr) # define AO_HAVE_int_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_int_fetch_and_add1_release_write) && \ defined(AO_HAVE_int_fetch_and_add1_release) # define AO_int_fetch_and_add1_release_write(addr) \ AO_int_fetch_and_add1_release(addr) # define AO_HAVE_int_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_int_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_int_fetch_and_add1_read) # define AO_int_fetch_and_add1_acquire_read(addr) \ AO_int_fetch_and_add1_read(addr) # define AO_HAVE_int_fetch_and_add1_acquire_read #endif #if !defined(AO_HAVE_int_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_int_fetch_and_add1_acquire) # define AO_int_fetch_and_add1_acquire_read(addr) \ AO_int_fetch_and_add1_acquire(addr) # define AO_HAVE_int_fetch_and_add1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_int_fetch_and_add1_acquire_read) # define AO_int_fetch_and_add1_dd_acquire_read(addr) \ AO_int_fetch_and_add1_acquire_read(addr) # define AO_HAVE_int_fetch_and_add1_dd_acquire_read # endif #else # if defined(AO_HAVE_int_fetch_and_add1) # define AO_int_fetch_and_add1_dd_acquire_read(addr) \ AO_int_fetch_and_add1(addr) # define AO_HAVE_int_fetch_and_add1_dd_acquire_read # endif #endif /* int_fetch_and_sub1 */ #if defined(AO_HAVE_int_fetch_and_add_full) &&\ !defined(AO_HAVE_int_fetch_and_sub1_full) # define AO_int_fetch_and_sub1_full(addr) \ AO_int_fetch_and_add_full(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_full #endif #if defined(AO_HAVE_int_fetch_and_add_release) &&\ !defined(AO_HAVE_int_fetch_and_sub1_release) # define AO_int_fetch_and_sub1_release(addr) \ AO_int_fetch_and_add_release(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_release #endif #if defined(AO_HAVE_int_fetch_and_add_acquire) &&\ !defined(AO_HAVE_int_fetch_and_sub1_acquire) # define AO_int_fetch_and_sub1_acquire(addr) \ AO_int_fetch_and_add_acquire(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_acquire #endif #if defined(AO_HAVE_int_fetch_and_add_write) &&\ !defined(AO_HAVE_int_fetch_and_sub1_write) # define AO_int_fetch_and_sub1_write(addr) \ AO_int_fetch_and_add_write(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_write #endif #if defined(AO_HAVE_int_fetch_and_add_read) &&\ !defined(AO_HAVE_int_fetch_and_sub1_read) # define AO_int_fetch_and_sub1_read(addr) \ AO_int_fetch_and_add_read(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_read #endif #if defined(AO_HAVE_int_fetch_and_add_release_write) &&\ !defined(AO_HAVE_int_fetch_and_sub1_release_write) # define AO_int_fetch_and_sub1_release_write(addr) \ AO_int_fetch_and_add_release_write(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_release_write #endif #if defined(AO_HAVE_int_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_int_fetch_and_sub1_acquire_read) # define AO_int_fetch_and_sub1_acquire_read(addr) \ AO_int_fetch_and_add_acquire_read(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1_acquire_read #endif #if 
defined(AO_HAVE_int_fetch_and_add) &&\ !defined(AO_HAVE_int_fetch_and_sub1) # define AO_int_fetch_and_sub1(addr) \ AO_int_fetch_and_add(addr,(unsigned int)(-1)) # define AO_HAVE_int_fetch_and_sub1 #endif #if defined(AO_HAVE_int_fetch_and_sub1_full) # if !defined(AO_HAVE_int_fetch_and_sub1_release) # define AO_int_fetch_and_sub1_release(addr) \ AO_int_fetch_and_sub1_full(addr) # define AO_HAVE_int_fetch_and_sub1_release # endif # if !defined(AO_HAVE_int_fetch_and_sub1_acquire) # define AO_int_fetch_and_sub1_acquire(addr) \ AO_int_fetch_and_sub1_full(addr) # define AO_HAVE_int_fetch_and_sub1_acquire # endif # if !defined(AO_HAVE_int_fetch_and_sub1_write) # define AO_int_fetch_and_sub1_write(addr) \ AO_int_fetch_and_sub1_full(addr) # define AO_HAVE_int_fetch_and_sub1_write # endif # if !defined(AO_HAVE_int_fetch_and_sub1_read) # define AO_int_fetch_and_sub1_read(addr) \ AO_int_fetch_and_sub1_full(addr) # define AO_HAVE_int_fetch_and_sub1_read # endif #endif /* AO_HAVE_int_fetch_and_sub1_full */ #if !defined(AO_HAVE_int_fetch_and_sub1) && \ defined(AO_HAVE_int_fetch_and_sub1_release) # define AO_int_fetch_and_sub1(addr) \ AO_int_fetch_and_sub1_release(addr) # define AO_HAVE_int_fetch_and_sub1 #endif #if !defined(AO_HAVE_int_fetch_and_sub1) && \ defined(AO_HAVE_int_fetch_and_sub1_acquire) # define AO_int_fetch_and_sub1(addr) \ AO_int_fetch_and_sub1_acquire(addr) # define AO_HAVE_int_fetch_and_sub1 #endif #if !defined(AO_HAVE_int_fetch_and_sub1) && \ defined(AO_HAVE_int_fetch_and_sub1_write) # define AO_int_fetch_and_sub1(addr) \ AO_int_fetch_and_sub1_write(addr) # define AO_HAVE_int_fetch_and_sub1 #endif #if !defined(AO_HAVE_int_fetch_and_sub1) && \ defined(AO_HAVE_int_fetch_and_sub1_read) # define AO_int_fetch_and_sub1(addr) \ AO_int_fetch_and_sub1_read(addr) # define AO_HAVE_int_fetch_and_sub1 #endif #if defined(AO_HAVE_int_fetch_and_sub1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_int_fetch_and_sub1_full) # define AO_int_fetch_and_sub1_full(addr) \ (AO_nop_full(), AO_int_fetch_and_sub1_acquire(addr)) # define AO_HAVE_int_fetch_and_sub1_full #endif #if !defined(AO_HAVE_int_fetch_and_sub1_release_write) && \ defined(AO_HAVE_int_fetch_and_sub1_write) # define AO_int_fetch_and_sub1_release_write(addr) \ AO_int_fetch_and_sub1_write(addr) # define AO_HAVE_int_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_int_fetch_and_sub1_release_write) && \ defined(AO_HAVE_int_fetch_and_sub1_release) # define AO_int_fetch_and_sub1_release_write(addr) \ AO_int_fetch_and_sub1_release(addr) # define AO_HAVE_int_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_int_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_int_fetch_and_sub1_read) # define AO_int_fetch_and_sub1_acquire_read(addr) \ AO_int_fetch_and_sub1_read(addr) # define AO_HAVE_int_fetch_and_sub1_acquire_read #endif #if !defined(AO_HAVE_int_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_int_fetch_and_sub1_acquire) # define AO_int_fetch_and_sub1_acquire_read(addr) \ AO_int_fetch_and_sub1_acquire(addr) # define AO_HAVE_int_fetch_and_sub1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_int_fetch_and_sub1_acquire_read) # define AO_int_fetch_and_sub1_dd_acquire_read(addr) \ AO_int_fetch_and_sub1_acquire_read(addr) # define AO_HAVE_int_fetch_and_sub1_dd_acquire_read # endif #else # if defined(AO_HAVE_int_fetch_and_sub1) # define AO_int_fetch_and_sub1_dd_acquire_read(addr) \ AO_int_fetch_and_sub1(addr) # define AO_HAVE_int_fetch_and_sub1_dd_acquire_read # endif #endif 
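The `..._fetch_and_sub1` variants above are all derived from fetch-and-add by adding `(unsigned int)(-1)`. A standalone sketch (illustration only, not part of the library) of why that is correct on unsigned types:

#include <stdio.h>

int main(void)
{
  /* On an N-bit unsigned type, adding (unsigned)(-1) is addition of */
  /* 2^N - 1 modulo 2^N, i.e. exactly a decrement by one.            */
  unsigned int x = 5;
  x += (unsigned int)(-1);
  printf("%u\n", x);   /* prints 4 */
  return 0;
}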
================================================
FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/generalize.h
================================================
/*
 * Copyright (c) 2003-2004 Hewlett-Packard Development Company, L.P.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * Generalize atomic operations for atomic_ops.h.
 * Should not be included directly.
 *
 * We make no attempt to define useless operations, such as
 *    AO_nop_acquire
 *    AO_nop_release
 *
 * We have also so far neglected to define some others, which
 * do not appear likely to be useful, e.g. stores with acquire
 * or read barriers.
 *
 * This file is sometimes included twice by atomic_ops.h.
 * All definitions include explicit checks that we are not replacing
 * an earlier definition.  In general, more desirable expansions
 * appear earlier so that we are more likely to use them.
 *
 * We only make safe generalizations, except that by default we define
 * the ...dd_acquire_read operations to be equivalent to those without
 * a barrier.  On platforms for which this is unsafe, the platform-specific
 * file must define AO_NO_DD_ORDERING.
 */

#ifndef ATOMIC_OPS_H
# error Atomic_ops_generalize.h should not be included directly.
#endif

#if AO_CHAR_TS_T
# define AO_TS_COMPARE_AND_SWAP_FULL(a,o,n) \
                AO_char_compare_and_swap_full(a,o,n)
# define AO_TS_COMPARE_AND_SWAP_ACQUIRE(a,o,n) \
                AO_char_compare_and_swap_acquire(a,o,n)
# define AO_TS_COMPARE_AND_SWAP_RELEASE(a,o,n) \
                AO_char_compare_and_swap_release(a,o,n)
# define AO_TS_COMPARE_AND_SWAP(a,o,n) \
                AO_char_compare_and_swap(a,o,n)
#endif

#if AO_AO_TS_T
# define AO_TS_COMPARE_AND_SWAP_FULL(a,o,n) \
                AO_compare_and_swap_full(a,o,n)
# define AO_TS_COMPARE_AND_SWAP_ACQUIRE(a,o,n) \
                AO_compare_and_swap_acquire(a,o,n)
# define AO_TS_COMPARE_AND_SWAP_RELEASE(a,o,n) \
                AO_compare_and_swap_release(a,o,n)
# define AO_TS_COMPARE_AND_SWAP(a,o,n) \
                AO_compare_and_swap(a,o,n)
#endif
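/* A minimal sketch (illustration only; the FOO_* names are hypothetical)  */
/* of the fallback scheme this file applies to every operation: a missing  */
/* variant is defined in terms of a stronger variant that already exists,  */
/* and a matching FOO_HAVE_* macro records that it is now available, so    */
/* later, less desirable expansions are skipped.                           */
#if 0
# if defined(FOO_HAVE_op_full) && !defined(FOO_HAVE_op_acquire)
#   define FOO_op_acquire(addr) FOO_op_full(addr) /* full implies acquire */
#   define FOO_HAVE_op_acquire
# endif
# if defined(FOO_HAVE_op_acquire) && !defined(FOO_HAVE_op)
#   define FOO_op(addr) FOO_op_acquire(addr) /* any barrier implies none */
#   define FOO_HAVE_op
# endif
#endif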
/* Generate test_and_set_full, if necessary and possible. */
#if !defined(AO_HAVE_test_and_set) && \
    !defined(AO_HAVE_test_and_set_release) && \
    !defined(AO_HAVE_test_and_set_acquire) && \
    !defined(AO_HAVE_test_and_set_read) && \
    !defined(AO_HAVE_test_and_set_full)
# if AO_AO_TS_T && defined(AO_HAVE_compare_and_swap_full) || \
     AO_CHAR_TS_T && defined(AO_HAVE_char_compare_and_swap_full)
   AO_INLINE AO_TS_VAL_t
   AO_test_and_set_full(volatile AO_TS_t *addr)
   {
     if (AO_TS_COMPARE_AND_SWAP_FULL(addr, AO_TS_CLEAR, AO_TS_SET))
       return AO_TS_CLEAR;
     else
       return AO_TS_SET;
   }
#  define AO_HAVE_test_and_set_full
# endif /* AO_HAVE_compare_and_swap_full */

# if AO_AO_TS_T && defined(AO_HAVE_compare_and_swap_acquire) || \
     AO_CHAR_TS_T && defined(AO_HAVE_char_compare_and_swap_acquire)
   AO_INLINE AO_TS_VAL_t
   AO_test_and_set_acquire(volatile AO_TS_t *addr)
   {
     if (AO_TS_COMPARE_AND_SWAP_ACQUIRE(addr, AO_TS_CLEAR, AO_TS_SET))
       return AO_TS_CLEAR;
     else
       return AO_TS_SET;
   }
#  define AO_HAVE_test_and_set_acquire
# endif /* AO_HAVE_compare_and_swap_acquire */

# if AO_AO_TS_T && defined(AO_HAVE_compare_and_swap_release) || \
     AO_CHAR_TS_T && defined(AO_HAVE_char_compare_and_swap_release)
   AO_INLINE AO_TS_VAL_t
   AO_test_and_set_release(volatile AO_TS_t *addr)
   {
     if (AO_TS_COMPARE_AND_SWAP_RELEASE(addr, AO_TS_CLEAR, AO_TS_SET))
       return AO_TS_CLEAR;
     else
       return AO_TS_SET;
   }
#  define AO_HAVE_test_and_set_release
# endif /* AO_HAVE_compare_and_swap_release */

# if AO_AO_TS_T && defined(AO_HAVE_compare_and_swap) || \
     AO_CHAR_TS_T && defined(AO_HAVE_char_compare_and_swap)
   AO_INLINE AO_TS_VAL_t
   AO_test_and_set(volatile AO_TS_t *addr)
   {
     if (AO_TS_COMPARE_AND_SWAP(addr, AO_TS_CLEAR, AO_TS_SET))
       return AO_TS_CLEAR;
     else
       return AO_TS_SET;
   }
#  define AO_HAVE_test_and_set
# endif /* AO_HAVE_compare_and_swap */

# if defined(AO_HAVE_test_and_set) && defined(AO_HAVE_nop_full) \
     && !defined(AO_HAVE_test_and_set_acquire)
   AO_INLINE AO_TS_VAL_t
   AO_test_and_set_acquire(volatile AO_TS_t *addr)
   {
     AO_TS_VAL_t result = AO_test_and_set(addr);
     AO_nop_full();
     return result;
   }
#  define AO_HAVE_test_and_set_acquire
# endif
#endif /* No prior test and set */

/* Nop */
#if !defined(AO_HAVE_nop)
  AO_INLINE void AO_nop(void) {}
# define AO_HAVE_nop
#endif

#if defined(AO_HAVE_test_and_set_full) && !defined(AO_HAVE_nop_full)
  AO_INLINE void
  AO_nop_full(void)
  {
    AO_TS_t dummy = AO_TS_INITIALIZER;
    AO_test_and_set_full(&dummy);
  }
# define AO_HAVE_nop_full
#endif

#if defined(AO_HAVE_nop_acquire)
# error AO_nop_acquire is useless: don't define.
#endif
#if defined(AO_HAVE_nop_release)
# error AO_nop_release is useless: don't define.
#endif
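/* Sketch (illustration only; the sketch_* name is hypothetical) of the    */
/* same trick in portable C11: where no dedicated full-fence primitive     */
/* exists, an atomic test-and-set on a dummy flag doubles as a full        */
/* barrier on common hardware, exactly as AO_nop_full() above.             */
#if 0
#include <stdatomic.h>
static inline void
sketch_nop_full(void)
{
  atomic_flag dummy = ATOMIC_FLAG_INIT;   /* local dummy, as in AO_nop_full */
  (void)atomic_flag_test_and_set(&dummy); /* seq_cst read-modify-write */
}
#endif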
#if defined(AO_HAVE_nop_full) && !defined(AO_HAVE_nop_read)
# define AO_nop_read() AO_nop_full()
# define AO_HAVE_nop_read
#endif

#if defined(AO_HAVE_nop_full) && !defined(AO_HAVE_nop_write)
# define AO_nop_write() AO_nop_full()
# define AO_HAVE_nop_write
#endif

/* Load */
#if defined(AO_HAVE_load_full) && !defined(AO_HAVE_load_acquire)
# define AO_load_acquire(addr) AO_load_full(addr)
# define AO_HAVE_load_acquire
#endif

#if defined(AO_HAVE_load_acquire) && !defined(AO_HAVE_load)
# define AO_load(addr) AO_load_acquire(addr)
# define AO_HAVE_load
#endif

#if defined(AO_HAVE_load_full) && !defined(AO_HAVE_load_read)
# define AO_load_read(addr) AO_load_full(addr)
# define AO_HAVE_load_read
#endif

#if !defined(AO_HAVE_load_acquire_read) && defined(AO_HAVE_load_acquire)
# define AO_load_acquire_read(addr) AO_load_acquire(addr)
# define AO_HAVE_load_acquire_read
#endif

#if defined(AO_HAVE_load) && defined(AO_HAVE_nop_full) && \
    !defined(AO_HAVE_load_acquire)
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result = AO_load(addr);
  /* Acquire barrier would be useless, since the load could be delayed */
  /* beyond it.                                                        */
  AO_nop_full();
  return result;
}
# define AO_HAVE_load_acquire
#endif

#if defined(AO_HAVE_load) && defined(AO_HAVE_nop_read) && \
    !defined(AO_HAVE_load_read)
AO_INLINE AO_t
AO_load_read(const volatile AO_t *addr)
{
  AO_t result = AO_load(addr);
  /* Acquire barrier would be useless, since the load could be delayed */
  /* beyond it.                                                        */
  AO_nop_read();
  return result;
}
# define AO_HAVE_load_read
#endif

#if defined(AO_HAVE_load_acquire) && defined(AO_HAVE_nop_full) && \
    !defined(AO_HAVE_load_full)
# define AO_load_full(addr) (AO_nop_full(), AO_load_acquire(addr))
# define AO_HAVE_load_full
#endif

#if !defined(AO_HAVE_load_acquire_read) && defined(AO_HAVE_load_read)
# define AO_load_acquire_read(addr) AO_load_read(addr)
# define AO_HAVE_load_acquire_read
#endif

#if defined(AO_HAVE_load_acquire_read) && !defined(AO_HAVE_load)
# define AO_load(addr) AO_load_acquire_read(addr)
# define AO_HAVE_load
#endif

#ifdef AO_NO_DD_ORDERING
# if defined(AO_HAVE_load_acquire_read)
#   define AO_load_dd_acquire_read(addr) AO_load_acquire_read(addr)
#   define AO_HAVE_load_dd_acquire_read
# endif
#else
# if defined(AO_HAVE_load)
#   define AO_load_dd_acquire_read(addr) AO_load(addr)
#   define AO_HAVE_load_dd_acquire_read
# endif
#endif

/* Store */
#if defined(AO_HAVE_store_full) && !defined(AO_HAVE_store_release)
# define AO_store_release(addr,val) AO_store_full(addr,val)
# define AO_HAVE_store_release
#endif

#if defined(AO_HAVE_store_release) && !defined(AO_HAVE_store)
# define AO_store(addr, val) AO_store_release(addr,val)
# define AO_HAVE_store
#endif

#if defined(AO_HAVE_store_full) && !defined(AO_HAVE_store_write)
# define AO_store_write(addr,val) AO_store_full(addr,val)
# define AO_HAVE_store_write
#endif

#if defined(AO_HAVE_store_release) && !defined(AO_HAVE_store_release_write)
# define AO_store_release_write(addr, val) AO_store_release(addr,val)
# define AO_HAVE_store_release_write
#endif

#if defined(AO_HAVE_store_write) && !defined(AO_HAVE_store)
# define AO_store(addr, val) AO_store_write(addr,val)
# define AO_HAVE_store
#endif

#if defined(AO_HAVE_store) && defined(AO_HAVE_nop_full) && \
    !defined(AO_HAVE_store_release)
# define AO_store_release(addr,val) (AO_nop_full(), AO_store(addr,val))
# define AO_HAVE_store_release
#endif

#if defined(AO_HAVE_nop_write) && defined(AO_HAVE_store) && \
    !defined(AO_HAVE_store_write)
# define AO_store_write(addr, val) (AO_nop_write(), \
AO_store(addr,val)) # define AO_HAVE_store_write #endif #if defined(AO_HAVE_store_write) && !defined(AO_HAVE_store_release_write) # define AO_store_release_write(addr, val) AO_store_write(addr,val) # define AO_HAVE_store_release_write #endif #if defined(AO_HAVE_store_release) && defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_store_full) # define AO_store_full(addr, val) (AO_store_release(addr, val), AO_nop_full()) # define AO_HAVE_store_full #endif /* NEC LE-IT: Test and set */ #if defined(AO_HAVE_test_and_set) && \ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_test_and_set_release) # define AO_test_and_set_release(addr) \ (AO_nop_full(), AO_test_and_set(addr)) # define AO_HAVE_test_and_set_release #endif #if defined(AO_HAVE_test_and_set) && \ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_test_and_set_acquire) AO_INLINE AO_TS_t AO_test_and_set_acquire(volatile AO_TS_t *addr) { AO_TS_t res = AO_test_and_set(addr); AO_nop_full(); return res; } # define AO_HAVE_test_and_set_acquire #endif /* Fetch_and_add */ /* We first try to implement fetch_and_add variants in terms */ /* of the corresponding compare_and_swap variants to minimize */ /* adding barriers. */ #if defined(AO_HAVE_compare_and_swap_full) && \ !defined(AO_HAVE_fetch_and_add_full) AO_INLINE AO_t AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) { AO_t old; do { old = *addr; } while (!AO_compare_and_swap_full(addr, old, old+incr)); return old; } # define AO_HAVE_fetch_and_add_full #endif #if defined(AO_HAVE_compare_and_swap_acquire) && \ !defined(AO_HAVE_fetch_and_add_acquire) AO_INLINE AO_t AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) { AO_t old; do { old = *addr; } while (!AO_compare_and_swap_acquire(addr, old, old+incr)); return old; } # define AO_HAVE_fetch_and_add_acquire #endif #if defined(AO_HAVE_compare_and_swap_release) && \ !defined(AO_HAVE_fetch_and_add_release) AO_INLINE AO_t AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) { AO_t old; do { old = *addr; } while (!AO_compare_and_swap_release(addr, old, old+incr)); return old; } # define AO_HAVE_fetch_and_add_release #endif #if defined(AO_HAVE_compare_and_swap) && \ !defined(AO_HAVE_fetch_and_add) AO_INLINE AO_t AO_fetch_and_add(volatile AO_t *addr, AO_t incr) { AO_t old; do { old = *addr; } while (!AO_compare_and_swap(addr, old, old+incr)); return old; } # define AO_HAVE_fetch_and_add #endif #if defined(AO_HAVE_fetch_and_add_full) # if !defined(AO_HAVE_fetch_and_add_release) # define AO_fetch_and_add_release(addr, val) \ AO_fetch_and_add_full(addr, val) # define AO_HAVE_fetch_and_add_release # endif # if !defined(AO_HAVE_fetch_and_add_acquire) # define AO_fetch_and_add_acquire(addr, val) \ AO_fetch_and_add_full(addr, val) # define AO_HAVE_fetch_and_add_acquire # endif # if !defined(AO_HAVE_fetch_and_add_write) # define AO_fetch_and_add_write(addr, val) \ AO_fetch_and_add_full(addr, val) # define AO_HAVE_fetch_and_add_write # endif # if !defined(AO_HAVE_fetch_and_add_read) # define AO_fetch_and_add_read(addr, val) \ AO_fetch_and_add_full(addr, val) # define AO_HAVE_fetch_and_add_read # endif #endif /* AO_HAVE_fetch_and_add_full */ #if !defined(AO_HAVE_fetch_and_add) && \ defined(AO_HAVE_fetch_and_add_release) # define AO_fetch_and_add(addr, val) \ AO_fetch_and_add_release(addr, val) # define AO_HAVE_fetch_and_add #endif #if !defined(AO_HAVE_fetch_and_add) && \ defined(AO_HAVE_fetch_and_add_acquire) # define AO_fetch_and_add(addr, val) \ AO_fetch_and_add_acquire(addr, val) # define AO_HAVE_fetch_and_add #endif #if !defined(AO_HAVE_fetch_and_add) 
&& \ defined(AO_HAVE_fetch_and_add_write) # define AO_fetch_and_add(addr, val) \ AO_fetch_and_add_write(addr, val) # define AO_HAVE_fetch_and_add #endif #if !defined(AO_HAVE_fetch_and_add) && \ defined(AO_HAVE_fetch_and_add_read) # define AO_fetch_and_add(addr, val) \ AO_fetch_and_add_read(addr, val) # define AO_HAVE_fetch_and_add #endif #if defined(AO_HAVE_fetch_and_add_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_fetch_and_add_full) # define AO_fetch_and_add_full(addr, val) \ (AO_nop_full(), AO_fetch_and_add_acquire(addr, val)) # define AO_HAVE_fetch_and_add_full #endif #if !defined(AO_HAVE_fetch_and_add_release_write) && \ defined(AO_HAVE_fetch_and_add_write) # define AO_fetch_and_add_release_write(addr, val) \ AO_fetch_and_add_write(addr, val) # define AO_HAVE_fetch_and_add_release_write #endif #if !defined(AO_HAVE_fetch_and_add_release_write) && \ defined(AO_HAVE_fetch_and_add_release) # define AO_fetch_and_add_release_write(addr, val) \ AO_fetch_and_add_release(addr, val) # define AO_HAVE_fetch_and_add_release_write #endif #if !defined(AO_HAVE_fetch_and_add_acquire_read) && \ defined(AO_HAVE_fetch_and_add_read) # define AO_fetch_and_add_acquire_read(addr, val) \ AO_fetch_and_add_read(addr, val) # define AO_HAVE_fetch_and_add_acquire_read #endif #if !defined(AO_HAVE_fetch_and_add_acquire_read) && \ defined(AO_HAVE_fetch_and_add_acquire) # define AO_fetch_and_add_acquire_read(addr, val) \ AO_fetch_and_add_acquire(addr, val) # define AO_HAVE_fetch_and_add_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_fetch_and_add_acquire_read) # define AO_fetch_and_add_dd_acquire_read(addr, val) \ AO_fetch_and_add_acquire_read(addr, val) # define AO_HAVE_fetch_and_add_dd_acquire_read # endif #else # if defined(AO_HAVE_fetch_and_add) # define AO_fetch_and_add_dd_acquire_read(addr, val) \ AO_fetch_and_add(addr, val) # define AO_HAVE_fetch_and_add_dd_acquire_read # endif #endif /* Fetch_and_add1 */ #if defined(AO_HAVE_fetch_and_add_full) &&\ !defined(AO_HAVE_fetch_and_add1_full) # define AO_fetch_and_add1_full(addr) AO_fetch_and_add_full(addr,1) # define AO_HAVE_fetch_and_add1_full #endif #if defined(AO_HAVE_fetch_and_add_release) &&\ !defined(AO_HAVE_fetch_and_add1_release) # define AO_fetch_and_add1_release(addr) AO_fetch_and_add_release(addr,1) # define AO_HAVE_fetch_and_add1_release #endif #if defined(AO_HAVE_fetch_and_add_acquire) &&\ !defined(AO_HAVE_fetch_and_add1_acquire) # define AO_fetch_and_add1_acquire(addr) AO_fetch_and_add_acquire(addr,1) # define AO_HAVE_fetch_and_add1_acquire #endif #if defined(AO_HAVE_fetch_and_add_write) &&\ !defined(AO_HAVE_fetch_and_add1_write) # define AO_fetch_and_add1_write(addr) AO_fetch_and_add_write(addr,1) # define AO_HAVE_fetch_and_add1_write #endif #if defined(AO_HAVE_fetch_and_add_read) &&\ !defined(AO_HAVE_fetch_and_add1_read) # define AO_fetch_and_add1_read(addr) AO_fetch_and_add_read(addr,1) # define AO_HAVE_fetch_and_add1_read #endif #if defined(AO_HAVE_fetch_and_add_release_write) &&\ !defined(AO_HAVE_fetch_and_add1_release_write) # define AO_fetch_and_add1_release_write(addr) \ AO_fetch_and_add_release_write(addr,1) # define AO_HAVE_fetch_and_add1_release_write #endif #if defined(AO_HAVE_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_fetch_and_add1_acquire_read) # define AO_fetch_and_add1_acquire_read(addr) \ AO_fetch_and_add_acquire_read(addr,1) # define AO_HAVE_fetch_and_add1_acquire_read #endif #if defined(AO_HAVE_fetch_and_add) &&\ !defined(AO_HAVE_fetch_and_add1) # define AO_fetch_and_add1(addr) \ 
AO_fetch_and_add(addr,1) # define AO_HAVE_fetch_and_add1 #endif #if defined(AO_HAVE_fetch_and_add1_full) # if !defined(AO_HAVE_fetch_and_add1_release) # define AO_fetch_and_add1_release(addr) \ AO_fetch_and_add1_full(addr) # define AO_HAVE_fetch_and_add1_release # endif # if !defined(AO_HAVE_fetch_and_add1_acquire) # define AO_fetch_and_add1_acquire(addr) \ AO_fetch_and_add1_full(addr) # define AO_HAVE_fetch_and_add1_acquire # endif # if !defined(AO_HAVE_fetch_and_add1_write) # define AO_fetch_and_add1_write(addr) \ AO_fetch_and_add1_full(addr) # define AO_HAVE_fetch_and_add1_write # endif # if !defined(AO_HAVE_fetch_and_add1_read) # define AO_fetch_and_add1_read(addr) \ AO_fetch_and_add1_full(addr) # define AO_HAVE_fetch_and_add1_read # endif #endif /* AO_HAVE_fetch_and_add1_full */ #if !defined(AO_HAVE_fetch_and_add1) && \ defined(AO_HAVE_fetch_and_add1_release) # define AO_fetch_and_add1(addr) \ AO_fetch_and_add1_release(addr) # define AO_HAVE_fetch_and_add1 #endif #if !defined(AO_HAVE_fetch_and_add1) && \ defined(AO_HAVE_fetch_and_add1_acquire) # define AO_fetch_and_add1(addr) \ AO_fetch_and_add1_acquire(addr) # define AO_HAVE_fetch_and_add1 #endif #if !defined(AO_HAVE_fetch_and_add1) && \ defined(AO_HAVE_fetch_and_add1_write) # define AO_fetch_and_add1(addr) \ AO_fetch_and_add1_write(addr) # define AO_HAVE_fetch_and_add1 #endif #if !defined(AO_HAVE_fetch_and_add1) && \ defined(AO_HAVE_fetch_and_add1_read) # define AO_fetch_and_add1(addr) \ AO_fetch_and_add1_read(addr) # define AO_HAVE_fetch_and_add1 #endif #if defined(AO_HAVE_fetch_and_add1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_fetch_and_add1_full) # define AO_fetch_and_add1_full(addr) \ (AO_nop_full(), AO_fetch_and_add1_acquire(addr)) # define AO_HAVE_fetch_and_add1_full #endif #if !defined(AO_HAVE_fetch_and_add1_release_write) && \ defined(AO_HAVE_fetch_and_add1_write) # define AO_fetch_and_add1_release_write(addr) \ AO_fetch_and_add1_write(addr) # define AO_HAVE_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_fetch_and_add1_release_write) && \ defined(AO_HAVE_fetch_and_add1_release) # define AO_fetch_and_add1_release_write(addr) \ AO_fetch_and_add1_release(addr) # define AO_HAVE_fetch_and_add1_release_write #endif #if !defined(AO_HAVE_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_fetch_and_add1_read) # define AO_fetch_and_add1_acquire_read(addr) \ AO_fetch_and_add1_read(addr) # define AO_HAVE_fetch_and_add1_acquire_read #endif #if !defined(AO_HAVE_fetch_and_add1_acquire_read) && \ defined(AO_HAVE_fetch_and_add1_acquire) # define AO_fetch_and_add1_acquire_read(addr) \ AO_fetch_and_add1_acquire(addr) # define AO_HAVE_fetch_and_add1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_fetch_and_add1_acquire_read) # define AO_fetch_and_add1_dd_acquire_read(addr) \ AO_fetch_and_add1_acquire_read(addr) # define AO_HAVE_fetch_and_add1_dd_acquire_read # endif #else # if defined(AO_HAVE_fetch_and_add1) # define AO_fetch_and_add1_dd_acquire_read(addr) AO_fetch_and_add1(addr) # define AO_HAVE_fetch_and_add1_dd_acquire_read # endif #endif /* Fetch_and_sub1 */ #if defined(AO_HAVE_fetch_and_add_full) &&\ !defined(AO_HAVE_fetch_and_sub1_full) # define AO_fetch_and_sub1_full(addr) AO_fetch_and_add_full(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_full #endif #if defined(AO_HAVE_fetch_and_add_release) &&\ !defined(AO_HAVE_fetch_and_sub1_release) # define AO_fetch_and_sub1_release(addr) \ AO_fetch_and_add_release(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_release #endif #if 
defined(AO_HAVE_fetch_and_add_acquire) &&\ !defined(AO_HAVE_fetch_and_sub1_acquire) # define AO_fetch_and_sub1_acquire(addr) \ AO_fetch_and_add_acquire(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_acquire #endif #if defined(AO_HAVE_fetch_and_add_write) &&\ !defined(AO_HAVE_fetch_and_sub1_write) # define AO_fetch_and_sub1_write(addr) \ AO_fetch_and_add_write(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_write #endif #if defined(AO_HAVE_fetch_and_add_read) &&\ !defined(AO_HAVE_fetch_and_sub1_read) # define AO_fetch_and_sub1_read(addr) \ AO_fetch_and_add_read(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_read #endif #if defined(AO_HAVE_fetch_and_add_release_write) &&\ !defined(AO_HAVE_fetch_and_sub1_release_write) # define AO_fetch_and_sub1_release_write(addr) \ AO_fetch_and_add_release_write(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_release_write #endif #if defined(AO_HAVE_fetch_and_add_acquire_read) &&\ !defined(AO_HAVE_fetch_and_sub1_acquire_read) # define AO_fetch_and_sub1_acquire_read(addr) \ AO_fetch_and_add_acquire_read(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1_acquire_read #endif #if defined(AO_HAVE_fetch_and_add) &&\ !defined(AO_HAVE_fetch_and_sub1) # define AO_fetch_and_sub1(addr) \ AO_fetch_and_add(addr,(AO_t)(-1)) # define AO_HAVE_fetch_and_sub1 #endif #if defined(AO_HAVE_fetch_and_sub1_full) # if !defined(AO_HAVE_fetch_and_sub1_release) # define AO_fetch_and_sub1_release(addr) \ AO_fetch_and_sub1_full(addr) # define AO_HAVE_fetch_and_sub1_release # endif # if !defined(AO_HAVE_fetch_and_sub1_acquire) # define AO_fetch_and_sub1_acquire(addr) \ AO_fetch_and_sub1_full(addr) # define AO_HAVE_fetch_and_sub1_acquire # endif # if !defined(AO_HAVE_fetch_and_sub1_write) # define AO_fetch_and_sub1_write(addr) \ AO_fetch_and_sub1_full(addr) # define AO_HAVE_fetch_and_sub1_write # endif # if !defined(AO_HAVE_fetch_and_sub1_read) # define AO_fetch_and_sub1_read(addr) \ AO_fetch_and_sub1_full(addr) # define AO_HAVE_fetch_and_sub1_read # endif #endif /* AO_HAVE_fetch_and_sub1_full */ #if !defined(AO_HAVE_fetch_and_sub1) && \ defined(AO_HAVE_fetch_and_sub1_release) # define AO_fetch_and_sub1(addr) \ AO_fetch_and_sub1_release(addr) # define AO_HAVE_fetch_and_sub1 #endif #if !defined(AO_HAVE_fetch_and_sub1) && \ defined(AO_HAVE_fetch_and_sub1_acquire) # define AO_fetch_and_sub1(addr) \ AO_fetch_and_sub1_acquire(addr) # define AO_HAVE_fetch_and_sub1 #endif #if !defined(AO_HAVE_fetch_and_sub1) && \ defined(AO_HAVE_fetch_and_sub1_write) # define AO_fetch_and_sub1(addr) \ AO_fetch_and_sub1_write(addr) # define AO_HAVE_fetch_and_sub1 #endif #if !defined(AO_HAVE_fetch_and_sub1) && \ defined(AO_HAVE_fetch_and_sub1_read) # define AO_fetch_and_sub1(addr) \ AO_fetch_and_sub1_read(addr) # define AO_HAVE_fetch_and_sub1 #endif #if defined(AO_HAVE_fetch_and_sub1_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_fetch_and_sub1_full) # define AO_fetch_and_sub1_full(addr) \ (AO_nop_full(), AO_fetch_and_sub1_acquire(addr)) # define AO_HAVE_fetch_and_sub1_full #endif #if !defined(AO_HAVE_fetch_and_sub1_release_write) && \ defined(AO_HAVE_fetch_and_sub1_write) # define AO_fetch_and_sub1_release_write(addr) \ AO_fetch_and_sub1_write(addr) # define AO_HAVE_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_fetch_and_sub1_release_write) && \ defined(AO_HAVE_fetch_and_sub1_release) # define AO_fetch_and_sub1_release_write(addr) \ AO_fetch_and_sub1_release(addr) # define AO_HAVE_fetch_and_sub1_release_write #endif #if !defined(AO_HAVE_fetch_and_sub1_acquire_read) && \ 
defined(AO_HAVE_fetch_and_sub1_read) # define AO_fetch_and_sub1_acquire_read(addr) \ AO_fetch_and_sub1_read(addr) # define AO_HAVE_fetch_and_sub1_acquire_read #endif #if !defined(AO_HAVE_fetch_and_sub1_acquire_read) && \ defined(AO_HAVE_fetch_and_sub1_acquire) # define AO_fetch_and_sub1_acquire_read(addr) \ AO_fetch_and_sub1_acquire(addr) # define AO_HAVE_fetch_and_sub1_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_fetch_and_sub1_acquire_read) # define AO_fetch_and_sub1_dd_acquire_read(addr) \ AO_fetch_and_sub1_acquire_read(addr) # define AO_HAVE_fetch_and_sub1_dd_acquire_read # endif #else # if defined(AO_HAVE_fetch_and_sub1) # define AO_fetch_and_sub1_dd_acquire_read(addr) AO_fetch_and_sub1(addr) # define AO_HAVE_fetch_and_sub1_dd_acquire_read # endif #endif /* Atomic or */ #if defined(AO_HAVE_compare_and_swap_full) && \ !defined(AO_HAVE_or_full) AO_INLINE void AO_or_full(volatile AO_t *addr, AO_t incr) { AO_t old; do { old = *addr; } while (!AO_compare_and_swap_full(addr, old, (old | incr))); } # define AO_HAVE_or_full #endif #if defined(AO_HAVE_or_full) # if !defined(AO_HAVE_or_release) # define AO_or_release(addr, val) \ AO_or_full(addr, val) # define AO_HAVE_or_release # endif # if !defined(AO_HAVE_or_acquire) # define AO_or_acquire(addr, val) \ AO_or_full(addr, val) # define AO_HAVE_or_acquire # endif # if !defined(AO_HAVE_or_write) # define AO_or_write(addr, val) \ AO_or_full(addr, val) # define AO_HAVE_or_write # endif # if !defined(AO_HAVE_or_read) # define AO_or_read(addr, val) \ AO_or_full(addr, val) # define AO_HAVE_or_read # endif #endif /* AO_HAVE_or_full */ #if !defined(AO_HAVE_or) && \ defined(AO_HAVE_or_release) # define AO_or(addr, val) \ AO_or_release(addr, val) # define AO_HAVE_or #endif #if !defined(AO_HAVE_or) && \ defined(AO_HAVE_or_acquire) # define AO_or(addr, val) \ AO_or_acquire(addr, val) # define AO_HAVE_or #endif #if !defined(AO_HAVE_or) && \ defined(AO_HAVE_or_write) # define AO_or(addr, val) \ AO_or_write(addr, val) # define AO_HAVE_or #endif #if !defined(AO_HAVE_or) && \ defined(AO_HAVE_or_read) # define AO_or(addr, val) \ AO_or_read(addr, val) # define AO_HAVE_or #endif #if defined(AO_HAVE_or_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_or_full) # define AO_or_full(addr, val) \ (AO_nop_full(), AO_or_acquire(addr, val)) #endif #if !defined(AO_HAVE_or_release_write) && \ defined(AO_HAVE_or_write) # define AO_or_release_write(addr, val) \ AO_or_write(addr, val) # define AO_HAVE_or_release_write #endif #if !defined(AO_HAVE_or_release_write) && \ defined(AO_HAVE_or_release) # define AO_or_release_write(addr, val) \ AO_or_release(addr, val) # define AO_HAVE_or_release_write #endif #if !defined(AO_HAVE_or_acquire_read) && \ defined(AO_HAVE_or_read) # define AO_or_acquire_read(addr, val) \ AO_or_read(addr, val) # define AO_HAVE_or_acquire_read #endif #if !defined(AO_HAVE_or_acquire_read) && \ defined(AO_HAVE_or_acquire) # define AO_or_acquire_read(addr, val) \ AO_or_acquire(addr, val) # define AO_HAVE_or_acquire_read #endif /* dd_aquire_read is meaningless. 
*/ /* Test_and_set */ #if defined(AO_HAVE_test_and_set_full) # if !defined(AO_HAVE_test_and_set_release) # define AO_test_and_set_release(addr) \ AO_test_and_set_full(addr) # define AO_HAVE_test_and_set_release # endif # if !defined(AO_HAVE_test_and_set_acquire) # define AO_test_and_set_acquire(addr) \ AO_test_and_set_full(addr) # define AO_HAVE_test_and_set_acquire # endif # if !defined(AO_HAVE_test_and_set_write) # define AO_test_and_set_write(addr) \ AO_test_and_set_full(addr) # define AO_HAVE_test_and_set_write # endif # if !defined(AO_HAVE_test_and_set_read) # define AO_test_and_set_read(addr) \ AO_test_and_set_full(addr) # define AO_HAVE_test_and_set_read # endif #endif /* AO_HAVE_test_and_set_full */ #if !defined(AO_HAVE_test_and_set) && \ defined(AO_HAVE_test_and_set_release) # define AO_test_and_set(addr) \ AO_test_and_set_release(addr) # define AO_HAVE_test_and_set #endif #if !defined(AO_HAVE_test_and_set) && \ defined(AO_HAVE_test_and_set_acquire) # define AO_test_and_set(addr) \ AO_test_and_set_acquire(addr) # define AO_HAVE_test_and_set #endif #if !defined(AO_HAVE_test_and_set) && \ defined(AO_HAVE_test_and_set_write) # define AO_test_and_set(addr) \ AO_test_and_set_write(addr) # define AO_HAVE_test_and_set #endif #if !defined(AO_HAVE_test_and_set) && \ defined(AO_HAVE_test_and_set_read) # define AO_test_and_set(addr) \ AO_test_and_set_read(addr) # define AO_HAVE_test_and_set #endif #if defined(AO_HAVE_test_and_set_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_test_and_set_full) # define AO_test_and_set_full(addr) \ (AO_nop_full(), AO_test_and_set_acquire(addr)) # define AO_HAVE_test_and_set_full #endif #if !defined(AO_HAVE_test_and_set_release_write) && \ defined(AO_HAVE_test_and_set_write) # define AO_test_and_set_release_write(addr) \ AO_test_and_set_write(addr) # define AO_HAVE_test_and_set_release_write #endif #if !defined(AO_HAVE_test_and_set_release_write) && \ defined(AO_HAVE_test_and_set_release) # define AO_test_and_set_release_write(addr) \ AO_test_and_set_release(addr) # define AO_HAVE_test_and_set_release_write #endif #if !defined(AO_HAVE_test_and_set_acquire_read) && \ defined(AO_HAVE_test_and_set_read) # define AO_test_and_set_acquire_read(addr) \ AO_test_and_set_read(addr) # define AO_HAVE_test_and_set_acquire_read #endif #if !defined(AO_HAVE_test_and_set_acquire_read) && \ defined(AO_HAVE_test_and_set_acquire) # define AO_test_and_set_acquire_read(addr) \ AO_test_and_set_acquire(addr) # define AO_HAVE_test_and_set_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_test_and_set_acquire_read) # define AO_test_and_set_dd_acquire_read(addr) \ AO_test_and_set_acquire_read(addr) # define AO_HAVE_test_and_set_dd_acquire_read # endif #else # if defined(AO_HAVE_test_and_set) # define AO_test_and_set_dd_acquire_read(addr) AO_test_and_set(addr) # define AO_HAVE_test_and_set_dd_acquire_read # endif #endif /* Compare_and_swap */ #if defined(AO_HAVE_compare_and_swap) && defined(AO_HAVE_nop_full)\ && !defined(AO_HAVE_compare_and_swap_acquire) AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { int result = AO_compare_and_swap(addr, old, new_val); AO_nop_full(); return result; } # define AO_HAVE_compare_and_swap_acquire #endif #if defined(AO_HAVE_compare_and_swap) && defined(AO_HAVE_nop_full)\ && !defined(AO_HAVE_compare_and_swap_release) # define AO_compare_and_swap_release(addr, old, new_val) \ (AO_nop_full(), AO_compare_and_swap(addr, old, new_val)) # define AO_HAVE_compare_and_swap_release #endif #if 
defined(AO_HAVE_compare_and_swap_full) # if !defined(AO_HAVE_compare_and_swap_release) # define AO_compare_and_swap_release(addr, old, new_val) \ AO_compare_and_swap_full(addr, old, new_val) # define AO_HAVE_compare_and_swap_release # endif # if !defined(AO_HAVE_compare_and_swap_acquire) # define AO_compare_and_swap_acquire(addr, old, new_val) \ AO_compare_and_swap_full(addr, old, new_val) # define AO_HAVE_compare_and_swap_acquire # endif # if !defined(AO_HAVE_compare_and_swap_write) # define AO_compare_and_swap_write(addr, old, new_val) \ AO_compare_and_swap_full(addr, old, new_val) # define AO_HAVE_compare_and_swap_write # endif # if !defined(AO_HAVE_compare_and_swap_read) # define AO_compare_and_swap_read(addr, old, new_val) \ AO_compare_and_swap_full(addr, old, new_val) # define AO_HAVE_compare_and_swap_read # endif #endif /* AO_HAVE_compare_and_swap_full */ #if !defined(AO_HAVE_compare_and_swap) && \ defined(AO_HAVE_compare_and_swap_release) # define AO_compare_and_swap(addr, old, new_val) \ AO_compare_and_swap_release(addr, old, new_val) # define AO_HAVE_compare_and_swap #endif #if !defined(AO_HAVE_compare_and_swap) && \ defined(AO_HAVE_compare_and_swap_acquire) # define AO_compare_and_swap(addr, old, new_val) \ AO_compare_and_swap_acquire(addr, old, new_val) # define AO_HAVE_compare_and_swap #endif #if !defined(AO_HAVE_compare_and_swap) && \ defined(AO_HAVE_compare_and_swap_write) # define AO_compare_and_swap(addr, old, new_val) \ AO_compare_and_swap_write(addr, old, new_val) # define AO_HAVE_compare_and_swap #endif #if !defined(AO_HAVE_compare_and_swap) && \ defined(AO_HAVE_compare_and_swap_read) # define AO_compare_and_swap(addr, old, new_val) \ AO_compare_and_swap_read(addr, old, new_val) # define AO_HAVE_compare_and_swap #endif #if defined(AO_HAVE_compare_and_swap_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_compare_and_swap_full) # define AO_compare_and_swap_full(addr, old, new_val) \ (AO_nop_full(), AO_compare_and_swap_acquire(addr, old, new_val)) # define AO_HAVE_compare_and_swap_full #endif #if !defined(AO_HAVE_compare_and_swap_release_write) && \ defined(AO_HAVE_compare_and_swap_write) # define AO_compare_and_swap_release_write(addr, old, new_val) \ AO_compare_and_swap_write(addr, old, new_val) # define AO_HAVE_compare_and_swap_release_write #endif #if !defined(AO_HAVE_compare_and_swap_release_write) && \ defined(AO_HAVE_compare_and_swap_release) # define AO_compare_and_swap_release_write(addr, old, new_val) \ AO_compare_and_swap_release(addr, old, new_val) # define AO_HAVE_compare_and_swap_release_write #endif #if !defined(AO_HAVE_compare_and_swap_acquire_read) && \ defined(AO_HAVE_compare_and_swap_read) # define AO_compare_and_swap_acquire_read(addr, old, new_val) \ AO_compare_and_swap_read(addr, old, new_val) # define AO_HAVE_compare_and_swap_acquire_read #endif #if !defined(AO_HAVE_compare_and_swap_acquire_read) && \ defined(AO_HAVE_compare_and_swap_acquire) # define AO_compare_and_swap_acquire_read(addr, old, new_val) \ AO_compare_and_swap_acquire(addr, old, new_val) # define AO_HAVE_compare_and_swap_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_compare_and_swap_acquire_read) # define AO_compare_and_swap_dd_acquire_read(addr, old, new_val) \ AO_compare_and_swap_acquire_read(addr, old, new_val) # define AO_HAVE_compare_and_swap_dd_acquire_read # endif #else # if defined(AO_HAVE_compare_and_swap) # define AO_compare_and_swap_dd_acquire_read(addr, old, new_val) \ AO_compare_and_swap(addr, old, new_val) # define 
AO_HAVE_compare_and_swap_dd_acquire_read # endif #endif #include "generalize-small.h" /* Compare_double_and_swap_double */ #if defined(AO_HAVE_compare_double_and_swap_double) && defined(AO_HAVE_nop_full)\ && !defined(AO_HAVE_compare_double_and_swap_double_acquire) AO_INLINE int AO_compare_double_and_swap_double_acquire(volatile AO_double_t *addr, AO_t o1, AO_t o2, AO_t n1, AO_t n2) { int result = AO_compare_double_and_swap_double(addr, o1, o2, n1, n2); AO_nop_full(); return result; } # define AO_HAVE_compare_double_and_swap_double_acquire #endif #if defined(AO_HAVE_compare_double_and_swap_double) \ && defined(AO_HAVE_nop_full)\ && !defined(AO_HAVE_compare_double_and_swap_double_release) # define AO_compare_double_and_swap_double_release(addr, o1, o2, n1, n2) \ (AO_nop_full(), AO_compare_double_and_swap_double(addr, o1, o2, n1, n2)) # define AO_HAVE_compare_double_and_swap_double_release #endif #if defined(AO_HAVE_compare_double_and_swap_double_full) # if !defined(AO_HAVE_compare_double_and_swap_double_release) # define AO_compare_double_and_swap_double_release(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_full(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_release # endif # if !defined(AO_HAVE_compare_double_and_swap_double_acquire) # define AO_compare_double_and_swap_double_acquire(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_full(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_acquire # endif # if !defined(AO_HAVE_compare_double_and_swap_double_write) # define AO_compare_double_and_swap_double_write(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_full(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_write # endif # if !defined(AO_HAVE_compare_double_and_swap_double_read) # define AO_compare_double_and_swap_double_read(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_full(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_read # endif #endif /* AO_HAVE_compare_double_and_swap_double_full */ #if !defined(AO_HAVE_compare_double_and_swap_double) && \ defined(AO_HAVE_compare_double_and_swap_double_release) # define AO_compare_double_and_swap_double(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_release(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double #endif #if !defined(AO_HAVE_compare_double_and_swap_double) && \ defined(AO_HAVE_compare_double_and_swap_double_acquire) # define AO_compare_double_and_swap_double(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_acquire(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double #endif #if !defined(AO_HAVE_compare_double_and_swap_double) && \ defined(AO_HAVE_compare_double_and_swap_double_write) # define AO_compare_double_and_swap_double(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_write(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double #endif #if !defined(AO_HAVE_compare_double_and_swap_double) && \ defined(AO_HAVE_compare_double_and_swap_double_read) # define AO_compare_double_and_swap_double(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_read(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double #endif #if defined(AO_HAVE_compare_double_and_swap_double_acquire) &&\ defined(AO_HAVE_nop_full) && \ !defined(AO_HAVE_compare_double_and_swap_double_full) # define AO_compare_double_and_swap_double_full(addr, o1, o2, n1, n2) \ (AO_nop_full(), AO_compare_double_and_swap_double_acquire(addr, o1, o2, n1, n2)) # define 
AO_HAVE_compare_double_and_swap_double_full #endif #if !defined(AO_HAVE_compare_double_and_swap_double_release_write) && \ defined(AO_HAVE_compare_double_and_swap_double_write) # define AO_compare_double_and_swap_double_release_write(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_write(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_release_write #endif #if !defined(AO_HAVE_compare_double_and_swap_double_release_write) && \ defined(AO_HAVE_compare_double_and_swap_double_release) # define AO_compare_double_and_swap_double_release_write(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_release(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_release_write #endif #if !defined(AO_HAVE_compare_double_and_swap_double_acquire_read) && \ defined(AO_HAVE_compare_double_and_swap_double_read) # define AO_compare_double_and_swap_double_acquire_read(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_read(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_acquire_read #endif #if !defined(AO_HAVE_compare_double_and_swap_double_acquire_read) && \ defined(AO_HAVE_compare_double_and_swap_double_acquire) # define AO_compare_double_and_swap_double_acquire_read(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_acquire(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_acquire_read #endif #ifdef AO_NO_DD_ORDERING # if defined(AO_HAVE_compare_double_and_swap_double_acquire_read) # define AO_compare_double_and_swap_double_dd_acquire_read(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double_acquire_read(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_dd_acquire_read # endif #else # if defined(AO_HAVE_compare_double_and_swap_double) # define AO_compare_double_and_swap_double_dd_acquire_read(addr, o1, o2, n1, n2) \ AO_compare_double_and_swap_double(addr, o1, o2, n1, n2) # define AO_HAVE_compare_double_and_swap_double_dd_acquire_read # endif #endif /* Compare_and_swap_double */ #if defined(AO_HAVE_compare_and_swap_double) && defined(AO_HAVE_nop_full)\ && !defined(AO_HAVE_compare_and_swap_double_acquire) AO_INLINE int AO_compare_and_swap_double_acquire(volatile AO_double_t *addr, AO_t o1, AO_t n1, AO_t n2) { int result = AO_compare_and_swap_double(addr, o1, n1, n2); AO_nop_full(); return result; } # define AO_HAVE_compare_and_swap_double_acquire #endif #if defined(AO_HAVE_compare_and_swap_double) \ && defined(AO_HAVE_nop_full)\ && !defined(AO_HAVE_compare_and_swap_double_release) # define AO_compare_and_swap_double_release(addr, o1, n1, n2) \ (AO_nop_full(), AO_compare_and_swap_double(addr, o1, n1, n2)) # define AO_HAVE_compare_and_swap_double_release #endif #if defined(AO_HAVE_compare_and_swap_double_full) # if !defined(AO_HAVE_compare_and_swap_double_release) # define AO_compare_and_swap_double_release(addr, o1, n1, n2) \ AO_compare_and_swap_double_full(addr, o1, n1, n2) # define AO_HAVE_compare_and_swap_double_release # endif # if !defined(AO_HAVE_compare_and_swap_double_acquire) # define AO_compare_and_swap_double_acquire(addr, o1, n1, n2) \ AO_compare_and_swap_double_full(addr, o1, n1, n2) # define AO_HAVE_compare_and_swap_double_acquire # endif # if !defined(AO_HAVE_compare_and_swap_double_write) # define AO_compare_and_swap_double_write(addr, o1, n1, n2) \ AO_compare_and_swap_double_full(addr, o1, n1, n2) # define AO_HAVE_compare_and_swap_double_write # endif # if !defined(AO_HAVE_compare_and_swap_double_read) # define AO_compare_and_swap_double_read(addr, o1, n1, n2) \ 
                AO_compare_and_swap_double_full(addr, o1, n1, n2)
#  define AO_HAVE_compare_and_swap_double_read
# endif
#endif /* AO_HAVE_compare_and_swap_double_full */

#if !defined(AO_HAVE_compare_and_swap_double) && \
    defined(AO_HAVE_compare_and_swap_double_release)
# define AO_compare_and_swap_double(addr, o1, n1, n2) \
                AO_compare_and_swap_double_release(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double
#endif
#if !defined(AO_HAVE_compare_and_swap_double) && \
    defined(AO_HAVE_compare_and_swap_double_acquire)
# define AO_compare_and_swap_double(addr, o1, n1, n2) \
                AO_compare_and_swap_double_acquire(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double
#endif
#if !defined(AO_HAVE_compare_and_swap_double) && \
    defined(AO_HAVE_compare_and_swap_double_write)
# define AO_compare_and_swap_double(addr, o1, n1, n2) \
                AO_compare_and_swap_double_write(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double
#endif
#if !defined(AO_HAVE_compare_and_swap_double) && \
    defined(AO_HAVE_compare_and_swap_double_read)
# define AO_compare_and_swap_double(addr, o1, n1, n2) \
                AO_compare_and_swap_double_read(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double
#endif

#if defined(AO_HAVE_compare_and_swap_double_acquire) && \
    defined(AO_HAVE_nop_full) && \
    !defined(AO_HAVE_compare_and_swap_double_full)
# define AO_compare_and_swap_double_full(addr, o1, n1, n2) \
                (AO_nop_full(), \
                 AO_compare_and_swap_double_acquire(addr, o1, n1, n2))
# define AO_HAVE_compare_and_swap_double_full
#endif

#if !defined(AO_HAVE_compare_and_swap_double_release_write) && \
    defined(AO_HAVE_compare_and_swap_double_write)
# define AO_compare_and_swap_double_release_write(addr, o1, n1, n2) \
                AO_compare_and_swap_double_write(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double_release_write
#endif
#if !defined(AO_HAVE_compare_and_swap_double_release_write) && \
    defined(AO_HAVE_compare_and_swap_double_release)
# define AO_compare_and_swap_double_release_write(addr, o1, n1, n2) \
                AO_compare_and_swap_double_release(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double_release_write
#endif
#if !defined(AO_HAVE_compare_and_swap_double_acquire_read) && \
    defined(AO_HAVE_compare_and_swap_double_read)
# define AO_compare_and_swap_double_acquire_read(addr, o1, n1, n2) \
                AO_compare_and_swap_double_read(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double_acquire_read
#endif
#if !defined(AO_HAVE_compare_and_swap_double_acquire_read) && \
    defined(AO_HAVE_compare_and_swap_double_acquire)
# define AO_compare_and_swap_double_acquire_read(addr, o1, n1, n2) \
                AO_compare_and_swap_double_acquire(addr, o1, n1, n2)
# define AO_HAVE_compare_and_swap_double_acquire_read
#endif

#ifdef AO_NO_DD_ORDERING
# if defined(AO_HAVE_compare_and_swap_double_acquire_read)
#   define AO_compare_and_swap_double_dd_acquire_read(addr, o1, n1, n2) \
                AO_compare_and_swap_double_acquire_read(addr, o1, n1, n2)
#   define AO_HAVE_compare_and_swap_double_dd_acquire_read
# endif
#else
# if defined(AO_HAVE_compare_and_swap_double)
#   define AO_compare_and_swap_double_dd_acquire_read(addr, o1, n1, n2) \
                AO_compare_and_swap_double(addr, o1, n1, n2)
#   define AO_HAVE_compare_and_swap_double_dd_acquire_read
# endif
#endif
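/* Sketch (illustration only; the sketch_* names are hypothetical) of the  */
/* classic use of a double-width CAS: pairing a pointer with a version     */
/* counter so that ABA-style reuse of the pointer is detected.  A rough    */
/* C11 equivalent, which may be implemented with a lock unless the target  */
/* supports a native two-word CAS:                                         */
#if 0
#include <stdatomic.h>
#include <stdint.h>
typedef struct { uintptr_t ptr; uintptr_t version; } sketch_pair_t;
static _Atomic sketch_pair_t sketch_top;
static inline int
sketch_update(sketch_pair_t expected, uintptr_t new_ptr)
{
  /* Bumping the counter on every update makes a recycled pointer value */
  /* fail the comparison, which a single-word CAS could not detect.     */
  sketch_pair_t desired = { new_ptr, expected.version + 1 };
  return atomic_compare_exchange_strong(&sketch_top, &expected, desired);
}
#endif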
AO_compare_double_and_swap_double_release(addr, old_val.AO_val1, old_val.AO_val2, new_val.AO_val1, new_val.AO_val2); } #define AO_HAVE_double_compare_and_swap_release #endif #if defined(AO_HAVE_compare_double_and_swap_double_acquire) && \ !defined(AO_HAVE_double_compare_and_swap_acquire) AO_INLINE int AO_double_compare_and_swap_acquire(volatile AO_double_t *addr, AO_double_t old_val, AO_double_t new_val) { return AO_compare_double_and_swap_double_acquire(addr, old_val.AO_val1, old_val.AO_val2, new_val.AO_val1, new_val.AO_val2); } #define AO_HAVE_double_compare_and_swap_acquire #endif #if defined(AO_HAVE_compare_double_and_swap_double_full) && \ !defined(AO_HAVE_double_compare_and_swap_full) AO_INLINE int AO_double_compare_and_swap_full(volatile AO_double_t *addr, AO_double_t old_val, AO_double_t new_val) { return AO_compare_double_and_swap_double_full(addr, old_val.AO_val1, old_val.AO_val2, new_val.AO_val1, new_val.AO_val2); } #define AO_HAVE_double_compare_and_swap_full #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/README ================================================ There are two kinds of entities in this directory: - Subdirectories corresponding to specific compilers (or compiler/OS combinations). Each of these includes one or more architecture-specific headers. - More generic header files corresponding to a particular ordering and/or atomicity property that might be shared by multiple hardware platforms. ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/acquire_release_volatile.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * This file adds definitions appropriate for environments in which an AO_t * volatile load has acquire semantics, and an AO_t volatile store has release * semantics. This is arguably supposed to be true with the standard Itanium * software conventions. */ /* * Empirically gcc/ia64 does some reordering of ordinary operations around volatiles * even when we think it shouldn't. Gcc 3.3 and earlier could reorder a volatile store * with another store. As of March 2005, gcc pre-4 reused previously computed * common subexpressions across a volatile load. * Hence we now add compiler barriers for gcc. 
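 *
 * [Editorial aside, added by the editor; not part of the original
 * libatomic_ops source. The acquire load / release store pair defined just
 * below is what makes the classic message-passing idiom safe. The sketch
 * that follows shows that idiom in standard C11 <stdatomic.h> rather than
 * the AO_ API, so it is self-contained; all names in it (payload, ready,
 * producer, consumer) are illustrative only.]
 */
#if 0 /* editorial illustration only; not compiled as part of this header */
#include <stdatomic.h>

static int payload;             /* plain data being published */
static atomic_int ready;        /* guard flag, zero-initialized */

static void producer(void)
{
    payload = 42;                                           /* 1: write data */
    atomic_store_explicit(&ready, 1, memory_order_release); /* 2: publish   */
}

static void consumer(void)
{
    if (atomic_load_explicit(&ready, memory_order_acquire)) /* 3: observe   */
        (void)payload;       /* 4: guaranteed to read 42, not a stale value */
}
#endif
/*
 * [End of editorial aside; the original header comment closes below.]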
*/ #if !defined(AO_GCC_BARRIER) # if defined(__GNUC__) # define AO_GCC_BARRIER() AO_compiler_barrier() # else # define AO_GCC_BARRIER() # endif #endif AO_INLINE AO_t AO_load_acquire(const volatile AO_t *p) { AO_t result = *p; /* A normal volatile load generates an ld.acq */ AO_GCC_BARRIER(); return result; } #define AO_HAVE_load_acquire AO_INLINE void AO_store_release(volatile AO_t *p, AO_t val) { AO_GCC_BARRIER(); /* A normal volatile store generates an st.rel */ *p = val; } #define AO_HAVE_store_release ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/aligned_atomic_load_store.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of AO_t are * atomic for all legal alignments. */ AO_INLINE AO_t AO_load(const volatile AO_t *addr) { assert(((size_t)addr & (sizeof(AO_t) - 1)) == 0); /* Cast away the volatile for architectures where */ /* volatile adds barrier semantics. */ return *(AO_t *)addr; } #define AO_HAVE_load AO_INLINE void AO_store(volatile AO_t *addr, AO_t new_val) { assert(((size_t)addr & (sizeof(AO_t) - 1)) == 0); (*(AO_t *)addr) = new_val; } #define AO_HAVE_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/all_acquire_release_volatile.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Describes architectures on which volatile AO_t, unsigned char, unsigned * short, and unsigned int loads and stores have acquire/release semantics for * all normally legal alignments. */ #include "acquire_release_volatile.h" #include "char_acquire_release_volatile.h" #include "short_acquire_release_volatile.h" #include "int_acquire_release_volatile.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/all_aligned_atomic_load_store.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Describes architectures on which AO_t, unsigned char, unsigned short, * and unsigned int loads and stores are atomic for all normally legal * alignments. */ #include "aligned_atomic_load_store.h" #include "char_atomic_load_store.h" #include "short_aligned_atomic_load_store.h" #include "int_aligned_atomic_load_store.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/all_atomic_load_store.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Describes architectures on which AO_t, unsigned char, unsigned short, * and unsigned int loads and stores are atomic for all normally legal * alignments. */ #include "atomic_load_store.h" #include "char_atomic_load_store.h" #include "short_atomic_load_store.h" #include "int_atomic_load_store.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/ao_t_is_int.h ================================================ /* * Copyright (c) 2003-2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Inclusion of this file signifies that AO_t is in fact int. Hence * any AO_... operations can also serve as AO_int_... operations. * We currently define only the more important ones here, and allow for * the normal generalization process to define the others. * We should probably add others in the future.
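 *
 * [Editorial aside, added by the editor; not part of the original
 * libatomic_ops source. The macros below reuse the word-sized primitive for
 * int by casting the int address to AO_t*, which is valid precisely because
 * AO_t is int here. A self-contained C11 rendering of the same wrapping
 * idea, with values cast instead of pointers; word_cas and int_cas are
 * illustrative names, not part of any API:]
 */
#if 0 /* editorial illustration only; not compiled as part of this header */
#include <stdatomic.h>
#include <stdbool.h>

/* word-sized CAS primitive, standing in for AO_compare_and_swap */
static bool word_cas(volatile atomic_long *addr, long old, long new_val)
{
    return atomic_compare_exchange_strong(addr, &old, new_val);
}

/* the int variant is a pure wrapper around the word-sized one, which is
 * exactly what the AO_int_... macros below do when AO_t is int */
static bool int_cas(volatile atomic_long *addr, int old, int new_val)
{
    return word_cas(addr, (long)old, (long)new_val);
}
#endif
/*
 * [End of editorial aside; the original header comment closes below.]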
*/ #if defined(AO_HAVE_compare_and_swap_full) && \ !defined(AO_HAVE_int_compare_and_swap_full) # define AO_int_compare_and_swap_full(addr, old, new_val) \ AO_compare_and_swap_full((volatile AO_t *)(addr), \ (AO_t)(old), (AO_t)(new_val)) # define AO_HAVE_int_compare_and_swap_full # endif #if defined(AO_HAVE_compare_and_swap_acquire) && \ !defined(AO_HAVE_int_compare_and_swap_acquire) # define AO_int_compare_and_swap_acquire(addr, old, new_val) \ AO_compare_and_swap_acquire((volatile AO_t *)(addr), \ (AO_t)(old), (AO_t)(new_val)) # define AO_HAVE_int_compare_and_swap_acquire # endif #if defined(AO_HAVE_compare_and_swap_release) && \ !defined(AO_HAVE_int_compare_and_swap_release) # define AO_int_compare_and_swap_release(addr, old, new_val) \ AO_compare_and_swap_release((volatile AO_t *)(addr), \ (AO_t)(old), (AO_t)(new_val)) # define AO_HAVE_int_compare_and_swap_release # endif #if defined(AO_HAVE_compare_and_swap_write) && \ !defined(AO_HAVE_int_compare_and_swap_write) # define AO_int_compare_and_swap_write(addr, old, new_val) \ AO_compare_and_swap_write((volatile AO_t *)(addr), \ (AO_t)(old), (AO_t)(new_val)) # define AO_HAVE_int_compare_and_swap_write # endif #if defined(AO_HAVE_compare_and_swap_read) && \ !defined(AO_HAVE_int_compare_and_swap_read) # define AO_int_compare_and_swap_read(addr, old, new_val) \ AO_compare_and_swap_read((volatile AO_t *)(addr), \ (AO_t)(old), (AO_t)(new_val)) # define AO_HAVE_int_compare_and_swap_read # endif #if defined(AO_HAVE_compare_and_swap) && \ !defined(AO_HAVE_int_compare_and_swap) # define AO_int_compare_and_swap(addr, old, new_val) \ AO_compare_and_swap((volatile AO_t *)(addr), \ (AO_t)(old), (AO_t)(new_val)) # define AO_HAVE_int_compare_and_swap # endif #if defined(AO_HAVE_load_acquire) && \ !defined(AO_HAVE_int_load_acquire) # define AO_int_load_acquire(addr) \ (int)AO_load_acquire((const volatile AO_t *)(addr)) # define AO_HAVE_int_load_acquire # endif #if defined(AO_HAVE_store_release) && \ !defined(AO_HAVE_int_store_release) # define AO_int_store_release(addr, val) \ AO_store_release((volatile AO_t *)(addr), (AO_t)(val)) # define AO_HAVE_int_store_release # endif #if defined(AO_HAVE_fetch_and_add_full) && \ !defined(AO_HAVE_int_fetch_and_add_full) # define AO_int_fetch_and_add_full(addr, incr) \ (int)AO_fetch_and_add_full((volatile AO_t *)(addr), (AO_t)(incr)) # define AO_HAVE_int_fetch_and_add_full # endif #if defined(AO_HAVE_fetch_and_add1_acquire) && \ !defined(AO_HAVE_int_fetch_and_add1_acquire) # define AO_int_fetch_and_add1_acquire(addr) \ (int)AO_fetch_and_add1_acquire((volatile AO_t *)(addr)) # define AO_HAVE_int_fetch_and_add1_acquire # endif #if defined(AO_HAVE_fetch_and_add1_release) && \ !defined(AO_HAVE_int_fetch_and_add1_release) # define AO_int_fetch_and_add1_release(addr) \ (int)AO_fetch_and_add1_release((volatile AO_t *)(addr)) # define AO_HAVE_int_fetch_and_add1_release # endif #if defined(AO_HAVE_fetch_and_sub1_acquire) && \ !defined(AO_HAVE_int_fetch_and_sub1_acquire) # define AO_int_fetch_and_sub1_acquire(addr) \ (int)AO_fetch_and_sub1_acquire((volatile AO_t *)(addr)) # define AO_HAVE_int_fetch_and_sub1_acquire # endif #if defined(AO_HAVE_fetch_and_sub1_release) && \ !defined(AO_HAVE_int_fetch_and_sub1_release) # define AO_int_fetch_and_sub1_release(addr) \ (int)AO_fetch_and_sub1_release((volatile AO_t *)(addr)) # define AO_HAVE_int_fetch_and_sub1_release # endif ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/armcc/arm_v6.h 
================================================ /* * Copyright (c) 2007 by NEC LE-IT: All rights reserved. * A transcription of ARMv6 atomic operations for the ARM Realview Toolchain. * This code works with armcc from RVDS 3.1 * This is based on work in gcc/arm.h by * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ #include "../read_ordered.h" #include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */ #if __TARGET_ARCH_ARM < 6 Dont use with ARM instruction sets lower than v6 #else #include "../standard_ao_double_t.h" /* NEC LE-IT: ARMv6 is the first architecture providing support for simple LL/SC * A data memory barrier must be raised via CP15 command (see documentation). * * ARMv7 is compatible to ARMv6 but has a simpler command for issuing a * memory barrier (DMB). Raising it via CP15 should still work as told me by the * support engineers. If it turns out to be much quicker than we should implement * custom code for ARMv7 using the asm { dmb } command. * * If only a single processor is used, we can define AO_UNIPROCESSOR * and do not need to access CP15 for ensuring a DMB at all. */ AO_INLINE void AO_nop_full(void) { #ifndef AO_UNIPROCESSOR unsigned int dest=0; /* issue an data memory barrier (keeps ordering of memory transactions */ /* before and after this operation) */ __asm { mcr p15,0,dest,c7,c10,5 } ; #endif } #define AO_HAVE_nop_full AO_INLINE AO_t AO_load(const volatile AO_t *addr) { /* Cast away the volatile in case it adds fence semantics */ return (*(const AO_t *)addr); } #define AO_HAVE_load /* NEC LE-IT: atomic "store" - according to ARM documentation this is * the only safe way to set variables also used in LL/SC environment. * A direct write won't be recognized by the LL/SC construct in other CPUs. * * HB: Based on subsequent discussion, I think it would be OK to use an * ordinary store here if we knew that interrupt handlers always cleared * the reservation. They should, but there is some doubt that this is * currently always the case for e.g. Linux. */ AO_INLINE void AO_store(volatile AO_t *addr, AO_t value) { unsigned long tmp; retry: __asm { ldrex tmp, [addr] strex tmp, value, [addr] teq tmp, #0 bne retry }; } #define AO_HAVE_store /* NEC LE-IT: replace the SWAP as recommended by ARM: "Applies to: ARM11 Cores Though the SWP instruction will still work with ARM V6 cores, it is recommended to use the new V6 synchronization instructions. The SWP instruction produces locked read and write accesses which are atomic, i.e. another operation cannot be done between these locked accesses which ties up external bus (AHB,AXI) bandwidth and can increase worst case interrupt latencies. LDREX,STREX are more flexible, other instructions can be done between the LDREX and STREX accesses. 
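 *
 * [Editorial aside, added by the editor; not part of the original
 * libatomic_ops source. Every AO_ routine in this file is an LL/SC retry
 * loop: ldrex loads the word and sets a reservation, strex stores only if
 * the reservation still holds, and the code branches back on failure.
 * C11's compare_exchange_weak expresses the same loop portably; "weak"
 * means it may fail spuriously, just like strex. A sketch mirroring
 * AO_fetch_and_add (fetch_and_add here is an illustrative name):]
 */
#if 0 /* editorial illustration only; not compiled as part of this header */
#include <stdatomic.h>

static unsigned long fetch_and_add(volatile atomic_ulong *p,
                                   unsigned long incr)
{
    unsigned long old = atomic_load_explicit(p, memory_order_relaxed);
    /* retry until the store-conditional (the weak CAS) succeeds;
     * on failure, old is refreshed with the current value */
    while (!atomic_compare_exchange_weak(p, &old, old + incr)) {
    }
    return old;
}
#endif
/*
 * [End of editorial aside; the quoted ARM application note closes below.]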
" */ AO_INLINE AO_TS_t AO_test_and_set(volatile AO_TS_t *addr) { AO_TS_t oldval; unsigned long tmp; unsigned long one = 1; retry: __asm { ldrex oldval, [addr] strex tmp, one, [addr] teq tmp, #0 bne retry } return oldval; } #define AO_HAVE_test_and_set /* NEC LE-IT: fetch and add for ARMv6 */ AO_INLINE AO_t AO_fetch_and_add(volatile AO_t *p, AO_t incr) { unsigned long tmp,tmp2; AO_t result; retry: __asm { ldrex result, [p] add tmp, incr, result strex tmp2, tmp, [p] teq tmp2, #0 bne retry } return result; } #define AO_HAVE_fetch_and_add /* NEC LE-IT: fetch and add1 for ARMv6 */ AO_INLINE AO_t AO_fetch_and_add1(volatile AO_t *p) { unsigned long tmp,tmp2; AO_t result; retry: __asm { ldrex result, [p] add tmp, result, #1 strex tmp2, tmp, [p] teq tmp2, #0 bne retry } return result; } #define AO_HAVE_fetch_and_add1 /* NEC LE-IT: fetch and sub for ARMv6 */ AO_INLINE AO_t AO_fetch_and_sub1(volatile AO_t *p) { unsigned long tmp,tmp2; AO_t result; retry: __asm { ldrex result, [p] sub tmp, result, #1 strex tmp2, tmp, [p] teq tmp2, #0 bne retry } return result; } #define AO_HAVE_fetch_and_sub1 /* NEC LE-IT: compare and swap */ /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) { AO_t result,tmp; retry: __asm__ { mov result, #2 ldrex tmp, [addr] teq tmp, old_val it eq strexeq result, new_val, [addr] teq result, #1 beq retry } return !(result&2); } #define AO_HAVE_compare_and_swap /* helper functions for the Realview compiler: LDREXD is not usable * with inline assembler, so use the "embedded" assembler as * suggested by ARM Dev. support (June 2008). */ __asm inline double_ptr_storage load_ex(volatile AO_double_t *addr) { LDREXD r0,r1,[r0] } __asm inline int store_ex(AO_t val1, AO_t val2, volatile AO_double_t *addr) { STREXD r3,r0,r1,[r2] MOV r0,r3 } AO_INLINE int AO_compare_double_and_swap_double(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { double_ptr_storage old_val = ((double_ptr_storage)old_val2 << 32) | old_val1; double_ptr_storage tmp; int result; while(1) { tmp = load_ex(addr); if(tmp != old_val) return 0; result = store_ex(new_val1, new_val2, addr); if(!result) return 1; } } #define AO_HAVE_compare_double_and_swap_double #endif // __TARGET_ARCH_ARM ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/atomic_load_store.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of AO_t are * atomic for all legal alignments. */ AO_INLINE AO_t AO_load(const volatile AO_t *addr) { /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(const AO_t *)addr); } #define AO_HAVE_load AO_INLINE void AO_store(volatile AO_t *addr, AO_t new_val) { (*(AO_t *)addr) = new_val; } #define AO_HAVE_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/char_acquire_release_volatile.h ================================================ /* * Copyright (c) 2003-2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * This file adds definitions appropriate for environments in which an unsigned char * volatile load has acquire semantics, and an unsigned char volatile store has release * semantics. This is true with the standard Itanium ABI. */ #if !defined(AO_GCC_BARRIER) # if defined(__GNUC__) # define AO_GCC_BARRIER() AO_compiler_barrier() # else # define AO_GCC_BARRIER() # endif #endif AO_INLINE unsigned char AO_char_load_acquire(const volatile unsigned char *p) { unsigned char result = *p; /* A normal volatile load generates an ld.acq */ AO_GCC_BARRIER(); return result; } #define AO_HAVE_char_load_acquire AO_INLINE void AO_char_store_release(volatile unsigned char *p, unsigned char val) { AO_GCC_BARRIER(); /* A normal volatile store generates an st.rel */ *p = val; } #define AO_HAVE_char_store_release ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/char_atomic_load_store.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of unsigned char are * atomic for all legal alignments. */ AO_INLINE unsigned char AO_char_load(const volatile unsigned char *addr) { /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(const unsigned char *)addr); } #define AO_HAVE_char_load AO_INLINE void AO_char_store(volatile unsigned char *addr, unsigned char new_val) { (*(unsigned char *)addr) = new_val; } #define AO_HAVE_char_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/emul_cas.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Ensure, if at all possible, that AO_compare_and_swap_full() is * available. The emulation should be brute-force signal-safe, even * though it actually blocks. * Including this file will generate an error if AO_compare_and_swap_full() * cannot be made available. * This will be included from platform-specific atomic_ops files * if appropriate, and if AO_FORCE_CAS is defined. It should not be * included directly, especially since it affects the implementation * of other atomic update primitives.
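 *
 * [Editorial aside, added by the editor; not part of the original
 * libatomic_ops source. The emulation strategy is "CAS under a lock": take
 * a lock, perform an ordinary read-compare-store, release the lock. The
 * minimal C11 sketch below shows the core idea; a real emulation, such as
 * the one behind this header, also has to worry about signal safety and
 * typically spreads contention over several locks. cas_lock and
 * emulated_cas are illustrative names only:]
 */
#if 0 /* editorial illustration only; not compiled as part of this header */
#include <stdatomic.h>

static atomic_flag cas_lock = ATOMIC_FLAG_INIT;

/* atomic only with respect to other operations that take cas_lock */
static int emulated_cas(volatile unsigned long *addr,
                        unsigned long old, unsigned long new_val)
{
    int ok;
    while (atomic_flag_test_and_set_explicit(&cas_lock, memory_order_acquire)) {
        /* spin until the lock is free */
    }
    ok = (*addr == old);
    if (ok)
        *addr = new_val;
    atomic_flag_clear_explicit(&cas_lock, memory_order_release);
    return ok;
}
#endif
/*
 * [End of editorial aside; the original header comment resumes below.]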
* The implementation assumes that only AO_store_XXX and AO_test_and_set_XXX * variants are defined, and that AO_test_and_set_XXX is not used to * operate on compare_and_swap locations. */ #if !defined(ATOMIC_OPS_H) # error This file should not be included directly. #endif #ifndef AO_HAVE_double_t # include "standard_ao_double_t.h" #endif int AO_compare_and_swap_emulation(volatile AO_t *addr, AO_t old, AO_t new_val); int AO_compare_double_and_swap_double_emulation(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2); void AO_store_full_emulation(volatile AO_t *addr, AO_t val); #define AO_compare_and_swap_full(addr, old, newval) \ AO_compare_and_swap_emulation(addr, old, newval) #define AO_HAVE_compare_and_swap_full #ifndef AO_HAVE_compare_double_and_swap_double # define AO_compare_double_and_swap_double_full(addr, old1, old2, \ newval1, newval2) \ AO_compare_double_and_swap_double_emulation(addr, old1, old2, \ newval1, newval2) # define AO_HAVE_compare_double_and_swap_double_full #endif #undef AO_store #undef AO_HAVE_store #undef AO_store_write #undef AO_HAVE_store_write #undef AO_store_release #undef AO_HAVE_store_release #undef AO_store_full #undef AO_HAVE_store_full #define AO_store_full(addr, val) AO_store_full_emulation(addr, val) #define AO_HAVE_store_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/alpha.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ #include "../atomic_load_store.h" #include "../test_and_set_t_is_ao_t.h" #define AO_NO_DD_ORDERING /* Data dependence does not imply read ordering. */ AO_INLINE void AO_nop_full(void) { __asm__ __volatile__("mb" : : : "memory"); } #define AO_HAVE_nop_full AO_INLINE void AO_nop_write(void) { __asm__ __volatile__("wmb" : : : "memory"); } #define AO_HAVE_nop_write /* mb should be used for AO_nop_read(). That's the default. */ /* We believe that ldq_l ... stq_c does not imply any memory barrier. */ /* We should add an explicit fetch_and_add definition. */ AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) { unsigned long was_equal; unsigned long temp; __asm__ __volatile__( "1: ldq_l %0,%1\n" " cmpeq %0,%4,%2\n" " mov %3,%0\n" " beq %2,2f\n" " stq_c %0,%1\n" " beq %0,1b\n" "2:\n" :"=&r" (temp), "=m" (*addr), "=&r" (was_equal) : "r" (new_val), "Ir" (old) :"memory"); return was_equal; } #define AO_HAVE_compare_and_swap ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/arm.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. 
* * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ #include "../read_ordered.h" #include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */ /* NEC LE-IT: ARMv6 is the first architecture providing support for simple LL/SC * A data memory barrier must be raised via CP15 command (see documentation). * * ARMv7 is compatible to ARMv6 but has a simpler command for issuing a * memory barrier (DMB). Raising it via CP15 should still work as told me by the * support engineers. If it turns out to be much quicker than we should implement * custom code for ARMv7 using the asm { dmb } command. * * If only a single processor is used, we can define AO_UNIPROCESSOR * and do not need to access CP15 for ensuring a DMB */ /* NEC LE-IT: gcc has no way to easily check the arm architecture * but defines only one of __ARM_ARCH_x__ to be true */ #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7R__) #include "../standard_ao_double_t.h" AO_INLINE void AO_nop_full(void) { #ifndef AO_UNIPROCESSOR /* issue an data memory barrier (keeps ordering of memory transactions */ /* before and after this operation) */ unsigned int arg=0; __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" : : "r" (arg) : "memory"); #endif } #define AO_HAVE_nop_full /* NEC LE-IT: AO_t load is simple reading */ AO_INLINE AO_t AO_load(const volatile AO_t *addr) { /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(const AO_t *)addr); } #define AO_HAVE_load /* NEC LE-IT: atomic "store" - according to ARM documentation this is * the only safe way to set variables also used in LL/SC environment. * A direct write won't be recognized by the LL/SC construct on the _same_ CPU. * Support engineers response for behaviour of ARMv6: * Core1 Core2 SUCCESS =================================== LDREX(x) STREX(x) Yes ----------------------------------- LDREX(x) STR(x) STREX(x) No ----------------------------------- LDREX(x) STR(x) STREX(x) Yes ----------------------------------- * ARMv7 behaves similar, see documentation CortexA8 TRM, point 8.5 * * HB: I think this is only a problem if interrupt handlers do not clear * the reservation, as they almost certainly should. Probably change this back * in a while? */ AO_INLINE void AO_store(volatile AO_t *addr, AO_t value) { AO_t flag; __asm__ __volatile__("@AO_store\n" "1: ldrex %0, [%2]\n" " strex %0, %3, [%2]\n" " teq %0, #0\n" " bne 1b" : "=&r"(flag), "+m"(*addr) : "r" (addr), "r"(value) : "cc"); } #define AO_HAVE_store /* NEC LE-IT: replace the SWAP as recommended by ARM: "Applies to: ARM11 Cores Though the SWP instruction will still work with ARM V6 cores, it is recommended to use the new V6 synchronization instructions. The SWP instruction produces 'locked' read and write accesses which are atomic, i.e. another operation cannot be done between these locked accesses which ties up external bus (AHB,AXI) bandwidth and can increase worst case interrupt latencies. 
LDREX,STREX are more flexible, other instructions can be done between the LDREX and STREX accesses. " */ AO_INLINE AO_TS_t AO_test_and_set(volatile AO_TS_t *addr) { AO_TS_t oldval; unsigned long flag; __asm__ __volatile__("@AO_test_and_set\n" "1: ldrex %0, [%3]\n" " strex %1, %4, [%3]\n" " teq %1, #0\n" " bne 1b\n" : "=&r"(oldval),"=&r"(flag), "+m"(*addr) : "r"(addr), "r"(1) : "cc"); return oldval; } #define AO_HAVE_test_and_set /* NEC LE-IT: fetch and add for ARMv6 */ AO_INLINE AO_t AO_fetch_and_add(volatile AO_t *p, AO_t incr) { unsigned long flag,tmp; AO_t result; __asm__ __volatile__("@AO_fetch_and_add\n" "1: ldrex %0, [%5]\n" /* get original */ " add %2, %0, %4\n" /* sum up in incr */ " strex %1, %2, [%5]\n" /* store them */ " teq %1, #0\n" " bne 1b\n" : "=&r"(result),"=&r"(flag),"=&r"(tmp),"+m"(*p) /* 0..3 */ : "r"(incr), "r"(p) /* 4..5 */ : "cc"); return result; } #define AO_HAVE_fetch_and_add /* NEC LE-IT: fetch and add1 for ARMv6 */ AO_INLINE AO_t AO_fetch_and_add1(volatile AO_t *p) { unsigned long flag,tmp; AO_t result; __asm__ __volatile__("@AO_fetch_and_add1\n" "1: ldrex %0, [%4]\n" /* get original */ " add %1, %0, #1\n" /* increment */ " strex %2, %1, [%4]\n" /* store them */ " teq %2, #0\n" " bne 1b\n" : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p) : "r"(p) : "cc"); return result; } #define AO_HAVE_fetch_and_add1 /* NEC LE-IT: fetch and sub for ARMv6 */ AO_INLINE AO_t AO_fetch_and_sub1(volatile AO_t *p) { unsigned long flag,tmp; AO_t result; __asm__ __volatile__("@AO_fetch_and_sub1\n" "1: ldrex %0, [%4]\n" /* get original */ " sub %1, %0, #1\n" /* decrement */ " strex %2, %1, [%4]\n" /* store them */ " teq %2, #0\n" " bne 1b\n" : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p) : "r"(p) : "cc"); return result; } #define AO_HAVE_fetch_and_sub1 /* NEC LE-IT: compare and swap */ /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) { AO_t result,tmp; __asm__ __volatile__("@ AO_compare_and_swap\n" "1: mov %0, #2\n" /* store a flag */ " ldrex %1, [%3]\n" /* get original */ " teq %1, %4\n" /* see if match */ " it eq\n" " strexeq %0, %5, [%3]\n" /* store new one if matched */ " teq %0, #1\n" " beq 1b\n" /* if update failed, repeat */ : "=&r"(result), "=&r"(tmp), "+m"(*addr) : "r"(addr), "r"(old_val), "r"(new_val) : "cc"); return !(result&2); /* if succeded, return 1, else 0 */ } #define AO_HAVE_compare_and_swap AO_INLINE int AO_compare_double_and_swap_double(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { double_ptr_storage old_val = ((double_ptr_storage)old_val2 << 32) | old_val1; double_ptr_storage new_val = ((double_ptr_storage)new_val2 << 32) | new_val1; double_ptr_storage tmp; int result; while(1) { __asm__ __volatile__("@ AO_compare_and_swap_double\n" " ldrexd %0, [%1]\n" /* get original to r1 & r2 */ : "=&r"(tmp) : "r"(addr) : "cc"); if(tmp != old_val) return 0; __asm__ __volatile__( " strexd %0, %2, [%3]\n" /* store new one if matched */ : "=&r"(result),"+m"(*addr) : "r"(new_val), "r"(addr) : "cc"); if(!result) return 1; } } #define AO_HAVE_compare_double_and_swap_double #else /* pre ARMv6 architectures ... */ /* I found a slide set that, if I read it correctly, claims that */ /* Loads followed by either a Load or Store are ordered, but nothing */ /* else is. */ /* It appears that SWP is the only simple memory barrier. 
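 *
 * [Editorial aside, added by the editor; not part of the original
 * libatomic_ops source. A test-and-set primitive such as the SWP-based
 * AO_test_and_set_full below is exactly enough to build a spinlock, which
 * is its usual role. A self-contained C11 equivalent, with atomic_flag
 * playing the part of AO_TS_t; spin_lock and spin_unlock are illustrative
 * names:]
 */
#if 0 /* editorial illustration only; not compiled as part of this header */
#include <stdatomic.h>

static atomic_flag lock_word = ATOMIC_FLAG_INIT;

static void spin_lock(void)
{
    /* test-and-set returns the previous value; loop until it was clear */
    while (atomic_flag_test_and_set_explicit(&lock_word, memory_order_acquire)) {
    }
}

static void spin_unlock(void)
{
    atomic_flag_clear_explicit(&lock_word, memory_order_release);
}
#endif
/*
 * [End of editorial aside; the original comment closes below.]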
*/ #include "../all_atomic_load_store.h" AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_VAL_t oldval; /* SWP on ARM is very similar to XCHG on x86. */ /* The first operand is the result, the second the value */ /* to be stored. Both registers must be different from addr. */ /* Make the address operand an early clobber output so it */ /* doesn't overlap with the other operands. The early clobber*/ /* on oldval is necessary to prevent the compiler allocating */ /* them to the same register if they are both unused. */ __asm__ __volatile__("swp %0, %2, [%3]" : "=&r"(oldval), "=&r"(addr) : "r"(1), "1"(addr) : "memory"); return oldval; } #define AO_HAVE_test_and_set_full #endif /* __ARM_ARCH_x */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/avr32.h ================================================ /* * Copyright (C) 2009 Bradley Smith * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #include "../all_atomic_load_store.h" #include "../ordered.h" /* There are no multiprocessor implementations. */ #include "../test_and_set_t_is_ao_t.h" AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { register long ret; __asm__ __volatile__( "xchg %[oldval], %[mem], %[newval]" : [oldval] "=&r"(ret) : [mem] "r"(addr), [newval] "r"(1) : "memory"); return (AO_TS_VAL_t)ret; } #define AO_HAVE_test_and_set_full AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { register long ret; __asm__ __volatile__( "1: ssrf 5\n" " ld.w %[res], %[mem]\n" " eor %[res], %[oldval]\n" " brne 2f\n" " stcond %[mem], %[newval]\n" " brne 1b\n" "2:\n" : [res] "=&r"(ret), [mem] "=m"(*addr) : "m"(*addr), [newval] "r"(new_val), [oldval] "r"(old) : "cc", "memory"); return (int)ret; } #define AO_HAVE_compare_and_swap_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/cris.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Most of this code originally comes from Hans-Peter Nilsson. It is included * here with his permission. * * This version has not been tested. It was copied here from a GC * patch so that we wouldn't lose the code in the upgrade to gc7. */ #include "../all_atomic_load_store.h" #include "../ordered.h" /* There are no multiprocessor implementations. */ #include "../test_and_set_t_is_ao_t.h" /* * The architecture apparently supports an "f" flag which is * set on preemption. This essentially gives us load-locked, * store-conditional primitives, though I'm not quite sure how * this would work on a hypothetical multiprocessor. -HB * * For details, see * http://developer.axis.com/doc/hardware/etrax100lx/prog_man/ * 1_architectural_description.pdf * * Presumably many other primitives (notably CAS, including the double- * width versions) could be implemented in this manner, if someone got * around to it. */ AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { /* Ripped from linuxthreads/sysdeps/cris/pt-machine.h */ register unsigned long int ret; /* Note the use of a dummy output of *addr to expose the write. The memory barrier is to stop *other* writes being moved past this code. */ __asm__ __volatile__("clearf\n" "0:\n\t" "movu.b [%2],%0\n\t" "ax\n\t" "move.b %3,[%2]\n\t" "bwf 0b\n\t" "clearf" : "=&r" (ret), "=m" (*addr) : "r" (addr), "r" ((int) 1), "m" (*addr) : "memory"); return ret; } #define AO_HAVE_test_and_set_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/hppa.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Modified by Carlos O'Donell , 2003 * - Added self-aligning lock. * */ #include "../all_atomic_load_store.h" /* Some architecture set descriptions include special "ordered" memory */ /* operations. As far as we can tell, no existing processors actually */ /* require those. Nor does it appear likely that future processors */ /* will. */ #include "../ordered.h" /* GCC will not guarantee the alignment we need, use four lock words */ /* and select the correctly aligned datum. See the glibc 2.3.2 */ /* linuxthread port for the original implementation. */ struct AO_pa_clearable_loc { int data[4]; }; #undef AO_TS_INITIALIZER #define AO_TS_t struct AO_pa_clearable_loc #define AO_TS_INITIALIZER {1,1,1,1} /* Switch meaning of set and clear, since we only have an atomic clear */ /* instruction. */ typedef enum {AO_PA_TS_set = 0, AO_PA_TS_clear = 1} AO_PA_TS_val; #define AO_TS_VAL_t AO_PA_TS_val #define AO_TS_CLEAR AO_PA_TS_clear #define AO_TS_SET AO_PA_TS_set /* The hppa only has one atomic read and modify memory operation, */ /* load and clear, so hppa spinlocks must use zero to signify that */ /* someone is holding the lock. The address used for the ldcw */ /* semaphore must be 16-byte aligned. */ #define __ldcw(a) ({ \ volatile unsigned int __ret; \ __asm__ __volatile__("ldcw 0(%2),%0" \ : "=r" (__ret), "=m" (*(a)) : "r" (a)); \ __ret; \ }) /* Because malloc only guarantees 8-byte alignment for malloc'd data, */ /* and GCC only guarantees 8-byte alignment for stack locals, we can't */ /* be assured of 16-byte alignment for atomic lock data even if we */ /* specify "__attribute ((aligned(16)))" in the type declaration. So, */ /* we use a struct containing an array of four ints for the atomic lock */ /* type and dynamically select the 16-byte aligned int from the array */ /* for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) a; \ __ret += __PA_LDCW_ALIGNMENT - 1; \ __ret &= ~(__PA_LDCW_ALIGNMENT - 1); \ (volatile unsigned int *) __ret; \ }) /* Works on PA 1.1 and PA 2.0 systems */ AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t * addr) { volatile unsigned int *a = __ldcw_align (addr); return (AO_TS_VAL_t) __ldcw (a); } AO_INLINE void AO_pa_clear(volatile AO_TS_t * addr) { volatile unsigned int *a = __ldcw_align (addr); AO_compiler_barrier(); *a = 1; } #define AO_CLEAR(addr) AO_pa_clear(addr) #define AO_HAVE_test_and_set_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/ia64.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "../all_atomic_load_store.h" #include "../all_acquire_release_volatile.h" #include "../test_and_set_t_is_char.h" #ifdef _ILP32 /* 32-bit HP/UX code. */ /* This requires pointer "swizzling". Pointers need to be expanded */ /* to 64 bits using the addp4 instruction before use. This makes it */ /* hard to share code, but we try anyway. */ # define AO_LEN "4" /* We assume that addr always appears in argument position 1 in asm */ /* code. If it is clobbered due to swizzling, we also need it in */ /* second position. Any later arguments are referenced symbolically, */ /* so that we don't have to worry about their position. This requires*/ /* gcc 3.1, but you shouldn't be using anything older than that on */ /* IA64 anyway. */ /* The AO_MASK macro is a workaround for the fact that HP/UX gcc */ /* appears to otherwise store 64-bit pointers in ar.ccv, i.e. it */ /* doesn't appear to clear high bits in a pointer value we pass into */ /* assembly code, even if it is supposedly of type AO_t. */ # define AO_IN_ADDR "1"(addr) # define AO_OUT_ADDR , "=r"(addr) # define AO_SWIZZLE "addp4 %1=0,%1;;\n" # define AO_MASK(ptr) __asm__("zxt4 %1=%1": "=r"(ptr) : "0"(ptr)); #else # define AO_LEN "8" # define AO_IN_ADDR "r"(addr) # define AO_OUT_ADDR # define AO_SWIZZLE # define AO_MASK(ptr) #endif AO_INLINE void AO_nop_full(void) { __asm__ __volatile__("mf" : : : "memory"); } #define AO_HAVE_nop_full AO_INLINE AO_t AO_fetch_and_add1_acquire (volatile AO_t *addr) { AO_t result; __asm__ __volatile__ (AO_SWIZZLE "fetchadd" AO_LEN ".acq %0=[%1],1": "=r" (result) AO_OUT_ADDR: AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_fetch_and_add1_acquire AO_INLINE AO_t AO_fetch_and_add1_release (volatile AO_t *addr) { AO_t result; __asm__ __volatile__ (AO_SWIZZLE "fetchadd" AO_LEN ".rel %0=[%1],1": "=r" (result) AO_OUT_ADDR: AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_fetch_and_add1_release AO_INLINE AO_t AO_fetch_and_sub1_acquire (volatile AO_t *addr) { AO_t result; __asm__ __volatile__ (AO_SWIZZLE "fetchadd" AO_LEN ".acq %0=[%1],-1": "=r" (result) AO_OUT_ADDR: AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_fetch_and_sub1_acquire AO_INLINE AO_t AO_fetch_and_sub1_release (volatile AO_t *addr) { AO_t result; __asm__ __volatile__ (AO_SWIZZLE "fetchadd" AO_LEN ".rel %0=[%1],-1": "=r" (result) AO_OUT_ADDR: AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_fetch_and_sub1_release #ifndef _ILP32 AO_INLINE unsigned int AO_int_fetch_and_add1_acquire (volatile unsigned int *addr) { unsigned int result; __asm__ __volatile__ ("fetchadd4.acq %0=[%1],1": "=r" (result): AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_int_fetch_and_add1_acquire AO_INLINE unsigned int AO_int_fetch_and_add1_release (volatile unsigned int *addr) { unsigned int result; __asm__ __volatile__ ("fetchadd4.rel %0=[%1],1": "=r" (result): AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_int_fetch_and_add1_release AO_INLINE unsigned int AO_int_fetch_and_sub1_acquire (volatile unsigned int *addr) { unsigned int result; __asm__ __volatile__ 
("fetchadd4.acq %0=[%1],-1": "=r" (result): AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_int_fetch_and_sub1_acquire AO_INLINE unsigned int AO_int_fetch_and_sub1_release (volatile unsigned int *addr) { unsigned int result; __asm__ __volatile__ ("fetchadd4.rel %0=[%1],-1": "=r" (result): AO_IN_ADDR :"memory"); return result; } #define AO_HAVE_int_fetch_and_sub1_release #endif /* !_ILP32 */ AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; AO_MASK(old); __asm__ __volatile__(AO_SWIZZLE "mov ar.ccv=%[old] ;; cmpxchg" AO_LEN ".acq %0=[%1],%[new_val],ar.ccv" : "=r"(oldval) AO_OUT_ADDR : AO_IN_ADDR, [new_val]"r"(new_val), [old]"r"(old) : "memory"); return (oldval == old); } #define AO_HAVE_compare_and_swap_acquire AO_INLINE int AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; AO_MASK(old); __asm__ __volatile__(AO_SWIZZLE "mov ar.ccv=%[old] ;; cmpxchg" AO_LEN ".rel %0=[%1],%[new_val],ar.ccv" : "=r"(oldval) AO_OUT_ADDR : AO_IN_ADDR, [new_val]"r"(new_val), [old]"r"(old) : "memory"); return (oldval == old); } #define AO_HAVE_compare_and_swap_release AO_INLINE int AO_char_compare_and_swap_acquire(volatile unsigned char *addr, unsigned char old, unsigned char new_val) { unsigned char oldval; __asm__ __volatile__(AO_SWIZZLE "mov ar.ccv=%[old] ;; cmpxchg1.acq %0=[%1],%[new_val],ar.ccv" : "=r"(oldval) AO_OUT_ADDR : AO_IN_ADDR, [new_val]"r"(new_val), [old]"r"((AO_t)old) : "memory"); return (oldval == old); } #define AO_HAVE_char_compare_and_swap_acquire AO_INLINE int AO_char_compare_and_swap_release(volatile unsigned char *addr, unsigned char old, unsigned char new_val) { unsigned char oldval; __asm__ __volatile__(AO_SWIZZLE "mov ar.ccv=%[old] ;; cmpxchg1.rel %0=[%1],%[new_val],ar.ccv" : "=r"(oldval) AO_OUT_ADDR : AO_IN_ADDR, [new_val]"r"(new_val), [old]"r"((AO_t)old) : "memory"); return (oldval == old); } #define AO_HAVE_char_compare_and_swap_release AO_INLINE int AO_short_compare_and_swap_acquire(volatile unsigned short *addr, unsigned short old, unsigned short new_val) { unsigned short oldval; __asm__ __volatile__(AO_SWIZZLE "mov ar.ccv=%[old] ;; cmpxchg2.acq %0=[%1],%[new_val],ar.ccv" : "=r"(oldval) AO_OUT_ADDR : AO_IN_ADDR, [new_val]"r"(new_val), [old]"r"((AO_t)old) : "memory"); return (oldval == old); } #define AO_HAVE_short_compare_and_swap_acquire AO_INLINE int AO_short_compare_and_swap_release(volatile unsigned short *addr, unsigned short old, unsigned short new_val) { unsigned short oldval; __asm__ __volatile__(AO_SWIZZLE "mov ar.ccv=%[old] ;; cmpxchg2.rel %0=[%1],%[new_val],ar.ccv" : "=r"(oldval) AO_OUT_ADDR : AO_IN_ADDR, [new_val]"r"(new_val), [old]"r"((AO_t)old) : "memory"); return (oldval == old); } #define AO_HAVE_short_compare_and_swap_release #ifndef _ILP32 AO_INLINE int AO_int_compare_and_swap_acquire(volatile unsigned int *addr, unsigned int old, unsigned int new_val) { unsigned int oldval; __asm__ __volatile__("mov ar.ccv=%3 ;; cmpxchg4.acq %0=[%1],%2,ar.ccv" : "=r"(oldval) : AO_IN_ADDR, "r"(new_val), "r"((AO_t)old) : "memory"); return (oldval == old); } #define AO_HAVE_int_compare_and_swap_acquire AO_INLINE int AO_int_compare_and_swap_release(volatile unsigned int *addr, unsigned int old, unsigned int new_val) { unsigned int oldval; __asm__ __volatile__("mov ar.ccv=%3 ;; cmpxchg4.rel %0=[%1],%2,ar.ccv" : "=r"(oldval) : AO_IN_ADDR, "r"(new_val), "r"((AO_t)old) : "memory"); return (oldval == old); } #define AO_HAVE_int_compare_and_swap_release #endif /* !_ILP32 */ /* FIXME: Add 
compare_and_swap_double as soon as there is widely */ /* available hardware that implements it. */ /* FIXME: Add compare_double_and_swap_double for the _ILP32 case. */ #ifdef _ILP32 # include "../ao_t_is_int.h" #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/m68k.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ /* The cas instruction causes an emulation trap for the */ /* 060 with a misaligned pointer, so let's avoid this. */ #undef AO_t typedef unsigned long AO_t __attribute__ ((aligned (4))); /* FIXME. Very incomplete. */ #include "../all_aligned_atomic_load_store.h" /* Are there any m68k multiprocessors still around? */ /* AFAIK, Alliants were sequentially consistent. */ #include "../ordered.h" #include "../test_and_set_t_is_char.h" /* Contributed by Tony Mantler or new. Should be changed to MIT license? */ AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_t oldval; /* The value at addr is semi-phony. */ /* 'tas' sets bit 7 while the return */ /* value pretends all bits were set, */ /* which at least matches AO_TS_SET. */ __asm__ __volatile__( "tas %1; sne %0" : "=d" (oldval), "=m" (*addr) : "m" (*addr) : "memory"); return oldval; } #define AO_HAVE_test_and_set_full /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { char result; __asm__ __volatile__( "cas.l %3,%4,%1; seq %0" : "=d" (result), "=m" (*addr) : "m" (*addr), "d" (old), "d" (new_val) : "memory"); return -result; } #define AO_HAVE_compare_and_swap_full #include "../ao_t_is_int.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/mips.h ================================================ /* * Copyright (c) 2005,2007 Thiemo Seufer * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. */ /* * FIXME: This should probably make finer distinctions. SGI MIPS is * much more strongly ordered, and in fact closer to sequentially * consistent. This is really aimed at modern embedded implementations. * It looks to me like this assumes a 32-bit ABI. -HB */ #include "../all_aligned_atomic_load_store.h" #include "../acquire_release_volatile.h" #include "../test_and_set_t_is_ao_t.h" #include "../standard_ao_double_t.h" /* Data dependence does not imply read ordering. 
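/* Illustration (not part of this file): the m68k port above exposes only
   test-and-set and CAS. A minimal sketch of how a client might build a
   spinlock on top of the AO_test_and_set_full/AO_CLEAR API, assuming the
   generalized <atomic_ops.h> front end is on the include path: */
#include <atomic_ops.h>

static AO_TS_t lock_word = AO_TS_INITIALIZER;

static void spin_lock(void)
{
  /* Keep retrying until the previous value was clear, i.e. we own the lock. */
  while (AO_test_and_set_full(&lock_word) == AO_TS_SET)
    ; /* busy wait */
}

static void spin_unlock(void)
{
  AO_CLEAR(&lock_word); /* return the location to the clear state */
}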
*/ #define AO_NO_DD_ORDERING AO_INLINE void AO_nop_full(void) { __asm__ __volatile__( " .set push \n" " .set mips2 \n" " .set noreorder \n" " .set nomacro \n" " sync \n" " .set pop " : : : "memory"); } #define AO_HAVE_nop_full AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) { register int was_equal = 0; register int temp; __asm__ __volatile__( " .set push \n" " .set mips2 \n" " .set noreorder \n" " .set nomacro \n" "1: ll %0, %1 \n" " bne %0, %4, 2f \n" " move %0, %3 \n" " sc %0, %1 \n" " .set pop \n" " beqz %0, 1b \n" " li %2, 1 \n" "2: " : "=&r" (temp), "+R" (*addr), "+r" (was_equal) : "r" (new_val), "r" (old) : "memory"); return was_equal; } #define AO_HAVE_compare_and_swap /* FIXME: I think the implementations below should be automatically */ /* generated if we omit them. - HB */ AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { int result = AO_compare_and_swap(addr, old, new_val); AO_nop_full(); return result; } #define AO_HAVE_compare_and_swap_acquire AO_INLINE int AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_nop_full(); return AO_compare_and_swap(addr, old, new_val); } #define AO_HAVE_compare_and_swap_release AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t result; AO_nop_full(); result = AO_compare_and_swap(addr, old, new_val); AO_nop_full(); return result; } #define AO_HAVE_compare_and_swap_full /* * FIXME: We should also implement fetch_and_add and or primitives * directly. */ #include "../ao_t_is_int.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/powerpc.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ /* Memory model documented at http://www-106.ibm.com/developerworks/ */ /* eserver/articles/archguide.html and (clearer) */ /* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */ /* There appears to be no implicit ordering between any kind of */ /* independent memory references. */ /* Architecture enforces some ordering based on control dependence. */ /* I don't know if that could help. */ /* Data-dependent loads are always ordered. */ /* Based on the above references, eieio is intended for use on */ /* uncached memory, which we don't support. It does not order loads */ /* from cached memory. */ /* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to */ /* track some of this down and correcting my misunderstandings. -HB */ /* Earl Chew subsequently contributed further fixes & additions. */ #include "../all_aligned_atomic_load_store.h" #include "../test_and_set_t_is_ao_t.h" /* There seems to be no byte equivalent of lwarx, so this */ /* may really be what we want, at least in the 32-bit case. 
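/* Illustration (not part of this file): the ll/sc retry loop above is the
   shape that C11's *weak* compare-exchange is meant to map onto, since sc,
   like a weak CAS, may fail spuriously. A sketch of the portable pattern,
   synthesizing fetch-and-add from CAS: */
#include <stdatomic.h>

static unsigned long
fetch_and_add(_Atomic unsigned long *p, unsigned long incr)
{
  unsigned long old = atomic_load_explicit(p, memory_order_relaxed);

  /* On failure the CAS reloads 'old' with the current value; retry. */
  while (!atomic_compare_exchange_weak_explicit(p, &old, old + incr,
                                                memory_order_acq_rel,
                                                memory_order_relaxed))
    ;
  return old;
}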
*/ AO_INLINE void AO_nop_full(void) { __asm__ __volatile__("sync" : : : "memory"); } #define AO_HAVE_nop_full /* lwsync apparently works for everything but a StoreLoad barrier. */ AO_INLINE void AO_lwsync(void) { #ifdef __NO_LWSYNC__ __asm__ __volatile__("sync" : : : "memory"); #else __asm__ __volatile__("lwsync" : : : "memory"); #endif } #define AO_nop_write() AO_lwsync() #define AO_HAVE_nop_write #define AO_nop_read() AO_lwsync() #define AO_HAVE_nop_read /* We explicitly specify load_acquire, since it is important, and can */ /* be implemented relatively cheaply. It could be implemented */ /* with an ordinary load followed by a lwsync. But the general wisdom */ /* seems to be that a data dependent branch followed by an isync is */ /* cheaper. And the documentation is fairly explicit that this also */ /* has acquire semantics. */ /* ppc64 uses ld not lwz */ #if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) AO_INLINE AO_t AO_load_acquire(const volatile AO_t *addr) { AO_t result; __asm__ __volatile__ ( "ld%U1%X1 %0,%1\n" "cmpw %0,%0\n" "bne- 1f\n" "1: isync\n" : "=r" (result) : "m"(*addr) : "memory", "cr0"); return result; } #else AO_INLINE AO_t AO_load_acquire(const volatile AO_t *addr) { AO_t result; /* FIXME: We should get gcc to allocate one of the condition */ /* registers. I always got "impossible constraint" when I */ /* tried the "y" constraint. */ __asm__ __volatile__ ( "lwz%U1%X1 %0,%1\n" "cmpw %0,%0\n" "bne- 1f\n" "1: isync\n" : "=r" (result) : "m"(*addr) : "memory", "cc"); return result; } #endif #define AO_HAVE_load_acquire /* We explicitly specify store_release, since it relies */ /* on the fact that lwsync is also a LoadStore barrier. */ AO_INLINE void AO_store_release(volatile AO_t *addr, AO_t value) { AO_lwsync(); *addr = value; } #define AO_HAVE_store_release /* This is similar to the code in the garbage collector. Deleting */ /* this and having it synthesized from compare_and_swap would probably */ /* only cost us a load immediate instruction. */ #if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) /* Completely untested. And we should be using smaller objects anyway. */ AO_INLINE AO_TS_VAL_t AO_test_and_set(volatile AO_TS_t *addr) { unsigned long oldval; unsigned long temp = 1; /* locked value */ __asm__ __volatile__( "1:ldarx %0,0,%1\n" /* load and reserve */ "cmpdi %0, 0\n" /* if load is */ "bne 2f\n" /* non-zero, return already set */ "stdcx. %2,0,%1\n" /* else store conditional */ "bne- 1b\n" /* retry if lost reservation */ "2:\n" /* oldval is zero if we set */ : "=&r"(oldval) : "r"(addr), "r"(temp) : "memory", "cr0"); return (AO_TS_VAL_t)oldval; } #else AO_INLINE AO_TS_VAL_t AO_test_and_set(volatile AO_TS_t *addr) { int oldval; int temp = 1; /* locked value */ __asm__ __volatile__( "1:lwarx %0,0,%1\n" /* load and reserve */ "cmpwi %0, 0\n" /* if load is */ "bne 2f\n" /* non-zero, return already set */ "stwcx.
%2,0,%1\n" /* else store conditional */ "bne- 1b\n" /* retry if lost reservation */ "2:\n" /* oldval is zero if we set */ : "=&r"(oldval) : "r"(addr), "r"(temp) : "memory", "cr0"); return (AO_TS_VAL_t)oldval; } #endif #define AO_HAVE_test_and_set AO_INLINE AO_TS_VAL_t AO_test_and_set_acquire(volatile AO_TS_t *addr) { AO_TS_VAL_t result = AO_test_and_set(addr); AO_lwsync(); return result; } #define AO_HAVE_test_and_set_acquire AO_INLINE AO_TS_VAL_t AO_test_and_set_release(volatile AO_TS_t *addr) { AO_lwsync(); return AO_test_and_set(addr); } #define AO_HAVE_test_and_set_release AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_VAL_t result; AO_lwsync(); result = AO_test_and_set(addr); AO_lwsync(); return result; } #define AO_HAVE_test_and_set_full #if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) /* FIXME: Completely untested. */ AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; int result = 0; __asm__ __volatile__( "1:ldarx %0,0,%2\n" /* load and reserve */ "cmpd %0, %4\n" /* if load is not equal to */ "bne 2f\n" /* old, fail */ "stdcx. %3,0,%2\n" /* else store conditional */ "bne- 1b\n" /* retry if lost reservation */ "li %1,1\n" /* result = 1; */ "2:\n" : "=&r"(oldval), "=&r"(result) : "r"(addr), "r"(new_val), "r"(old), "1"(result) : "memory", "cr0"); return result; } #else AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; int result = 0; __asm__ __volatile__( "1:lwarx %0,0,%2\n" /* load and reserve */ "cmpw %0, %4\n" /* if load is not equal to */ "bne 2f\n" /* old, fail */ "stwcx. %3,0,%2\n" /* else store conditional */ "bne- 1b\n" /* retry if lost reservation */ "li %1,1\n" /* result = 1; */ "2:\n" : "=&r"(oldval), "=&r"(result) : "r"(addr), "r"(new_val), "r"(old), "1"(result) : "memory", "cr0"); return result; } #endif #define AO_HAVE_compare_and_swap AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { int result = AO_compare_and_swap(addr, old, new_val); AO_lwsync(); return result; } #define AO_HAVE_compare_and_swap_acquire AO_INLINE int AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_lwsync(); return AO_compare_and_swap(addr, old, new_val); } #define AO_HAVE_compare_and_swap_release AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t result; AO_lwsync(); result = AO_compare_and_swap(addr, old, new_val); AO_lwsync(); return result; } #define AO_HAVE_compare_and_swap_full #if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) /* FIXME: Completely untested. */ AO_INLINE AO_t AO_fetch_and_add(volatile AO_t *addr, AO_t incr) { AO_t oldval; AO_t newval; __asm__ __volatile__( "1:ldarx %0,0,%2\n" /* load and reserve */ "add %1,%0,%3\n" /* increment */ "stdcx. %1,0,%2\n" /* store conditional */ "bne- 1b\n" /* retry if lost reservation */ : "=&r"(oldval), "=&r"(newval) : "r"(addr), "r"(incr) : "memory", "cr0"); return oldval; } #define AO_HAVE_fetch_and_add #else AO_INLINE AO_t AO_fetch_and_add(volatile AO_t *addr, AO_t incr) { AO_t oldval; AO_t newval; __asm__ __volatile__( "1:lwarx %0,0,%2\n" /* load and reserve */ "add %1,%0,%3\n" /* increment */ "stwcx. 
%1,0,%2\n" /* store conditional */ "bne- 1b\n" /* retry if lost reservation */ : "=&r"(oldval), "=&r"(newval) : "r"(addr), "r"(incr) : "memory", "cr0"); return oldval; } #define AO_HAVE_fetch_and_add #endif AO_INLINE AO_t AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) { AO_t result = AO_fetch_and_add(addr, incr); AO_lwsync(); return result; } #define AO_HAVE_fetch_and_add_acquire AO_INLINE AO_t AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) { AO_lwsync(); return AO_fetch_and_add(addr, incr); } #define AO_HAVE_fetch_and_add_release AO_INLINE AO_t AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) { AO_t result; AO_lwsync(); result = AO_fetch_and_add(addr, incr); AO_lwsync(); return result; } #define AO_HAVE_fetch_and_add_full #if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) #else # include "../ao_t_is_int.h" #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/s390.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ /* FIXME: untested. */ /* The relevant documentation appears to be at */ /* http://publibz.boulder.ibm.com/epubs/pdf/dz9zr003.pdf */ /* around page 5-96. Apparently: */ /* - Memory references in general are atomic only for a single */ /* byte. But it appears that the most common load/store */ /* instructions also guarantee atomicity for aligned */ /* operands of standard types. WE FOOLISHLY ASSUME that */ /* compilers only generate those. If that turns out to be */ /* wrong, we need inline assembly code for AO_load and */ /* AO_store. */ /* - A store followed by a load is unordered since the store */ /* may be delayed. Otherwise everything is ordered. */ /* - There is a hardware compare-and-swap (CS) instruction. */ #include "../ordered_except_wr.h" #include "../all_aligned_atomic_load_store.h" #include "../test_and_set_t_is_ao_t.h" /* FIXME: Is there a way to do byte-sized test-and-set? */ /* FIXME: AO_nop_full should probably be implemented directly. */ /* It appears that certain BCR instructions have that effect. */ /* Presumably they're cheaper than CS? */ AO_INLINE AO_t AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { int retval; __asm__ __volatile__ ( # ifndef __s390x__ " cs %1,%2,0(%3)\n" # else " csg %1,%2,0(%3)\n" # endif " ipm %0\n" " srl %0,28\n" : "=&d" (retval), "+d" (old) : "d" (new_val), "a" (addr) : "cc", "memory"); return retval == 0; } #define AO_HAVE_compare_and_swap_full /* FIXME: Add double-wide compare-and-swap for 32-bit executables. */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/sh.h ================================================ /* * Copyright (c) 2009 by Takashi YOSHII. All rights reserved. 
* * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. */ #include "../all_atomic_load_store.h" #include "../ordered.h" /* sh has tas.b(byte) only */ #include "../test_and_set_t_is_char.h" AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { int oldval; __asm__ __volatile__( "tas.b @%1; movt %0" : "=r" (oldval) : "r" (addr) : "t", "memory"); return oldval? AO_TS_CLEAR : AO_TS_SET; } #define AO_HAVE_test_and_set_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/sparc.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * */ /* FIXME. Very incomplete. No support for sparc64. */ /* Non-ancient SPARCs provide compare-and-swap (casa). */ /* We should make that available. */ #include "../all_atomic_load_store.h" /* Real SPARC code uses TSO: */ #include "../ordered_except_wr.h" /* Test_and_set location is just a byte. */ #include "../test_and_set_t_is_char.h" AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_VAL_t oldval; __asm__ __volatile__("ldstub %1,%0" : "=r"(oldval), "=m"(*addr) : "m"(*addr) : "memory"); return oldval; } #define AO_HAVE_test_and_set_full #ifndef AO_NO_SPARC_V9 /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { char ret; __asm__ __volatile__ ("membar #StoreLoad | #LoadLoad\n\t" # if defined(__arch64__) "casx [%2],%0,%1\n\t" # else "cas [%2],%0,%1\n\t" /* 32-bit version */ # endif "membar #StoreLoad | #StoreStore\n\t" "cmp %0,%1\n\t" "be,a 0f\n\t" "mov 1,%0\n\t"/* one insn after branch always executed */ "clr %0\n\t" "0:\n\t" : "=r" (ret), "+r" (new_val) : "r" (addr), "0" (old) : "memory", "cc"); return (int)ret; } #define AO_HAVE_compare_and_swap_full #endif /* AO_NO_SPARC_V9 */ /* FIXME: This needs to be extended for SPARC v8 and v9. */ /* SPARC V8 also has swap. V9 has CAS. */ /* There are barriers like membar #LoadStore. */ /* CASA (32-bit) and CASXA(64-bit) instructions were */ /* added in V9. */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/x86.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. 
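/* Illustration (not part of this file): the SPARC cas sequence above is
   bracketed by membar instructions to obtain full ordering. Approximately
   the same construction with portable C11 fences around a relaxed CAS: */
#include <stdatomic.h>
#include <stdbool.h>

static bool
cas_full(_Atomic unsigned long *addr, unsigned long old_val,
         unsigned long new_val)
{
  bool ok;
  atomic_thread_fence(memory_order_seq_cst); /* ~ membar #StoreLoad|#LoadLoad */
  ok = atomic_compare_exchange_strong_explicit(addr, &old_val, new_val,
                                               memory_order_relaxed,
                                               memory_order_relaxed);
  atomic_thread_fence(memory_order_seq_cst); /* ~ membar #StoreLoad|#StoreStore */
  return ok;
}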
* * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * * Some of the machine specific code was borrowed from our GC distribution. */ /* The following really assume we have a 486 or better. Unfortunately */ /* gcc doesn't define a suitable feature test macro based on command */ /* line options. */ /* We should perhaps test dynamically. */ #include "../all_aligned_atomic_load_store.h" /* Real X86 implementations, except for some old WinChips, appear */ /* to enforce ordering between memory operations, EXCEPT that a later */ /* read can pass earlier writes, presumably due to the visible */ /* presence of store buffers. */ /* We ignore both the WinChips, and the fact that the official specs */ /* seem to be much weaker (and arguably too weak to be usable). */ #include "../ordered_except_wr.h" #include "../test_and_set_t_is_char.h" #include "../standard_ao_double_t.h" #if defined(AO_USE_PENTIUM4_INSTRS) AO_INLINE void AO_nop_full(void) { __asm__ __volatile__("mfence" : : : "memory"); } #define AO_HAVE_nop_full #else /* We could use the cpuid instruction. But that seems to be slower */ /* than the default implementation based on test_and_set_full. Thus */ /* we omit that bit of misinformation here. */ #endif /* As far as we can tell, the lfence and sfence instructions are not */ /* currently needed or useful for cached memory accesses. */ /* Really only works for 486 and later */ AO_INLINE AO_t AO_fetch_and_add_full (volatile AO_t *p, AO_t incr) { AO_t result; __asm__ __volatile__ ("lock; xaddl %0, %1" : "=r" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_fetch_and_add_full AO_INLINE unsigned char AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr) { unsigned char result; __asm__ __volatile__ ("lock; xaddb %0, %1" : "=q" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_char_fetch_and_add_full AO_INLINE unsigned short AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr) { unsigned short result; __asm__ __volatile__ ("lock; xaddw %0, %1" : "=r" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_short_fetch_and_add_full /* Really only works for 486 and later */ AO_INLINE void AO_or_full (volatile AO_t *p, AO_t incr) { __asm__ __volatile__ ("lock; orl %1, %0" : "=m" (*p) : "r" (incr), "m" (*p) : "memory"); } #define AO_HAVE_or_full AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { unsigned char oldval; /* Note: the "xchg" instruction does not need a "lock" prefix */ __asm__ __volatile__("xchgb %0, %1" : "=q"(oldval), "=m"(*addr) : "0"(0xff), "m"(*addr) : "memory"); return (AO_TS_VAL_t)oldval; } #define AO_HAVE_test_and_set_full /* Returns nonzero if the comparison succeeded. 
*/ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { # ifdef AO_USE_SYNC_CAS_BUILTIN return (int)__sync_bool_compare_and_swap(addr, old, new_val); # else char result; __asm__ __volatile__("lock; cmpxchgl %3, %0; setz %1" : "=m" (*addr), "=a" (result) : "m" (*addr), "r" (new_val), "a" (old) : "memory"); return (int)result; # endif } #define AO_HAVE_compare_and_swap_full /* Returns nonzero if the comparison succeeded. */ /* Really requires at least a Pentium. */ AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { char result; #if __PIC__ /* If PIC is turned on, we can't use %ebx as it is reserved for the GOT pointer. We can save and restore %ebx because GCC won't be using it for anything else (such as any of the m operands) */ __asm__ __volatile__("pushl %%ebx;" /* save ebx used for PIC GOT ptr */ "movl %6,%%ebx;" /* move new_val2 to %ebx */ "lock; cmpxchg8b %0; setz %1;" "pop %%ebx;" /* restore %ebx */ : "=m"(*addr), "=a"(result) : "m"(*addr), "d" (old_val2), "a" (old_val1), "c" (new_val2), "m" (new_val1) : "memory"); #else /* We can't just do the same thing in non-PIC mode, because GCC * might be using %ebx as the memory operand. We could have ifdef'd * in a clobber, but there's no point doing the push/pop if we don't * have to. */ __asm__ __volatile__("lock; cmpxchg8b %0; setz %1;" : "=m"(*addr), "=a"(result) : "m"(*addr), "d" (old_val2), "a" (old_val1), "c" (new_val2), "b" (new_val1) : "memory"); #endif return (int) result; } #define AO_HAVE_compare_double_and_swap_double_full #include "../ao_t_is_int.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/gcc/x86_64.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * * Some of the machine specific code was borrowed from our GC distribution. */ #include "../all_aligned_atomic_load_store.h" /* Real X86 implementations appear */ /* to enforce ordering between memory operations, EXCEPT that a later */ /* read can pass earlier writes, presumably due to the visible */ /* presence of store buffers. */ /* We ignore the fact that the official specs */ /* seem to be much weaker (and arguably too weak to be usable). */ #include "../ordered_except_wr.h" #include "../test_and_set_t_is_char.h" #include "../standard_ao_double_t.h" AO_INLINE void AO_nop_full(void) { /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */ __asm__ __volatile__("mfence" : : : "memory"); } #define AO_HAVE_nop_full /* As far as we can tell, the lfence and sfence instructions are not */ /* currently needed or useful for cached memory accesses. 
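/* Illustration (not part of this file): "lock; xadd" fetch-and-add, as
   defined in the x86/x86_64 files above, is exactly what a ticket lock
   needs. A minimal sketch on top of this library's primitives; the struct
   and helper names are hypothetical, and AO_load_acquire/AO_store_release
   are assumed to be supplied by the generalization headers: */
typedef struct {
  volatile AO_t next_ticket; /* next ticket to hand out */
  volatile AO_t now_serving; /* ticket currently holding the lock */
} ticket_lock_t;             /* zero-initialize */

static void
ticket_lock(ticket_lock_t *l)
{
  AO_t me = AO_fetch_and_add_full(&l->next_ticket, 1); /* take a ticket */
  while (AO_load_acquire(&l->now_serving) != me)
    ; /* spin until served */
}

static void
ticket_unlock(ticket_lock_t *l)
{
  /* Only the lock holder writes now_serving, so one release store suffices. */
  AO_store_release(&l->now_serving, l->now_serving + 1);
}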
*/ AO_INLINE AO_t AO_fetch_and_add_full (volatile AO_t *p, AO_t incr) { AO_t result; __asm__ __volatile__ ("lock; xaddq %0, %1" : "=r" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_fetch_and_add_full AO_INLINE unsigned char AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr) { unsigned char result; __asm__ __volatile__ ("lock; xaddb %0, %1" : "=q" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_char_fetch_and_add_full AO_INLINE unsigned short AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr) { unsigned short result; __asm__ __volatile__ ("lock; xaddw %0, %1" : "=r" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_short_fetch_and_add_full AO_INLINE unsigned int AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr) { unsigned int result; __asm__ __volatile__ ("lock; xaddl %0, %1" : "=r" (result), "=m" (*p) : "0" (incr), "m" (*p) : "memory"); return result; } #define AO_HAVE_int_fetch_and_add_full AO_INLINE void AO_or_full (volatile AO_t *p, AO_t incr) { __asm__ __volatile__ ("lock; orq %1, %0" : "=m" (*p) : "r" (incr), "m" (*p) : "memory"); } #define AO_HAVE_or_full AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { unsigned char oldval; /* Note: the "xchg" instruction does not need a "lock" prefix */ __asm__ __volatile__("xchgb %0, %1" : "=q"(oldval), "=m"(*addr) : "0"(0xff), "m"(*addr) : "memory"); return (AO_TS_VAL_t)oldval; } #define AO_HAVE_test_and_set_full /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { # ifdef AO_USE_SYNC_CAS_BUILTIN return (int)__sync_bool_compare_and_swap(addr, old, new_val); # else char result; __asm__ __volatile__("lock; cmpxchgq %3, %0; setz %1" : "=m" (*addr), "=a" (result) : "m" (*addr), "r" (new_val), "a" (old) : "memory"); return (int) result; # endif } #define AO_HAVE_compare_and_swap_full #ifdef AO_CMPXCHG16B_AVAILABLE /* NEC LE-IT: older AMD Opterons are missing this instruction. * On these machines SIGILL will be thrown. * Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated * (lock based) version available */ /* HB: Changed this to not define either by default. There are * enough machines and tool chains around on which cmpxchg16b * doesn't work. And the emulation is unsafe by our usual rules. * However both are clearly useful in certain cases. */ AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { char result; __asm__ __volatile__("lock; cmpxchg16b %0; setz %1" : "=m"(*addr), "=a"(result) : "m"(*addr), "d" (old_val2), "a" (old_val1), "c" (new_val2), "b" (new_val1) : "memory"); return (int) result; } #define AO_HAVE_compare_double_and_swap_double_full #else /* this one provides spinlock based emulation of CAS implemented in */ /* atomic_ops.c. We probably do not want to do this here, since it is */ /* not atomic with respect to other kinds of updates of *addr. On the */ /* other hand, this may be a useful facility on occasion.
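/* Illustration (not part of this file): what a double-width CAS such as
   cmpxchg16b buys is the ability to update a pointer and a version counter
   in a single atomic step, the classic defense against ABA in lock-free
   stacks and queues. A sketch assuming a GCC-style compiler on x86_64 built
   with -mcx16, so that the 16-byte __sync builtin compiles to
   lock cmpxchg16b; all names here are illustrative: */
#include <stdint.h>
#include <string.h>

typedef struct {
  uintptr_t ptr;   /* e.g. top-of-stack pointer */
  uintptr_t count; /* bumped on every update to defeat ABA */
} __attribute__((aligned(16))) tagged_ptr_t;

static int
tagged_cas(tagged_ptr_t *loc, tagged_ptr_t expected, tagged_ptr_t desired)
{
  unsigned __int128 e, d;
  memcpy(&e, &expected, sizeof e);
  memcpy(&d, &desired, sizeof d);
  return __sync_bool_compare_and_swap((unsigned __int128 *)loc, e, d);
}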
*/ #ifdef AO_WEAK_DOUBLE_CAS_EMULATION int AO_compare_double_and_swap_double_emulation(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2); AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { return AO_compare_double_and_swap_double_emulation(addr, old_val1, old_val2, new_val1, new_val2); } #define AO_HAVE_compare_double_and_swap_double_full #endif /* AO_WEAK_DOUBLE_CAS_EMULATION */ #endif /* AO_CMPXCHG16B_AVAILABLE */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/generic_pthread.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* The following is useful primarily for debugging and documentation. */ /* We define various atomic operations by acquiring a global pthread */ /* lock. The resulting implementation will perform poorly, but should */ /* be correct unless it is used from signal handlers. */ /* We assume that all pthread operations act like full memory barriers. */ /* (We believe that is the intent of the specification.) */ #include <pthread.h> #include "test_and_set_t_is_ao_t.h" /* This is not necessarily compatible with the native */ /* implementation. But those can't be safely mixed anyway. */ /* We define only the full barrier variants, and count on the */ /* generalization section below to fill in the rest.
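/* Illustration (not part of this file): every primitive below follows one
   template -- take the single global lock, do the operation on plain memory,
   release the lock. A hypothetical AO_fetch_and_xor_full in the same style,
   using the AO_pt_lock declared just below, shows how further primitives
   would be added: */
AO_INLINE AO_t
AO_fetch_and_xor_full(volatile AO_t *p, AO_t mask)
{
  AO_t tmp;

  pthread_mutex_lock(&AO_pt_lock);
  tmp = *p;
  *p = tmp ^ mask;
  pthread_mutex_unlock(&AO_pt_lock);
  return tmp;
}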
*/ extern pthread_mutex_t AO_pt_lock; AO_INLINE void AO_nop_full(void) { pthread_mutex_lock(&AO_pt_lock); pthread_mutex_unlock(&AO_pt_lock); } #define AO_HAVE_nop_full AO_INLINE AO_t AO_load_full(const volatile AO_t *addr) { AO_t result; pthread_mutex_lock(&AO_pt_lock); result = *addr; pthread_mutex_unlock(&AO_pt_lock); return result; } #define AO_HAVE_load_full AO_INLINE void AO_store_full(volatile AO_t *addr, AO_t val) { pthread_mutex_lock(&AO_pt_lock); *addr = val; pthread_mutex_unlock(&AO_pt_lock); } #define AO_HAVE_store_full AO_INLINE unsigned char AO_char_load_full(const volatile unsigned char *addr) { unsigned char result; pthread_mutex_lock(&AO_pt_lock); result = *addr; pthread_mutex_unlock(&AO_pt_lock); return result; } #define AO_HAVE_char_load_full AO_INLINE void AO_char_store_full(volatile unsigned char *addr, unsigned char val) { pthread_mutex_lock(&AO_pt_lock); *addr = val; pthread_mutex_unlock(&AO_pt_lock); } #define AO_HAVE_char_store_full AO_INLINE unsigned short AO_short_load_full(const volatile unsigned short *addr) { unsigned short result; pthread_mutex_lock(&AO_pt_lock); result = *addr; pthread_mutex_unlock(&AO_pt_lock); return result; } #define AO_HAVE_short_load_full AO_INLINE void AO_short_store_full(volatile unsigned short *addr, unsigned short val) { pthread_mutex_lock(&AO_pt_lock); *addr = val; pthread_mutex_unlock(&AO_pt_lock); } #define AO_HAVE_short_store_full AO_INLINE unsigned int AO_int_load_full(const volatile unsigned int *addr) { unsigned int result; pthread_mutex_lock(&AO_pt_lock); result = *addr; pthread_mutex_unlock(&AO_pt_lock); return result; } #define AO_HAVE_int_load_full AO_INLINE void AO_int_store_full(volatile unsigned int *addr, unsigned int val) { pthread_mutex_lock(&AO_pt_lock); *addr = val; pthread_mutex_unlock(&AO_pt_lock); } #define AO_HAVE_int_store_full AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_VAL_t result; pthread_mutex_lock(&AO_pt_lock); result = (AO_TS_VAL_t)(*addr); *addr = AO_TS_SET; pthread_mutex_unlock(&AO_pt_lock); assert(result == AO_TS_SET || result == AO_TS_CLEAR); return result; } #define AO_HAVE_test_and_set_full AO_INLINE AO_t AO_fetch_and_add_full(volatile AO_t *p, AO_t incr) { AO_t tmp; pthread_mutex_lock(&AO_pt_lock); tmp = *p; *p = tmp + incr; pthread_mutex_unlock(&AO_pt_lock); return tmp; } #define AO_HAVE_fetch_and_add_full AO_INLINE unsigned char AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr) { unsigned char tmp; pthread_mutex_lock(&AO_pt_lock); tmp = *p; *p = tmp + incr; pthread_mutex_unlock(&AO_pt_lock); return tmp; } #define AO_HAVE_char_fetch_and_add_full AO_INLINE unsigned short AO_short_fetch_and_add_full(volatile unsigned short *p, unsigned short incr) { unsigned short tmp; pthread_mutex_lock(&AO_pt_lock); tmp = *p; *p = tmp + incr; pthread_mutex_unlock(&AO_pt_lock); return tmp; } #define AO_HAVE_short_fetch_and_add_full AO_INLINE unsigned int AO_int_fetch_and_add_full(volatile unsigned int *p, unsigned int incr) { unsigned int tmp; pthread_mutex_lock(&AO_pt_lock); tmp = *p; *p = tmp + incr; pthread_mutex_unlock(&AO_pt_lock); return tmp; } #define AO_HAVE_int_fetch_and_add_full AO_INLINE void AO_or_full(volatile AO_t *p, AO_t incr) { AO_t tmp; pthread_mutex_lock(&AO_pt_lock); tmp = *p; *p = (tmp | incr); pthread_mutex_unlock(&AO_pt_lock); } #define AO_HAVE_or_full AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { pthread_mutex_lock(&AO_pt_lock); if (*addr == old) { *addr = new_val; 
pthread_mutex_unlock(&AO_pt_lock); return 1; } else pthread_mutex_unlock(&AO_pt_lock); return 0; } #define AO_HAVE_compare_and_swap_full /* Unlike real architectures, we define both double-width CAS variants. */ typedef struct { AO_t AO_val1; AO_t AO_val2; } AO_double_t; #define AO_HAVE_double_t AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old1, AO_t old2, AO_t new1, AO_t new2) { pthread_mutex_lock(&AO_pt_lock); if (addr -> AO_val1 == old1 && addr -> AO_val2 == old2) { addr -> AO_val1 = new1; addr -> AO_val2 = new2; pthread_mutex_unlock(&AO_pt_lock); return 1; } else pthread_mutex_unlock(&AO_pt_lock); return 0; } #define AO_HAVE_compare_double_and_swap_double_full AO_INLINE int AO_compare_and_swap_double_full(volatile AO_double_t *addr, AO_t old1, AO_t new1, AO_t new2) { pthread_mutex_lock(&AO_pt_lock); if (addr -> AO_val1 == old1) { addr -> AO_val1 = new1; addr -> AO_val2 = new2; pthread_mutex_unlock(&AO_pt_lock); return 1; } else pthread_mutex_unlock(&AO_pt_lock); return 0; } #define AO_HAVE_compare_and_swap_double_full /* We can't use hardware loads and stores, since they don't */ /* interact correctly with atomic updates. */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/hpc/hppa.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Derived from the corresponding header file for gcc. * */ #include "../atomic_load_store.h" /* Some architecture set descriptions include special "ordered" memory */ /* operations. As far as we can tell, no existing processors actually */ /* require those. Nor does it appear likely that future processors */ /* will. */ /* FIXME: */ /* The PA emulator on Itanium may obey weaker restrictions. */ /* There should be a mode in which we don't assume sequential */ /* consistency here. */ #include "../ordered.h" #include /* GCC will not guarantee the alignment we need, use four lock words */ /* and select the correctly aligned datum. See the glibc 2.3.2 */ /* linuxthread port for the original implementation. */ struct AO_pa_clearable_loc { int data[4]; }; #undef AO_TS_INITIALIZER #define AO_TS_t struct AO_pa_clearable_loc #define AO_TS_INITIALIZER {1,1,1,1} /* Switch meaning of set and clear, since we only have an atomic clear */ /* instruction. 
*/ typedef enum {AO_PA_TS_set = 0, AO_PA_TS_clear = 1} AO_PA_TS_val; #define AO_TS_VAL_t AO_PA_TS_val #define AO_TS_CLEAR AO_PA_TS_clear #define AO_TS_SET AO_PA_TS_set /* The hppa only has one atomic read and modify memory operation, */ /* load and clear, so hppa spinlocks must use zero to signify that */ /* someone is holding the lock. The address used for the ldcw */ /* semaphore must be 16-byte aligned. */ #define __ldcw(a, ret) \ _LDCWX(0 /* index */, 0 /* s */, a /* base */, ret); /* Because malloc only guarantees 8-byte alignment for malloc'd data, */ /* and GCC only guarantees 8-byte alignment for stack locals, we can't */ /* be assured of 16-byte alignment for atomic lock data even if we */ /* specify "__attribute ((aligned(16)))" in the type declaration. So, */ /* we use a struct containing an array of four ints for the atomic lock */ /* type and dynamically select the 16-byte aligned int from the array */ /* for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 #define __ldcw_align(a, ret) { \ ret = (unsigned long) a; \ ret += __PA_LDCW_ALIGNMENT - 1; \ ret &= ~(__PA_LDCW_ALIGNMENT - 1); \ } /* Works on PA 1.1 and PA 2.0 systems */ AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t * addr) { register unsigned int ret; register unsigned long a; __ldcw_align (addr, a); __ldcw (a, ret); return ret; } AO_INLINE void AO_pa_clear(volatile AO_TS_t * addr) { unsigned long a; __ldcw_align (addr,a); AO_compiler_barrier(); *(volatile unsigned int *)a = 1; } #define AO_CLEAR(addr) AO_pa_clear(addr) #define AO_HAVE_test_and_set_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/hpc/ia64.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * This file specifies Itanium primitives for use with the HP compiler * under HP/UX. We use intrinsics instead of the inline assembly code in the * gcc file.
*/ #include "../all_atomic_load_store.h" #include "../all_acquire_release_volatile.h" #include "../test_and_set_t_is_char.h" #include #ifdef __LP64__ # define AO_T_FASIZE _FASZ_D # define AO_T_SIZE _SZ_D #else # define AO_T_FASIZE _FASZ_W # define AO_T_SIZE _SZ_W #endif AO_INLINE void AO_nop_full(void) { _Asm_mf(); } #define AO_HAVE_nop_full AO_INLINE AO_t AO_fetch_and_add1_acquire (volatile AO_t *p) { return _Asm_fetchadd(AO_T_FASIZE, _SEM_ACQ, p, 1, _LDHINT_NONE, _DOWN_MEM_FENCE); } #define AO_HAVE_fetch_and_add1_acquire AO_INLINE AO_t AO_fetch_and_add1_release (volatile AO_t *p) { return _Asm_fetchadd(AO_T_FASIZE, _SEM_REL, p, 1, _LDHINT_NONE, _UP_MEM_FENCE); } #define AO_HAVE_fetch_and_add1_release AO_INLINE AO_t AO_fetch_and_sub1_acquire (volatile AO_t *p) { return _Asm_fetchadd(AO_T_FASIZE, _SEM_ACQ, p, -1, _LDHINT_NONE, _DOWN_MEM_FENCE); } #define AO_HAVE_fetch_and_sub1_acquire AO_INLINE AO_t AO_fetch_and_sub1_release (volatile AO_t *p) { return _Asm_fetchadd(AO_T_FASIZE, _SEM_REL, p, -1, _LDHINT_NONE, _UP_MEM_FENCE); } #define AO_HAVE_fetch_and_sub1_release AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; _Asm_mov_to_ar(_AREG_CCV, old, _DOWN_MEM_FENCE); oldval = _Asm_cmpxchg(AO_T_SIZE, _SEM_ACQ, addr, new_val, _LDHINT_NONE, _DOWN_MEM_FENCE); return (oldval == old); } #define AO_HAVE_compare_and_swap_acquire AO_INLINE int AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; _Asm_mov_to_ar(_AREG_CCV, old, _UP_MEM_FENCE); oldval = _Asm_cmpxchg(AO_T_SIZE, _SEM_REL, addr, new_val, _LDHINT_NONE, _UP_MEM_FENCE); /* Hopefully the compiler knows not to reorder the above two? */ return (oldval == old); } #define AO_HAVE_compare_and_swap_release AO_INLINE int AO_char_compare_and_swap_acquire(volatile unsigned char *addr, unsigned char old, unsigned char new_val) { unsigned char oldval; _Asm_mov_to_ar(_AREG_CCV, old, _DOWN_MEM_FENCE); oldval = _Asm_cmpxchg(_SZ_B, _SEM_ACQ, addr, new_val, _LDHINT_NONE, _DOWN_MEM_FENCE); return (oldval == old); } #define AO_HAVE_char_compare_and_swap_acquire AO_INLINE int AO_char_compare_and_swap_release(volatile unsigned char *addr, unsigned char old, unsigned char new_val) { unsigned char oldval; _Asm_mov_to_ar(_AREG_CCV, old, _UP_MEM_FENCE); oldval = _Asm_cmpxchg(_SZ_B, _SEM_REL, addr, new_val, _LDHINT_NONE, _UP_MEM_FENCE); /* Hopefully the compiler knows not to reorder the above two? */ return (oldval == old); } #define AO_HAVE_char_compare_and_swap_release AO_INLINE int AO_short_compare_and_swap_acquire(volatile unsigned short *addr, unsigned short old, unsigned short new_val) { unsigned short oldval; _Asm_mov_to_ar(_AREG_CCV, old, _DOWN_MEM_FENCE); oldval = _Asm_cmpxchg(_SZ_B, _SEM_ACQ, addr, new_val, _LDHINT_NONE, _DOWN_MEM_FENCE); return (oldval == old); } #define AO_HAVE_short_compare_and_swap_acquire AO_INLINE int AO_short_compare_and_swap_release(volatile unsigned short *addr, unsigned short old, unsigned short new_val) { unsigned short oldval; _Asm_mov_to_ar(_AREG_CCV, old, _UP_MEM_FENCE); oldval = _Asm_cmpxchg(_SZ_B, _SEM_REL, addr, new_val, _LDHINT_NONE, _UP_MEM_FENCE); /* Hopefully the compiler knows not to reorder the above two? */ return (oldval == old); } #define AO_HAVE_short_compare_and_swap_release #ifndef __LP64__ # include "../ao_t_is_int.h" #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/ibmc/powerpc.h ================================================ /* FIXME. 
This is only a placeholder for the AIX compiler. */ /* It doesn't work. Please send a patch. */ /* Memory model documented at http://www-106.ibm.com/developerworks/ */ /* eserver/articles/archguide.html and (clearer) */ /* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */ /* There appears to be no implicit ordering between any kind of */ /* independent memory references. */ /* Architecture enforces some ordering based on control dependence. */ /* I don't know if that could help. */ /* Data-dependent loads are always ordered. */ /* Based on the above references, eieio is intended for use on */ /* uncached memory, which we don't support. It does not order loads */ /* from cached memory. */ /* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to */ /* track some of this down and correcting my misunderstandings. -HB */ #include "../all_aligned_atomic_load_store.h" void AO_sync(void); #pragma mc_func AO_sync { "7c0004ac" } #ifdef __NO_LWSYNC__ # define AO_lwsync AO_sync #else void AO_lwsync(void); #pragma mc_func AO_lwsync { "7c2004ac" } #endif #define AO_nop_write() AO_lwsync() #define AO_HAVE_nop_write #define AO_nop_read() AO_lwsync() #define AO_HAVE_nop_read /* We explicitly specify load_acquire and store_release, since these */ /* rely on the fact that lwsync is also a LoadStore barrier. */ AO_INLINE AO_t AO_load_acquire(const volatile AO_t *addr) { AO_t result = *addr; AO_lwsync(); return result; } #define AO_HAVE_load_acquire AO_INLINE void AO_store_release(volatile AO_t *addr, AO_t value) { AO_lwsync(); *addr = value; } #define AO_HAVE_store_release /* This is similar to the code in the garbage collector. Deleting */ /* this and having it synthesized from compare_and_swap would probably */ /* only cost us a load immediate instruction. */ /*AO_INLINE AO_TS_VAL_t AO_test_and_set(volatile AO_TS_t *addr) { # error FIXME Implement me } #define AO_HAVE_test_and_set*/ AO_INLINE AO_TS_VAL_t AO_test_and_set_acquire(volatile AO_TS_t *addr) { AO_TS_VAL_t result = AO_test_and_set(addr); AO_lwsync(); return result; } #define AO_HAVE_test_and_set_acquire AO_INLINE AO_TS_VAL_t AO_test_and_set_release(volatile AO_TS_t *addr) { AO_lwsync(); return AO_test_and_set(addr); } #define AO_HAVE_test_and_set_release AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_VAL_t result; AO_lwsync(); result = AO_test_and_set(addr); AO_lwsync(); return result; } #define AO_HAVE_test_and_set_full /*AO_INLINE AO_t AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) { # error FIXME Implement me } #define AO_HAVE_compare_and_swap*/ AO_INLINE AO_t AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t result = AO_compare_and_swap(addr, old, new_val); AO_lwsync(); return result; } #define AO_HAVE_compare_and_swap_acquire AO_INLINE AO_t AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_lwsync(); return AO_compare_and_swap(addr, old, new_val); } #define AO_HAVE_compare_and_swap_release AO_INLINE AO_t AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t result; AO_lwsync(); result = AO_compare_and_swap(addr, old, new_val); AO_lwsync(); return result; } #define AO_HAVE_compare_and_swap_full /* FIXME: We should also implement fetch_and_add and or primitives */ /* directly.
*/ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/icc/ia64.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * This file specifies Itanium primitives for use with the Intel (ecc) * compiler. We use intrinsics instead of the inline assembly code in the * gcc file. */ #include "../all_atomic_load_store.h" #include "../test_and_set_t_is_char.h" #include <ia64intrin.h> /* The acquire release semantics of volatile can be turned off. And volatile */ /* operations in icc9 don't imply ordering with respect to other nonvolatile */ /* operations. */ #define AO_INTEL_PTR_t void * AO_INLINE AO_t AO_load_acquire(const volatile AO_t *p) { return (AO_t)(__ld8_acq((AO_INTEL_PTR_t)p)); } #define AO_HAVE_load_acquire AO_INLINE void AO_store_release(volatile AO_t *p, AO_t val) { __st8_rel((AO_INTEL_PTR_t)p, (__int64)val); } #define AO_HAVE_store_release AO_INLINE unsigned char AO_char_load_acquire(const volatile unsigned char *p) { /* A normal volatile load generates an ld.acq */ return (__ld1_acq((AO_INTEL_PTR_t)p)); } #define AO_HAVE_char_load_acquire AO_INLINE void AO_char_store_release(volatile unsigned char *p, unsigned char val) { __st1_rel((AO_INTEL_PTR_t)p, val); } #define AO_HAVE_char_store_release AO_INLINE unsigned short AO_short_load_acquire(const volatile unsigned short *p) { /* A normal volatile load generates an ld.acq */ return (__ld2_acq((AO_INTEL_PTR_t)p)); } #define AO_HAVE_short_load_acquire AO_INLINE void AO_short_store_release(volatile unsigned short *p, unsigned short val) { __st2_rel((AO_INTEL_PTR_t)p, val); } #define AO_HAVE_short_store_release AO_INLINE unsigned int AO_int_load_acquire(const volatile unsigned int *p) { /* A normal volatile load generates an ld.acq */ return (__ld4_acq((AO_INTEL_PTR_t)p)); } #define AO_HAVE_int_load_acquire AO_INLINE void AO_int_store_release(volatile unsigned int *p, unsigned int val) { __st4_rel((AO_INTEL_PTR_t)p, val); } #define AO_HAVE_int_store_release AO_INLINE void AO_nop_full(void) { __mf(); } #define AO_HAVE_nop_full AO_INLINE AO_t AO_fetch_and_add1_acquire (volatile AO_t *p) { return __fetchadd8_acq((unsigned __int64 *)p, 1); } #define AO_HAVE_fetch_and_add1_acquire AO_INLINE AO_t AO_fetch_and_add1_release (volatile AO_t *p) { return __fetchadd8_rel((unsigned __int64 *)p, 1); } #define AO_HAVE_fetch_and_add1_release AO_INLINE AO_t AO_fetch_and_sub1_acquire
(volatile AO_t *p) { return __fetchadd8_acq((unsigned __int64 *)p, -1); } #define AO_HAVE_fetch_and_sub1_acquire AO_INLINE AO_t AO_fetch_and_sub1_release (volatile AO_t *p) { return __fetchadd8_rel((unsigned __int64 *)p, -1); } #define AO_HAVE_fetch_and_sub1_release AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; oldval = _InterlockedCompareExchange64_acq(addr, new_val, old); return (oldval == old); } #define AO_HAVE_compare_and_swap_acquire AO_INLINE int AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) { AO_t oldval; oldval = _InterlockedCompareExchange64_rel(addr, new_val, old); return (oldval == old); } #define AO_HAVE_compare_and_swap_release AO_INLINE int AO_char_compare_and_swap_acquire(volatile unsigned char *addr, unsigned char old, unsigned char new_val) { unsigned char oldval; oldval = _InterlockedCompareExchange8_acq(addr, new_val, old); return (oldval == old); } #define AO_HAVE_char_compare_and_swap_acquire AO_INLINE int AO_char_compare_and_swap_release(volatile unsigned char *addr, unsigned char old, unsigned char new_val) { unsigned char oldval; oldval = _InterlockedCompareExchange8_rel(addr, new_val, old); return (oldval == old); } #define AO_HAVE_char_compare_and_swap_release AO_INLINE int AO_short_compare_and_swap_acquire(volatile unsigned short *addr, unsigned short old, unsigned short new_val) { unsigned short oldval; oldval = _InterlockedCompareExchange16_acq(addr, new_val, old); return (oldval == old); } #define AO_HAVE_short_compare_and_swap_acquire AO_INLINE int AO_short_compare_and_swap_release(volatile unsigned short *addr, unsigned short old, unsigned short new_val) { unsigned short oldval; oldval = _InterlockedCompareExchange16_rel(addr, new_val, old); return (oldval == old); } #define AO_HAVE_short_compare_and_swap_release AO_INLINE int AO_int_compare_and_swap_acquire(volatile unsigned int *addr, unsigned int old, unsigned int new_val) { unsigned int oldval; oldval = _InterlockedCompareExchange_acq(addr, new_val, old); return (oldval == old); } #define AO_HAVE_int_compare_and_swap_acquire AO_INLINE int AO_int_compare_and_swap_release(volatile unsigned int *addr, unsigned int old, unsigned int new_val) { unsigned int oldval; oldval = _InterlockedCompareExchange_rel(addr, new_val, old); return (oldval == old); } #define AO_HAVE_int_compare_and_swap_release ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/int_acquire_release_volatile.h ================================================ /* * Copyright (c) 2003-2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
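/* Illustration (not part of this file): both ia64 ports above provide
   fetchadd only for increments of +1/-1, because the hardware fetchadd
   instruction accepts just a handful of immediate values; other increments
   must be synthesized from compare-and-swap. The standard construction, in
   terms of the AO_compare_and_swap_acquire defined above (helper name
   hypothetical): */
static AO_t
fetch_and_add_n_acquire(volatile AO_t *p, AO_t n)
{
  AO_t old;

  do {
    old = *p;                                    /* read the current value */
  } while (!AO_compare_and_swap_acquire(p, old, old + n));
  return old;                                    /* value before the add */
}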
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * This file adds definitions appropriate for environments in which an unsigned * int volatile load has acquire semantics, and an unsigned short volatile * store has release semantics. This is true with the standard Itanium ABI. */ #if !defined(AO_GCC_BARRIER) # if defined(__GNUC__) # define AO_GCC_BARRIER() AO_compiler_barrier() # else # define AO_GCC_BARRIER() # endif #endif AO_INLINE unsigned int AO_int_load_acquire(const volatile unsigned int *p) { unsigned int result = *p; /* A normal volatile load generates an ld.acq */ AO_GCC_BARRIER(); return result; } #define AO_HAVE_int_load_acquire AO_INLINE void AO_int_store_release(volatile unsigned int *p, unsigned int val) { AO_GCC_BARRIER(); /* A normal volatile store generates an st.rel */ *p = val; } #define AO_HAVE_int_store_release ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/int_aligned_atomic_load_store.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of unsigned int are * atomic for all legal alignments. */ AO_INLINE unsigned int AO_int_load(const volatile unsigned int *addr) { assert(((size_t)addr & (sizeof(unsigned int) - 1)) == 0); /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(unsigned int *)addr); } #define AO_HAVE_int_load AO_INLINE void AO_int_store(volatile unsigned int *addr, unsigned int new_val) { assert(((size_t)addr & (sizeof(unsigned int) - 1)) == 0); (*(unsigned int *)addr) = new_val; } #define AO_HAVE_int_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/int_atomic_load_store.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. 
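/* Illustration (not part of this file): the int_aligned/int_atomic
   load-store headers encode a contract rather than an instruction -- on the
   targets that include them, a naturally aligned int access is a single
   indivisible access, so a plain load is already atomic and the volatile
   qualifier is cast away only to avoid picking up fence semantics (as on
   IA64). The C11 analogue of that contract is a relaxed atomic access: */
#include <stdatomic.h>

static _Atomic unsigned int hit_count;

static unsigned int
read_hits(void)
{
  /* Atomicity only, no ordering: the counterpart of AO_int_load. */
  return atomic_load_explicit(&hit_count, memory_order_relaxed);
}

static void
set_hits(unsigned int v)
{
  atomic_store_explicit(&hit_count, v, memory_order_relaxed);
}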
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of unsigned int are * atomic for all legal alignments. */ AO_INLINE unsigned int AO_int_load(const volatile unsigned int *addr) { /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(const unsigned int *)addr); } #define AO_HAVE_int_load AO_INLINE void AO_int_store(volatile unsigned int *addr, unsigned int new_val) { (*(unsigned int *)addr) = new_val; } #define AO_HAVE_int_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/msftc/arm.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "../read_ordered.h" #ifndef AO_ASSUME_WINDOWS98 /* CAS is always available */ # define AO_ASSUME_WINDOWS98 #endif #include "common32_defs.h" /* FIXME: Do _InterlockedOps really have a full memory barrier? */ /* (MSDN WinCE docs say nothing about it.) */ #if _M_ARM >= 6 /* ARMv6 is the first architecture providing support for simple LL/SC. */ #include "../standard_ao_double_t.h" /* If only a single processor is used, we can define AO_UNIPROCESSOR */ /* and do not need to access CP15 for ensuring a DMB at all. */ #ifdef AO_UNIPROCESSOR AO_INLINE void AO_nop_full(void) {} # define AO_HAVE_nop_full #else /* AO_nop_full() is emulated using AO_test_and_set_full(). 
*/ #endif #include "../test_and_set_t_is_ao_t.h" /* AO_test_and_set() is emulated using CAS. */ AO_INLINE AO_t AO_load(const volatile AO_t *addr) { /* Cast away the volatile in case it adds fence semantics */ return (*(const AO_t *)addr); } #define AO_HAVE_load AO_INLINE void AO_store_full(volatile AO_t *addr, AO_t value) { /* Emulate atomic store using CAS. */ AO_t old = AO_load(addr); AO_t current; # ifdef AO_OLD_STYLE_INTERLOCKED_COMPARE_EXCHANGE while ((current = (AO_t)_InterlockedCompareExchange( (PVOID AO_INTERLOCKED_VOLATILE *)addr, (PVOID)value, (PVOID)old)) != old) old = current; # else while ((current = (AO_t)_InterlockedCompareExchange( (LONG AO_INTERLOCKED_VOLATILE *)addr, (LONG)value, (LONG)old)) != old) old = current; # endif } #define AO_HAVE_store_full /* FIXME: implement AO_compare_double_and_swap_double() */ #else /* _M_ARM < 6 */ /* Some slide set, if it has been read correctly, claims that Loads */ /* followed by either a Load or a Store are ordered, but nothing */ /* else is. It appears that SWP is the only simple memory barrier. */ #include "../all_atomic_load_store.h" #include "../test_and_set_t_is_ao_t.h" /* AO_test_and_set_full() is emulated using CAS. */ #endif /* _M_ARM < 6 */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/msftc/common32_defs.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* This file contains AO primitives based on VC++ built-in intrinsic */ /* functions commonly available across 32-bit architectures. */ /* This file should be included from arch-specific header files. */ /* Define AO_USE_INTERLOCKED_INTRINSICS if _Interlocked primitives */ /* (used below) are available as intrinsic ones for a target arch */ /* (otherwise "Interlocked" functions family is used instead). */ /* Define AO_ASSUME_WINDOWS98 if CAS is available. */ #include <windows.h> /* Seems like over-kill, but that's what MSDN recommends. */ /* And apparently winbase.h is not always self-contained. */
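The AO_store_full() emulation above is one instance of a general recipe: any read-modify-write can be synthesized from plain CAS by re-reading and retrying until the exchange lands. A minimal sketch in the same spirit; or_full_sketch is a hypothetical name, and AO_load plus AO_compare_and_swap_full are assumed to be supplied by the platform headers.

#include "atomic_ops.h"

/* Atomically OR a mask into *addr, retrying while other threads interfere. */
static void or_full_sketch(volatile AO_t *addr, AO_t mask) {
    AO_t old;
    do {
        old = AO_load(addr);               /* snapshot the current value */
    } while (!AO_compare_and_swap_full(addr, old, old | mask));
}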
#if _MSC_VER < 1310 || !defined(AO_USE_INTERLOCKED_INTRINSICS) # define _InterlockedIncrement InterlockedIncrement # define _InterlockedDecrement InterlockedDecrement # define _InterlockedExchange InterlockedExchange # define _InterlockedExchangeAdd InterlockedExchangeAdd # define _InterlockedCompareExchange InterlockedCompareExchange # define AO_INTERLOCKED_VOLATILE /**/ #else /* elif _MSC_VER >= 1310 */ # if _MSC_VER >= 1400 # ifndef _WIN32_WCE # include <intrin.h> # endif # pragma intrinsic (_ReadWriteBarrier) # else /* elif _MSC_VER < 1400 */ # ifdef __cplusplus extern "C" { # endif LONG __cdecl _InterlockedIncrement(LONG volatile *); LONG __cdecl _InterlockedDecrement(LONG volatile *); LONG __cdecl _InterlockedExchangeAdd(LONG volatile *, LONG); LONG __cdecl _InterlockedExchange(LONG volatile *, LONG); LONG __cdecl _InterlockedCompareExchange(LONG volatile *, LONG /* Exchange */, LONG /* Comp */); # ifdef __cplusplus } # endif # endif /* _MSC_VER < 1400 */ # pragma intrinsic (_InterlockedIncrement) # pragma intrinsic (_InterlockedDecrement) # pragma intrinsic (_InterlockedExchange) # pragma intrinsic (_InterlockedExchangeAdd) # pragma intrinsic (_InterlockedCompareExchange) # define AO_INTERLOCKED_VOLATILE volatile #endif /* _MSC_VER >= 1310 */ AO_INLINE AO_t AO_fetch_and_add_full(volatile AO_t *p, AO_t incr) { return _InterlockedExchangeAdd((LONG AO_INTERLOCKED_VOLATILE *)p, (LONG)incr); } #define AO_HAVE_fetch_and_add_full AO_INLINE AO_t AO_fetch_and_add1_full(volatile AO_t *p) { return _InterlockedIncrement((LONG AO_INTERLOCKED_VOLATILE *)p) - 1; } #define AO_HAVE_fetch_and_add1_full AO_INLINE AO_t AO_fetch_and_sub1_full(volatile AO_t *p) { return _InterlockedDecrement((LONG AO_INTERLOCKED_VOLATILE *)p) + 1; } #define AO_HAVE_fetch_and_sub1_full #ifdef AO_ASSUME_WINDOWS98 /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { # ifdef AO_OLD_STYLE_INTERLOCKED_COMPARE_EXCHANGE return _InterlockedCompareExchange((PVOID AO_INTERLOCKED_VOLATILE *)addr, (PVOID)new_val, (PVOID)old) == (PVOID)old; # else return _InterlockedCompareExchange((LONG AO_INTERLOCKED_VOLATILE *)addr, (LONG)new_val, (LONG)old) == (LONG)old; # endif } # define AO_HAVE_compare_and_swap_full #endif /* AO_ASSUME_WINDOWS98 */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/msftc/x86.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* If AO_ASSUME_WINDOWS98 is defined, we assume Windows 98 or newer. */ /* If AO_ASSUME_VISTA is defined, we assume Windows Server 2003, Vista */ /* or later. */ #include "../all_aligned_atomic_load_store.h" /* Real X86 implementations, except for some old WinChips, appear */ /* to enforce ordering between memory operations, EXCEPT that a later */ /* read can pass earlier writes, presumably due to the visible */ /* presence of store buffers. */ /* We ignore both the WinChips, and the fact that the official specs */ /* seem to be much weaker (and arguably too weak to be usable). */ #include "../ordered_except_wr.h" #include "../test_and_set_t_is_char.h" #ifndef AO_USE_INTERLOCKED_INTRINSICS /* _Interlocked primitives (Inc, Dec, Xchg, Add) are always available */ # define AO_USE_INTERLOCKED_INTRINSICS #endif #include "common32_defs.h" /* As far as we can tell, the lfence and sfence instructions are not */ /* currently needed or useful for cached memory accesses. */ /* Unfortunately mfence doesn't exist everywhere. */ /* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is */ /* probably a conservative test for it? */ #if defined(AO_USE_PENTIUM4_INSTRS) AO_INLINE void AO_nop_full(void) { __asm { mfence } } #define AO_HAVE_nop_full #else /* We could use the cpuid instruction. But that seems to be slower */ /* than the default implementation based on test_and_set_full. Thus */ /* we omit that bit of misinformation here. */ #endif AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { __asm { mov eax,0xff ; /* AO_TS_SET */ mov ebx,addr ; xchg byte ptr [ebx],al ; } /* Ignore possible "missing return value" warning here. */ } #define AO_HAVE_test_and_set_full #ifdef _WIN64 # error wrong architecture #endif #ifdef AO_ASSUME_VISTA /* NEC LE-IT: whenever we run on a pentium class machine we have that * certain function */ #include "../standard_ao_double_t.h" #pragma intrinsic (_InterlockedCompareExchange64) /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { __int64 oldv = (__int64)old_val1 | ((__int64)old_val2 << 32); __int64 newv = (__int64)new_val1 | ((__int64)new_val2 << 32); return _InterlockedCompareExchange64((__int64 volatile *)addr, newv, oldv) == oldv; } #define AO_HAVE_compare_double_and_swap_double_full #ifdef __cplusplus AO_INLINE int AO_double_compare_and_swap_full(volatile AO_double_t *addr, AO_double_t old_val, AO_double_t new_val) { return _InterlockedCompareExchange64((__int64 volatile *)addr, new_val.AO_whole, old_val.AO_whole) == old_val.AO_whole; } #define AO_HAVE_double_compare_and_swap_full #endif /* __cplusplus */ #endif /* AO_ASSUME_VISTA */ #include "../ao_t_is_int.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/msftc/x86_64.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. 
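AO_test_and_set_full() above returns the prior value of the test-and-set location, which is exactly what a test-and-set spinlock needs: keep setting until the prior value was clear. A minimal sketch; spin_lock and spin_unlock are hypothetical names, and AO_TS_INITIALIZER and AO_CLEAR come from atomic_ops.h further below.

#include "atomic_ops.h"

static AO_TS_t lock_word = AO_TS_INITIALIZER;

static void spin_lock(void) {
    while (AO_test_and_set_full(&lock_word) == AO_TS_SET) {
        /* busy-wait: another thread holds the lock */
    }
}

static void spin_unlock(void) {
    AO_CLEAR(&lock_word);    /* normally an AO_store_release of AO_TS_CLEAR */
}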
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "../all_aligned_atomic_load_store.h" /* Real X86 implementations appear */ /* to enforce ordering between memory operations, EXCEPT that a later */ /* read can pass earlier writes, presumably due to the visible */ /* presence of store buffers. */ /* We ignore the fact that the official specs */ /* seem to be much weaker (and arguably too weak to be usable). */ #include "../ordered_except_wr.h" #ifdef AO_ASM_X64_AVAILABLE # include "../test_and_set_t_is_char.h" #else # include "../test_and_set_t_is_ao_t.h" #endif #include "../standard_ao_double_t.h" #include <windows.h> /* Seems like over-kill, but that's what MSDN recommends. */ /* And apparently winbase.h is not always self-contained. */ /* Assume _MSC_VER >= 1400 */ #include <intrin.h> #pragma intrinsic (_ReadWriteBarrier) #pragma intrinsic (_InterlockedIncrement64) #pragma intrinsic (_InterlockedDecrement64) #pragma intrinsic (_InterlockedExchange64) #pragma intrinsic (_InterlockedExchangeAdd64) #pragma intrinsic (_InterlockedCompareExchange64) AO_INLINE AO_t AO_fetch_and_add_full (volatile AO_t *p, AO_t incr) { return _InterlockedExchangeAdd64((LONGLONG volatile *)p, (LONGLONG)incr); } #define AO_HAVE_fetch_and_add_full AO_INLINE AO_t AO_fetch_and_add1_full (volatile AO_t *p) { return _InterlockedIncrement64((LONGLONG volatile *)p) - 1; } #define AO_HAVE_fetch_and_add1_full AO_INLINE AO_t AO_fetch_and_sub1_full (volatile AO_t *p) { return _InterlockedDecrement64((LONGLONG volatile *)p) + 1; } #define AO_HAVE_fetch_and_sub1_full AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { return _InterlockedCompareExchange64((LONGLONG volatile *)addr, (LONGLONG)new_val, (LONGLONG)old) == (LONGLONG)old; } #define AO_HAVE_compare_and_swap_full /* As far as we can tell, the lfence and sfence instructions are not */ /* currently needed or useful for cached memory accesses. */ #ifdef AO_ASM_X64_AVAILABLE AO_INLINE void AO_nop_full(void) { /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */ __asm { mfence } } #define AO_HAVE_nop_full AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { __asm { mov rax,AO_TS_SET ; mov rbx,addr ; xchg byte ptr [rbx],al ; } } #define AO_HAVE_test_and_set_full #endif /* AO_ASM_X64_AVAILABLE */ #ifdef AO_CMPXCHG16B_AVAILABLE /* AO_compare_double_and_swap_double_full needs implementation for Win64. * Also see ../gcc/x86_64.h for partial old Opteron workaround.
*/ # if _MSC_VER >= 1500 #pragma intrinsic (_InterlockedCompareExchange128) AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { __int64 comparandResult[2]; comparandResult[0] = old_val1; /* low */ comparandResult[1] = old_val2; /* high */ return _InterlockedCompareExchange128((volatile __int64 *)addr, new_val2 /* high */, new_val1 /* low */, comparandResult); } # define AO_HAVE_compare_double_and_swap_double_full # elif defined(AO_ASM_X64_AVAILABLE) /* If there is no intrinsic _InterlockedCompareExchange128 then we * need basically what's given below. */ AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { __asm { mov rdx,QWORD PTR [old_val2] ; mov rax,QWORD PTR [old_val1] ; mov rcx,QWORD PTR [new_val2] ; mov rbx,QWORD PTR [new_val1] ; lock cmpxchg16b [addr] ; setz rax ; } } # define AO_HAVE_compare_double_and_swap_double_full # endif /* _MSC_VER >= 1500 || AO_ASM_X64_AVAILABLE */ #endif /* AO_CMPXCHG16B_AVAILABLE */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/ordered.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * These are common definitions for architectures that provide processor * ordered memory operations. */ #include "ordered_except_wr.h" AO_INLINE void AO_nop_full(void) { AO_compiler_barrier(); } #define AO_HAVE_nop_full ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/ordered_except_wr.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * These are common definitions for architectures that provide processor * ordered memory operations except that a later read may pass an * earlier write. Real x86 implementations seem to be in this category, * except apparently for some IDT WinChips, which we ignore. */ #include "read_ordered.h" AO_INLINE void AO_nop_write(void) { AO_compiler_barrier(); /* sfence according to Intel docs. Pentium 3 and up. */ /* Unnecessary for cached accesses? */ } #define AO_HAVE_NOP_WRITE #if defined(AO_HAVE_store) AO_INLINE void AO_store_write(volatile AO_t *addr, AO_t val) { AO_compiler_barrier(); AO_store(addr, val); } # define AO_HAVE_store_write # define AO_store_release(addr, val) AO_store_write(addr, val) # define AO_HAVE_store_release #endif /* AO_HAVE_store */ #if defined(AO_HAVE_char_store) AO_INLINE void AO_char_store_write(volatile unsigned char *addr, unsigned char val) { AO_compiler_barrier(); AO_char_store(addr, val); } # define AO_HAVE_char_store_write # define AO_char_store_release(addr, val) AO_char_store_write(addr, val) # define AO_HAVE_char_store_release #endif /* AO_HAVE_char_store */ #if defined(AO_HAVE_short_store) AO_INLINE void AO_short_store_write(volatile unsigned short *addr, unsigned short val) { AO_compiler_barrier(); AO_short_store(addr, val); } # define AO_HAVE_short_store_write # define AO_short_store_release(addr, val) AO_short_store_write(addr, val) # define AO_HAVE_short_store_release #endif /* AO_HAVE_short_store */ #if defined(AO_HAVE_int_store) AO_INLINE void AO_int_store_write(volatile unsigned int *addr, unsigned int val) { AO_compiler_barrier(); AO_int_store(addr, val); } # define AO_HAVE_int_store_write # define AO_int_store_release(addr, val) AO_int_store_write(addr, val) # define AO_HAVE_int_store_release #endif /* AO_HAVE_int_store */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/read_ordered.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * These are common definitions for architectures that provide processor * ordered memory operations except that a later read may pass an * earlier write. Real x86 implementations seem to be in this category, * except apparently for some IDT WinChips, which we ignore. */ AO_INLINE void AO_nop_read(void) { AO_compiler_barrier(); } #define AO_HAVE_NOP_READ #ifdef AO_HAVE_load AO_INLINE AO_t AO_load_read(const volatile AO_t *addr) { AO_t result = AO_load(addr); AO_compiler_barrier(); return result; } #define AO_HAVE_load_read #define AO_load_acquire(addr) AO_load_read(addr) #define AO_HAVE_load_acquire #endif /* AO_HAVE_load */ #ifdef AO_HAVE_char_load AO_INLINE AO_t AO_char_load_read(const volatile unsigned char *addr) { AO_t result = AO_char_load(addr); AO_compiler_barrier(); return result; } #define AO_HAVE_char_load_read #define AO_char_load_acquire(addr) AO_char_load_read(addr) #define AO_HAVE_char_load_acquire #endif /* AO_HAVE_char_load */ #ifdef AO_HAVE_short_load AO_INLINE AO_t AO_short_load_read(const volatile unsigned short *addr) { AO_t result = AO_short_load(addr); AO_compiler_barrier(); return result; } #define AO_HAVE_short_load_read #define AO_short_load_acquire(addr) AO_short_load_read(addr) #define AO_HAVE_short_load_acquire #endif /* AO_HAVE_short_load */ #ifdef AO_HAVE_int_load AO_INLINE AO_t AO_int_load_read(const volatile unsigned int *addr) { AO_t result = AO_int_load(addr); AO_compiler_barrier(); return result; } #define AO_HAVE_int_load_read #define AO_int_load_acquire(addr) AO_int_load_read(addr) #define AO_HAVE_int_load_acquire #endif /* AO_HAVE_int_load */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/short_acquire_release_volatile.h ================================================ /* * Copyright (c) 2003-2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * This file adds definitions appropriate for environments in which an unsigned short * volatile load has acquire semantics, and an unsigned short volatile store has release * semantics. This is true with the standard Itanium ABI. 
*/ #if !defined(AO_GCC_BARRIER) # if defined(__GNUC__) # define AO_GCC_BARRIER() AO_compiler_barrier() # else # define AO_GCC_BARRIER() # endif #endif AO_INLINE unsigned short AO_short_load_acquire(const volatile unsigned short *p) { unsigned short result = *p; /* A normal volatile load generates an ld.acq */ AO_GCC_BARRIER(); return result; } #define AO_HAVE_short_load_acquire AO_INLINE void AO_short_store_release(volatile unsigned short *p, unsigned short val) { AO_GCC_BARRIER(); /* A normal volatile store generates an st.rel */ *p = val; } #define AO_HAVE_short_store_release ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/short_aligned_atomic_load_store.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of unsigned short * are atomic for all legal alignments. */ AO_INLINE unsigned short AO_short_load(const volatile unsigned short *addr) { assert(((size_t)addr & (sizeof(unsigned short) - 1)) == 0); /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(unsigned short *)addr); } #define AO_HAVE_short_load AO_INLINE void AO_short_store(volatile unsigned short *addr, unsigned short new_val) { assert(((size_t)addr & (sizeof(unsigned short) - 1)) == 0); (*(unsigned short *)addr) = new_val; } #define AO_HAVE_short_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/short_atomic_load_store.h ================================================ /* * Copyright (c) 2003 by Hewlett-Packard Company. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Definitions for architectures on which loads and stores of unsigned short * are atomic for all legal alignments. */ AO_INLINE unsigned short AO_short_load(const volatile unsigned short *addr) { /* Cast away the volatile for architectures like IA64 where */ /* volatile adds barrier semantics. */ return (*(const unsigned short *)addr); } #define AO_HAVE_short_load AO_INLINE void AO_short_store(volatile unsigned short *addr, unsigned short new_val) { (*(unsigned short *)addr) = new_val; } #define AO_HAVE_short_store ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/standard_ao_double_t.h ================================================ /* NEC LE-IT: For 64Bit OS we extend the double type to hold two int64's * * x86-64: __m128 serves as placeholder which also requires the compiler * to align it on 16 byte boundary (as required by cmpxchg16b). * Similar things could be done for PowerPC 64bit using a VMX data type... */ #if (defined(__x86_64__) && defined(__GNUC__)) || defined(_WIN64) # include <xmmintrin.h> typedef __m128 double_ptr_storage; #elif defined(_WIN32) && !defined(__GNUC__) typedef unsigned __int64 double_ptr_storage; #else typedef unsigned long long double_ptr_storage; #endif # define AO_HAVE_DOUBLE_PTR_STORAGE typedef union { double_ptr_storage AO_whole; struct {AO_t AO_v1; AO_t AO_v2;} AO_parts; } AO_double_t; #define AO_HAVE_double_t #define AO_val1 AO_parts.AO_v1 #define AO_val2 AO_parts.AO_v2 ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/sunc/sparc.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "../all_atomic_load_store.h" /* Real SPARC code uses TSO: */ #include "../ordered_except_wr.h" /* Test_and_set location is just a byte.
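AO_double_t above packs two AO_t words (reachable as AO_val1 and AO_val2) into one double-width unit so that cmpxchg16b-style instructions can swap a pointer and a version tag together, the standard ABA guard. A minimal sketch; retag_ptr is a hypothetical name, and the whole block is guarded by the corresponding AO_HAVE_ macro because double-width CAS is not available on every target.

#include "atomic_ops.h"

#ifdef AO_HAVE_compare_double_and_swap_double_full
/* Swing a version-tagged pointer: the tag (AO_val2) is bumped on every
   successful swap, so a recycled pointer value cannot match a stale tag.
   The two reads form an unsynchronized snapshot; the double-width CAS
   validates that snapshot atomically. Returns nonzero on success. */
static int retag_ptr(volatile AO_double_t *loc, AO_t new_ptr) {
    AO_t old_ptr = loc->AO_val1;
    AO_t old_tag = loc->AO_val2;
    return AO_compare_double_and_swap_double_full(loc, old_ptr, old_tag,
                                                  new_ptr, old_tag + 1);
}
#endif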
*/ #include "../test_and_set_t_is_char.h" extern AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr); /* Implemented in separate .S file, for now. */ #define AO_HAVE_test_and_set_full /* FIXME: Like the gcc version, this needs to be extended for V8 */ /* and V9. */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/sunc/x86.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. * * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * * Some of the machine specific code was borrowed from our GC distribution. */ /* The following really assume we have a 486 or better. */ #include "../all_aligned_atomic_load_store.h" /* Real X86 implementations, except for some old WinChips, appear */ /* to enforce ordering between memory operations, EXCEPT that a later */ /* read can pass earlier writes, presumably due to the visible */ /* presence of store buffers. */ /* We ignore both the WinChips, and the fact that the official specs */ /* seem to be much weaker (and arguably too weak to be usable). */ #include "../ordered_except_wr.h" #include "../test_and_set_t_is_char.h" #include "../standard_ao_double_t.h" #if defined(AO_USE_PENTIUM4_INSTRS) AO_INLINE void AO_nop_full(void) { __asm__ __volatile__("mfence" : : : "memory"); } #define AO_HAVE_nop_full #else /* We could use the cpuid instruction. But that seems to be slower */ /* than the default implementation based on test_and_set_full. Thus */ /* we omit that bit of misinformation here. */ #endif /* As far as we can tell, the lfence and sfence instructions are not */ /* currently needed or useful for cached memory accesses. 
*/ /* Really only works for 486 and later */ AO_INLINE AO_t AO_fetch_and_add_full (volatile AO_t *p, AO_t incr) { AO_t result; __asm__ __volatile__ ("lock; xaddl %0, %1" : "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_fetch_and_add_full AO_INLINE unsigned char AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr) { unsigned char result; __asm__ __volatile__ ("lock; xaddb %0, %1" : "=q" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_char_fetch_and_add_full AO_INLINE unsigned short AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr) { unsigned short result; __asm__ __volatile__ ("lock; xaddw %0, %1" : "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_short_fetch_and_add_full /* Really only works for 486 and later */ AO_INLINE void AO_or_full (volatile AO_t *p, AO_t incr) { __asm__ __volatile__ ("lock; orl %1, %0" : "=m" (*p) : "r" (incr) /* , "m" (*p) */ : "memory"); } #define AO_HAVE_or_full AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_t oldval; /* Note: the "xchg" instruction does not need a "lock" prefix */ __asm__ __volatile__("xchg %0, %1" : "=q"(oldval), "=m"(*addr) : "0"(0xff) /* , "m"(*addr) */ : "memory"); return (AO_TS_VAL_t)oldval; } #define AO_HAVE_test_and_set_full /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { char result; __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1" : "=m"(*addr), "=a"(result) : "r" (new_val), "a"(old) : "memory"); return (int) result; } #define AO_HAVE_compare_and_swap_full #if 0 /* FIXME: not tested (and probably wrong). Besides, */ /* it tickles a bug in Sun C 5.10 (when optimizing). */ /* Returns nonzero if the comparison succeeded. */ /* Really requires at least a Pentium. */ AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { char result; #if __PIC__ /* If PIC is turned on, we can't use %ebx as it is reserved for the GOT pointer. We can save and restore %ebx because GCC won't be using it for anything else (such as any of the m operands) */ __asm__ __volatile__("pushl %%ebx;" /* save ebx used for PIC GOT ptr */ "movl %6,%%ebx;" /* move new_val2 to %ebx */ "lock; cmpxchg8b %0; setz %1;" "pop %%ebx;" /* restore %ebx */ : "=m"(*addr), "=a"(result) : "m"(*addr), "d" (old_val2), "a" (old_val1), "c" (new_val2), "m" (new_val1) : "memory"); #else /* We can't just do the same thing in non-PIC mode, because GCC * might be using %ebx as the memory operand. We could have ifdef'd * in a clobber, but there's no point doing the push/pop if we don't * have to. */ __asm__ __volatile__("lock; cmpxchg8b %0; setz %1;" : "=m"(*addr), "=a"(result) : /* "m"(*addr), */ "d" (old_val2), "a" (old_val1), "c" (new_val2), "b" (new_val1) : "memory"); #endif return (int) result; } #define AO_HAVE_compare_double_and_swap_double_full #endif #include "../ao_t_is_int.h" ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/sunc/x86_64.h ================================================ /* * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. 
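The lock; xadd based AO_fetch_and_add_full() above turns a shared counter update into a single instruction rather than a CAS retry loop. A minimal sketch; op_count, note_op and read_ops are hypothetical names, and AO_fetch_and_add1_full is assumed to be derived from AO_fetch_and_add_full by the generalization pass described later in atomic_ops.h.

#include "atomic_ops.h"

static volatile AO_t op_count = 0;

static void note_op(void) {
    AO_fetch_and_add1_full(&op_count);   /* one atomic increment, no retry loop */
}

static AO_t read_ops(void) {
    return AO_load(&op_count);           /* atomic word-sized read */
}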
* * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. * * Permission is hereby granted to use or copy this program * for any purpose, provided the above notices are retained on all copies. * Permission to modify the code and to distribute modified code is granted, * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. * * Some of the machine specific code was borrowed from our GC distribution. */ #include "../all_aligned_atomic_load_store.h" /* Real X86 implementations appear */ /* to enforce ordering between memory operations, EXCEPT that a later */ /* read can pass earlier writes, presumably due to the visible */ /* presence of store buffers. */ /* We ignore the fact that the official specs */ /* seem to be much weaker (and arguably too weak to be usable). */ #include "../ordered_except_wr.h" #include "../test_and_set_t_is_char.h" #include "../standard_ao_double_t.h" AO_INLINE void AO_nop_full(void) { /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */ __asm__ __volatile__("mfence" : : : "memory"); } #define AO_HAVE_nop_full /* As far as we can tell, the lfence and sfence instructions are not */ /* currently needed or useful for cached memory accesses. */ AO_INLINE AO_t AO_fetch_and_add_full (volatile AO_t *p, AO_t incr) { AO_t result; __asm__ __volatile__ ("lock; xaddq %0, %1" : "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_fetch_and_add_full AO_INLINE unsigned char AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr) { unsigned char result; __asm__ __volatile__ ("lock; xaddb %0, %1" : "=q" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_char_fetch_and_add_full AO_INLINE unsigned short AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr) { unsigned short result; __asm__ __volatile__ ("lock; xaddw %0, %1" : "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_short_fetch_and_add_full AO_INLINE unsigned int AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr) { unsigned int result; __asm__ __volatile__ ("lock; xaddl %0, %1" : "=r" (result), "=m" (*p) : "0" (incr) /* , "m" (*p) */ : "memory"); return result; } #define AO_HAVE_int_fetch_and_add_full AO_INLINE void AO_or_full (volatile AO_t *p, AO_t incr) { __asm__ __volatile__ ("lock; orq %1, %0" : "=m" (*p) : "r" (incr) /* , "m" (*p) */ : "memory"); } #define AO_HAVE_or_full AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr) { AO_TS_t oldval; /* Note: the "xchg" instruction does not need a "lock" prefix */ __asm__ __volatile__("xchg %0, %1" : "=q"(oldval), "=m"(*addr) : "0"(0xff) /* , "m"(*addr) */ : "memory"); return (AO_TS_VAL_t)oldval; } #define AO_HAVE_test_and_set_full /* Returns nonzero if the comparison succeeded. */ AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) { char result; __asm__ __volatile__("lock; cmpxchgq %2, %0; setz %1" : "=m"(*addr), "=a"(result) : "r" (new_val), "a"(old) : "memory"); return (int) result; } #define AO_HAVE_compare_and_swap_full #ifdef AO_CMPXCHG16B_AVAILABLE /* NEC LE-IT: older AMD Opterons are missing this instruction. * On these machines SIGILL will be thrown.
* Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated * (lock based) version available */ /* HB: Changed this to not define either by default. There are * enough machines and tool chains around on which cmpxchg16b * doesn't work. And the emulation is unsafe by our usual rules. * However both are clearly useful in certain cases. */ AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { char result; __asm__ __volatile__("lock; cmpxchg16b %0; setz %1" : "=m"(*addr), "=a"(result) : "m"(*addr), "d" (old_val2), "a" (old_val1), "c" (new_val2), "b" (new_val1) : "memory"); return (int) result; } #define AO_HAVE_compare_double_and_swap_double_full #else /* this one provides spinlock based emulation of CAS implemented in */ /* atomic_ops.c. We probably do not want to do this here, since it is */ /* not atomic with respect to other kinds of updates of *addr. On the */ /* other hand, this may be a useful facility on occasion. */ #ifdef AO_WEAK_DOUBLE_CAS_EMULATION int AO_compare_double_and_swap_double_emulation(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2); AO_INLINE int AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, AO_t old_val1, AO_t old_val2, AO_t new_val1, AO_t new_val2) { return AO_compare_double_and_swap_double_emulation(addr, old_val1, old_val2, new_val1, new_val2); } #define AO_HAVE_compare_double_and_swap_double_full #endif /* AO_WEAK_DOUBLE_CAS_EMULATION */ #endif /* AO_CMPXCHG16B_AVAILABLE */ ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/test_and_set_t_is_ao_t.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * These are common definitions for architectures on which test_and_set * operates on pointer-sized quantities, the "clear" value contains * all zeroes, and the "set" value contains only the lowest bit set. * This can be used if test_and_set is synthesized from compare_and_swap.
*/ typedef enum {AO_TS_clear = 0, AO_TS_set = 1} AO_TS_val; #define AO_TS_VAL_t AO_TS_val #define AO_TS_CLEAR AO_TS_clear #define AO_TS_SET AO_TS_set #define AO_TS_t AO_t #define AO_AO_TS_T 1 ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops/sysdeps/test_and_set_t_is_char.h ================================================ /* * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * These are common definitions for architectures on which test_and_set * operates on byte sized quantities, the "clear" value contains * all zeroes, and the "set" value contains all ones. */ #define AO_TS_t unsigned char typedef enum {AO_BYTE_TS_clear = 0, AO_BYTE_TS_set = 0xff} AO_BYTE_TS_val; #define AO_TS_VAL_t AO_BYTE_TS_val #define AO_TS_CLEAR AO_BYTE_TS_clear #define AO_TS_SET AO_BYTE_TS_set #define AO_CHAR_TS_T 1 ================================================ FILE: datastructures/trevor_brown_abtree/common/atomic_ops/atomic_ops.h ================================================ /* * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef ATOMIC_OPS_H #define ATOMIC_OPS_H #include <assert.h> #include <stddef.h> /* We define various atomic operations on memory in a */ /* machine-specific way. Unfortunately, this is complicated */ /* by the fact that these may or may not be combined with */ /* various memory barriers.
Thus the actual operations we */ /* define have the form AO_<atomic-op>_<barrier>, for all */ /* plausible combinations of <atomic-op> and <barrier>. */ /* This of course results in a mild combinatorial explosion. */ /* To deal with it, we try to generate derived */ /* definitions for as many of the combinations as we can, as */ /* automatically as possible. */ /* */ /* Our assumption throughout is that the programmer will */ /* specify the least demanding operation and memory barrier */ /* that will guarantee correctness for the implementation. */ /* Our job is to find the least expensive way to implement it */ /* on the applicable hardware. In many cases that will */ /* involve, for example, a stronger memory barrier, or a */ /* combination of hardware primitives. */ /* */ /* Conventions: */ /* "plain" atomic operations are not guaranteed to include */ /* a barrier. The suffix in the name specifies the barrier */ /* type. Suffixes are: */ /* _release: Earlier operations may not be delayed past it. */ /* _acquire: Later operations may not move ahead of it. */ /* _read: Subsequent reads must follow this operation and */ /* preceding reads. */ /* _write: Earlier writes precede both this operation and */ /* later writes. */ /* _full: Ordered with respect to both earlier and later memops.*/ /* _release_write: Ordered with respect to earlier writes. */ /* _acquire_read: Ordered with respect to later reads. */ /* */ /* Currently we try to define the following atomic memory */ /* operations, in combination with the above barriers: */ /* AO_nop */ /* AO_load */ /* AO_store */ /* AO_test_and_set (binary) */ /* AO_fetch_and_add */ /* AO_fetch_and_add1 */ /* AO_fetch_and_sub1 */ /* AO_or */ /* AO_compare_and_swap */ /* */ /* Note that atomicity guarantees are valid only if both */ /* readers and writers use AO_ operations to access the */ /* shared value, while ordering constraints are intended to */ /* apply to all memory operations. If a location can potentially */ /* be accessed simultaneously from multiple threads, and one of */ /* those accesses may be a write access, then all such */ /* accesses to that location should be through AO_ primitives. */ /* However if AO_ operations enforce sufficient ordering to */ /* ensure that a location x cannot be accessed concurrently, */ /* or can only be read concurrently, then x can be accessed */ /* via ordinary references and assignments. */ /* */ /* Compare_and_exchange takes an address and an expected old */ /* value and a new value, and returns an int. Nonzero */ /* indicates that it succeeded. */ /* Test_and_set takes an address, atomically replaces it by */ /* AO_TS_SET, and returns the prior value. */ /* An AO_TS_t location can be reset with the */ /* AO_CLEAR macro, which normally uses AO_store_release. */ /* AO_fetch_and_add takes an address and an AO_t increment */ /* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */ /* are provided, since they allow faster implementations on */ /* some hardware. AO_or atomically ors an AO_t value into a */ /* memory location, but does not provide access to the original.*/ /* */ /* We expect this list to grow slowly over time. */ /* */ /* Note that AO_nop_full is a full memory barrier. */ /* */ /* Note that if some data is initialized with */ /* data.x = ...; data.y = ...; ... */ /* AO_store_release_write(&data_is_initialized, 1) */ /* then data is guaranteed to be initialized after the test */ /* if (AO_load_acquire_read(&data_is_initialized)) ... */ /* succeeds.
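A concrete version of the initialization idiom described just above; struct settings and the helper names are hypothetical, and AO_store_release_write/AO_load_acquire_read are assumed to be supplied by the generalization section for the target.

#include "atomic_ops.h"

struct settings { int x; int y; };
static struct settings data;
static volatile AO_t data_is_initialized = 0;

static void init_settings(void) {
    data.x = 1;
    data.y = 2;                                        /* plain writes */
    AO_store_release_write(&data_is_initialized, 1);   /* publish */
}

static int use_settings(void) {
    if (AO_load_acquire_read(&data_is_initialized))
        return data.x + data.y;    /* the plain writes are visible here */
    return -1;                     /* initialization not yet observed */
}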
Furthermore, this should generate near-optimal */ /* code on all common platforms. */ /* */ /* All operations operate on unsigned AO_t, which */ /* is the natural word size, and usually unsigned long. */ /* It is possible to check whether a particular operation op */ /* is available on a particular platform by checking whether */ /* AO_HAVE_op is defined. We make heavy use of these macros */ /* internally. */ /* The rest of this file basically has three sections: */ /* */ /* Some utility and default definitions. */ /* */ /* The architecture dependent section: */ /* This defines atomic operations that have direct hardware */ /* support on a particular platform, mostly by including the */ /* appropriate compiler- and hardware-dependent file. */ /* */ /* The synthesis section: */ /* This tries to define other atomic operations in terms of */ /* those that are explicitly available on the platform. */ /* This section is hardware independent. */ /* We make no attempt to synthesize operations in ways that */ /* effectively introduce locks, except for the debugging/demo */ /* pthread-based implementation at the beginning. A more */ /* realistic implementation that falls back to locks could be */ /* added as a higher layer. But that would sacrifice */ /* usability from signal handlers. */ /* The synthesis section is implemented almost entirely in */ /* atomic_ops_generalize.h. */ /* Some common defaults. Overridden for some architectures. */ #define AO_t size_t /* The test_and_set primitive returns an AO_TS_VAL_t value. */ /* AO_TS_t is the type of an in-memory test-and-set location. */ #define AO_TS_INITIALIZER (AO_t)AO_TS_CLEAR /* Platform-dependent stuff: */ #if defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \ || defined(__DMC__) || defined(__WATCOMC__) # define AO_INLINE static __inline #elif defined(__sun) # define AO_INLINE static inline #else # define AO_INLINE static #endif #if defined(__GNUC__) && !defined(__INTEL_COMPILER) # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory") #elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \ || defined(__WATCOMC__) # if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400 # if defined(_WIN32_WCE) /* # include <cmnintrin.h> */ # elif defined(_MSC_VER) # include <intrin.h> # endif # pragma intrinsic(_ReadWriteBarrier) # define AO_compiler_barrier() _ReadWriteBarrier() /* We assume this does not generate a fence instruction. */ /* The documentation is a bit unclear. */ # else # define AO_compiler_barrier() __asm { } /* The preceding implementation may be preferable here too. */ /* But the documentation warns about VC++ 2003 and earlier. */ # endif #elif defined(__INTEL_COMPILER) # define AO_compiler_barrier() __memory_barrier() /* Too strong? IA64-only? */ #elif defined(_HPUX_SOURCE) # if defined(__ia64) # include <machine/sys/inline.h> # define AO_compiler_barrier() _Asm_sched_fence() # else /* FIXME - We don't know how to do this. This is a guess. */ /* And probably a bad one. */ static volatile int AO_barrier_dummy; # define AO_compiler_barrier() AO_barrier_dummy = AO_barrier_dummy # endif #else /* We conjecture that the following usually gives us the right */ /* semantics or an error.
*/ # define AO_compiler_barrier() asm("") #endif #if defined(AO_USE_PTHREAD_DEFS) # include "atomic_ops/sysdeps/generic_pthread.h" #endif /* AO_USE_PTHREAD_DEFS */ #if (defined(__CC_ARM) || defined(__ARMCC__)) && !defined(__GNUC__) \ && !defined(AO_USE_PTHREAD_DEFS) # include "atomic_ops/sysdeps/armcc/arm_v6.h" # define AO_GENERALIZE_TWICE #endif #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \ && !defined(__INTEL_COMPILER) # if defined(__i386__) /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */ /* it might require specifying additional options (like -march) */ /* or additional link libraries (if -march is not specified). */ # include "atomic_ops/sysdeps/gcc/x86.h" # endif /* __i386__ */ # if defined(__x86_64__) # if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) /* It is safe to use __sync CAS built-in on this architecture. */ # define AO_USE_SYNC_CAS_BUILTIN # endif # include "atomic_ops/sysdeps/gcc/x86_64.h" # endif /* __x86_64__ */ # if defined(__ia64__) # include "atomic_ops/sysdeps/gcc/ia64.h" # define AO_GENERALIZE_TWICE # endif /* __ia64__ */ # if defined(__hppa__) # include "atomic_ops/sysdeps/gcc/hppa.h" # define AO_CAN_EMUL_CAS # endif /* __hppa__ */ # if defined(__alpha__) # include "atomic_ops/sysdeps/gcc/alpha.h" # define AO_GENERALIZE_TWICE # endif /* __alpha__ */ # if defined(__s390__) # include "atomic_ops/sysdeps/gcc/s390.h" # endif /* __s390__ */ # if defined(__sparc__) # include "atomic_ops/sysdeps/gcc/sparc.h" # define AO_CAN_EMUL_CAS # endif /* __sparc__ */ # if defined(__m68k__) # include "atomic_ops/sysdeps/gcc/m68k.h" # endif /* __m68k__ */ # if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ || defined(__powerpc64__) || defined(__ppc64__) # include "atomic_ops/sysdeps/gcc/powerpc.h" # endif /* __powerpc__ */ # if defined(__arm__) && !defined(AO_USE_PTHREAD_DEFS) # include "atomic_ops/sysdeps/gcc/arm.h" # define AO_CAN_EMUL_CAS # endif /* __arm__ */ # if defined(__cris__) || defined(CRIS) # include "atomic_ops/sysdeps/gcc/cris.h" # define AO_GENERALIZE_TWICE # endif # if defined(__mips__) # include "atomic_ops/sysdeps/gcc/mips.h" # endif /* __mips__ */ # if defined(__sh__) || defined(SH4) # include "atomic_ops/sysdeps/gcc/sh.h" # define AO_CAN_EMUL_CAS # endif /* __sh__ */ # if defined(__avr32__) # include "atomic_ops/sysdeps/gcc/avr32.h" # endif #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */ #if (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__) \ && !defined(AO_USE_PTHREAD_DEFS) # if defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) \ || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) \ || defined(_ARCH_PWR) # include "atomic_ops/sysdeps/ibmc/powerpc.h" # define AO_GENERALIZE_TWICE # endif #endif #if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS) # if defined(__ia64__) # include "atomic_ops/sysdeps/icc/ia64.h" # define AO_GENERALIZE_TWICE # endif # if defined(__GNUC__) /* Intel Compiler in GCC compatible mode */ # if defined(__i386__) # include "atomic_ops/sysdeps/gcc/x86.h" # endif /* __i386__ */ # if defined(__x86_64__) # if __INTEL_COMPILER > 1110 # define AO_USE_SYNC_CAS_BUILTIN # endif # include "atomic_ops/sysdeps/gcc/x86_64.h" # endif /* __x86_64__ */ # endif #endif #if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) # if defined(__ia64) # include "atomic_ops/sysdeps/hpc/ia64.h" # define AO_GENERALIZE_TWICE # else # include "atomic_ops/sysdeps/hpc/hppa.h" # define AO_CAN_EMUL_CAS # endif #endif #if defined(_MSC_VER) || 
defined(__DMC__) || defined(__BORLANDC__) \ || (defined(__WATCOMC__) && defined(__NT__)) # if defined(_AMD64_) || defined(_M_X64) # include "atomic_ops/sysdeps/msftc/x86_64.h" # elif defined(_M_IX86) || defined(x86) # include "atomic_ops/sysdeps/msftc/x86.h" # elif defined(_M_ARM) || defined(ARM) || defined(_ARM_) # include "atomic_ops/sysdeps/msftc/arm.h" # define AO_GENERALIZE_TWICE # endif #endif #if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */ # if defined(__i386) # include "atomic_ops/sysdeps/sunc/x86.h" # endif /* __i386 */ # if defined(__x86_64) || defined(__amd64) # include "atomic_ops/sysdeps/sunc/x86_64.h" # endif /* __x86_64 */ #endif #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \ && !defined(AO_USE_PTHREAD_DEFS) # include "atomic_ops/sysdeps/sunc/sparc.h" # define AO_CAN_EMUL_CAS #endif #if defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \ && !defined(AO_HAVE_compare_and_swap_full) \ && !defined(AO_HAVE_compare_and_swap_acquire) # if defined(AO_CAN_EMUL_CAS) # include "atomic_ops/sysdeps/emul_cas.h" # else # error Cannot implement AO_compare_and_swap_full on this architecture. # endif #endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */ /* The most common way to clear a test-and-set location */ /* at the end of a critical section. */ #if AO_AO_TS_T && !defined(AO_CLEAR) # define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR) #endif #if AO_CHAR_TS_T && !defined(AO_CLEAR) # define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR) #endif /* The generalization section. */ #if !defined(AO_GENERALIZE_TWICE) && defined(AO_CAN_EMUL_CAS) \ && !defined(AO_HAVE_compare_and_swap_full) # define AO_GENERALIZE_TWICE #endif /* Theoretically we should repeatedly include atomic_ops_generalize.h. */ /* In fact, we observe that this converges after a small fixed number */ /* of iterations, usually one. 
*/ #include "atomic_ops/generalize.h" #ifdef AO_GENERALIZE_TWICE # include "atomic_ops/generalize.h" #endif /* For compatibility with version 0.4 and earlier */ #define AO_TS_T AO_TS_t #define AO_T AO_t #define AO_TS_VAL AO_TS_VAL_t #endif /* ATOMIC_OPS_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/dcss/dcss_plus.h ================================================ /* * File: dcss_plus.h * Author: Maya Arbel-Raviv * * Created on May 1, 2017, 10:42 AM */ #ifndef DCSS_PLUS_H #define DCSS_PLUS_H #include #include #include #include "descriptors.h" #define dcssptagptr_t uintptr_t #define dcsspptr_t dcsspdesc_t * #define casword_t intptr_t #define DCSSP_STATE_UNDECIDED 0 #define DCSSP_STATE_SUCCEEDED 4 #define DCSSP_STATE_FAILED 8 #define DCSSP_LEFTSHIFT 1 #define DCSSP_IGNORED_RETVAL -1 #define DCSSP_SUCCESS 0 #define DCSSP_FAILED_ADDR1 1 #define DCSSP_FAILED_ADDR2 2 #define MAX_PAYLOAD_PTRS 6 struct dcsspresult_t { int status; casword_t failed_val; }; template class dcsspdesc_t { public: volatile mutables_t mutables; casword_t volatile * volatile addr1; casword_t volatile old1; casword_t volatile * volatile addr2; casword_t volatile old2; casword_t volatile new2; PAYLOAD_T volatile payload1[MAX_PAYLOAD_PTRS+1]; PAYLOAD_T volatile payload2[MAX_PAYLOAD_PTRS+1]; const static int size = sizeof(mutables)+sizeof(addr1)+sizeof(old1)+sizeof(addr2)+sizeof(old2)+sizeof(new2)+sizeof(PAYLOAD_T)*(MAX_PAYLOAD_PTRS+1)+sizeof(PAYLOAD_T)*(MAX_PAYLOAD_PTRS+1); char padding[PREFETCH_SIZE_BYTES+(((64<<10)-size%64)%64)]; // add padding to prevent false sharing } __attribute__ ((aligned(64))); template class dcsspProvider { /** * Data definitions */ private: // descriptor reduction algorithm #define DCSSP_MUTABLES_OFFSET_STATE 0 #define DCSSP_MUTABLES_MASK_STATE 0xf #define DCSSP_MUTABLES_NEW(mutables) \ ((((mutables)&MASK_SEQ)+(1< dcsspDescriptors[LAST_TID+1] __attribute__ ((aligned(64))); char __padding_desc3[PREFETCH_SIZE_BYTES]; public: #ifdef USE_DEBUGCOUNTERS debugCounter * dcsspHelpCounter; #endif const int NUM_PROCESSES; /** * Function declarations */ dcsspProvider(const int numProcesses); ~dcsspProvider(); void initThread(const int tid); void deinitThread(const int tid); void writePtr(casword_t volatile * addr, casword_t val); // use for addresses that might have been modified by DCSSP (ONLY GOOD FOR INITIALIZING, CANNOT DEAL WITH CONCURRENT DCSSP OPERATIONS.) void writeVal(casword_t volatile * addr, casword_t val); // use for addresses that might have been modified by DCSSP (ONLY GOOD FOR INITIALIZING, CANNOT DEAL WITH CONCURRENT DCSSP OPERATIONS.) 
casword_t readPtr(const int tid, casword_t volatile * addr); // use for addresses that might have been modified by DCSSP casword_t readVal(const int tid, casword_t volatile * addr); // use for addresses that might have been modified by DCSSP inline dcsspresult_t dcsspPtr(const int tid, casword_t * addr1, casword_t old1, casword_t * addr2, casword_t old2, casword_t new2, PAYLOAD_T * const payload1, PAYLOAD_T * const payload2); // use when addr2 is a pointer, or another type that does not use its least significant bit inline dcsspresult_t dcsspVal(const int tid, casword_t * addr1, casword_t old1, casword_t * addr2, casword_t old2, casword_t new2, PAYLOAD_T * const payload1, PAYLOAD_T * const payload2); // use when addr2 uses its least significant bit, but does not use its most significant but void discardPayloads(const int tid); void debugPrint(); tagptr_t getDescriptorTagptr(const int otherTid); dcsspptr_t getDescriptorPtr(tagptr_t tagptr); bool getDescriptorSnapshot(tagptr_t tagptr, dcsspptr_t const dest); void helpProcess(const int tid, const int otherTid); private: casword_t dcsspRead(const int tid, casword_t volatile * addr); inline dcsspresult_t dcsspHelp(const int tid, dcssptagptr_t tagptr, dcsspptr_t snapshot, bool helpingOther); void dcsspHelpOther(const int tid, dcssptagptr_t tagptr); }; #endif /* DCSS_PLUS_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/dcss/dcss_plus_impl.h ================================================ /* * File: dcss_plus_impl.h * Author: Maya Arbel-Raviv * * Created on May 1, 2017, 10:52 AM */ #ifndef DCSS_PLUS_IMPL_H #define DCSS_PLUS_IMPL_H #include "dcss_plus.h" #include #include #include using namespace std; #define BOOL_CAS __sync_bool_compare_and_swap #define VAL_CAS __sync_val_compare_and_swap #define DCSSP_TAGBIT 0x1 static bool isDcssp(casword_t val) { return (val & DCSSP_TAGBIT); } template dcsspresult_t dcsspProvider::dcsspHelp(const int tid, dcssptagptr_t tagptr, dcsspptr_t snapshot, bool helpingOther) { // figure out what the state should be casword_t state = DCSSP_STATE_FAILED; SOFTWARE_BARRIER; casword_t val1 = *(snapshot->addr1); SOFTWARE_BARRIER; //DELAY_UP_TO(1000); if (val1 == snapshot->old1) { // linearize here(?) state = DCSSP_STATE_SUCCEEDED; } // try to cas the state to the appropriate value dcsspptr_t ptr = TAGPTR_UNPACK_PTR(dcsspDescriptors,tagptr); casword_t retval; bool failedBit; MUTABLES_VAL_CAS_FIELD(failedBit, retval, ptr->mutables, snapshot->mutables, DCSSP_STATE_UNDECIDED, state, DCSSP_MUTABLES_MASK_STATE, DCSSP_MUTABLES_OFFSET_STATE); if (failedBit) return {DCSSP_IGNORED_RETVAL,0}; // failed to access the descriptor: we must be helping another process complete its operation, so we will NOT use this return value! // TODO: do we do the announcement here? what will be announced exactly? do we let the user provide a pointer/value to announce as an argument to dcssp? do we need to provide an operation to retrieve the current announcement for a given process? 
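// note: at this point exactly one process has moved the descriptor's state
// from UNDECIDED to SUCCEEDED or FAILED; every helper (including us) then
// attempts the same CAS on addr2 below, so the tagged descriptor pointer is
// replaced by new2 (success) or restored to old2 (failure) exactly once.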
// finish the operation based on the descriptor's state if ((retval == DCSSP_STATE_UNDECIDED && state == DCSSP_STATE_SUCCEEDED) // if we changed the state to succeeded OR || retval == DCSSP_STATE_SUCCEEDED) { // if someone else changed the state to succeeded // if (state == DCSSP_STATE_FAILED) DELAY_UP_TO(1000); assert(helpingOther || ((snapshot->mutables & DCSSP_MUTABLES_MASK_STATE) >> DCSSP_MUTABLES_OFFSET_STATE) == DCSSP_STATE_SUCCEEDED); BOOL_CAS(snapshot->addr2, (casword_t) tagptr, snapshot->new2); return {DCSSP_SUCCESS,0}; } else { // either we or someone else changed the state to failed assert((retval == DCSSP_STATE_UNDECIDED && state == DCSSP_STATE_FAILED) || retval == DCSSP_STATE_FAILED); assert(helpingOther || ((snapshot->mutables & DCSSP_MUTABLES_MASK_STATE) >> DCSSP_MUTABLES_OFFSET_STATE) == DCSSP_STATE_FAILED); BOOL_CAS(snapshot->addr2, (casword_t) tagptr, snapshot->old2); // if (state == DCSSP_STATE_FAILED) DELAY_UP_TO(1000); return {DCSSP_FAILED_ADDR1,val1}; } } template void dcsspProvider::dcsspHelpOther(const int tid, dcssptagptr_t tagptr) { const int otherTid = TAGPTR_UNPACK_TID(tagptr); assert(otherTid >= 0 && otherTid < NUM_PROCESSES); dcsspdesc_t newSnapshot; const int sz = dcsspdesc_t::size; assert((((tagptr & MASK_SEQ) >> OFFSET_SEQ) & 1) == 1); if (DESC_SNAPSHOT(dcsspdesc_t, dcsspDescriptors, &newSnapshot, tagptr, sz)) { dcsspHelp(tid, tagptr, &newSnapshot, true); } else { //TRACE COUTATOMICTID("helpOther unable to get snapshot of "< inline tagptr_t dcsspProvider::getDescriptorTagptr(const int otherTid) { dcsspptr_t ptr = &dcsspDescriptors[otherTid]; tagptr_t tagptr = TAGPTR_NEW(otherTid, ptr->mutables, DCSSP_TAGBIT); if ((UNPACK_SEQ(tagptr) & 1) == 0) { // descriptor is being initialized! essentially, // we can think of there being NO ongoing operation, // so we can imagine we return NULL = no descriptor. 
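// (an even sequence number means DESC_NEW has run but DESC_INITIALIZED has
// not yet bumped it to odd, i.e., the descriptor's fields may still be
// half-written, so no one should try to help with it)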
return (tagptr_t) NULL; } return tagptr; } template inline dcsspptr_t dcsspProvider::getDescriptorPtr(tagptr_t tagptr) { return TAGPTR_UNPACK_PTR(dcsspDescriptors, tagptr); } template inline bool dcsspProvider::getDescriptorSnapshot(tagptr_t tagptr, dcsspptr_t const dest) { if (tagptr == (tagptr_t) NULL) return false; return DESC_SNAPSHOT(dcsspdesc_t, dcsspDescriptors, dest, tagptr, dcsspdesc_t::size); } template inline void dcsspProvider::helpProcess(const int tid, const int otherTid) { tagptr_t tagptr = getDescriptorTagptr(otherTid); if (tagptr != (tagptr_t) NULL) dcsspHelpOther(tid, tagptr); } template void dcsspProvider::discardPayloads(const int tid) { SOFTWARE_BARRIER; dcssptagptr_t tagptr = getDescriptorTagptr(tid); dcsspptr_t ptr = getDescriptorPtr(tagptr); ptr->payload1[0] = NULL; ptr->payload2[0] = NULL; SOFTWARE_BARRIER; } template dcsspresult_t dcsspProvider::dcsspVal(const int tid, casword_t * addr1, casword_t old1, casword_t * addr2, casword_t old2, casword_t new2, PAYLOAD_T * const payload1, PAYLOAD_T * const payload2) { return dcsspPtr(tid, addr1, old1, addr2, old2 << DCSSP_LEFTSHIFT , new2 << DCSSP_LEFTSHIFT, payload1, payload2); } template dcsspresult_t dcsspProvider::dcsspPtr(const int tid, casword_t * addr1, casword_t old1, casword_t * addr2, casword_t old2, casword_t new2, PAYLOAD_T * const payload1, PAYLOAD_T * const payload2) { // create dcssp descriptor dcsspptr_t ptr = DESC_NEW(dcsspDescriptors, DCSSP_MUTABLES_NEW, tid); assert((((dcsspDescriptors[tid].mutables & MASK_SEQ) >> OFFSET_SEQ) & 1) == 0); ptr->addr1 = addr1; ptr->old1 = old1; ptr->addr2 = addr2; ptr->old2 = old2; ptr->new2 = new2; // add payload1 and payload2 to the dcssp descriptor int i; for (i=0;payload1[i];++i) { ptr->payload1[i] = payload1[i]; assert(i < MAX_PAYLOAD_PTRS); } ptr->payload1[i] = NULL; for (i=0;payload2[i];++i) { ptr->payload2[i] = payload2[i]; assert(i < MAX_PAYLOAD_PTRS); } ptr->payload2[i] = NULL; DESC_INITIALIZED(dcsspDescriptors, tid); // create tagptr assert((((dcsspDescriptors[tid].mutables & MASK_SEQ) >> OFFSET_SEQ) & 1) == 1); tagptr_t tagptr = TAGPTR_NEW(tid, ptr->mutables, DCSSP_TAGBIT); // perform the dcssp operation described by our descriptor casword_t r; do { assert(!isDcssp(ptr->old2)); assert(isDcssp(tagptr)); r = VAL_CAS(ptr->addr2, ptr->old2, (casword_t) tagptr); if (isDcssp(r)) { #ifdef USE_DEBUGCOUNTERS this->dcsspHelpCounter->inc(tid); #endif dcsspHelpOther(tid, (dcssptagptr_t) r); } } while (isDcssp(r)); if (r == ptr->old2){ // DELAY_UP_TO(1000); return dcsspHelp(tid, tagptr, ptr, false); // finish our own operation } return {DCSSP_FAILED_ADDR2,r};//DCSSP_FAILED_ADDR2; } template casword_t dcsspProvider::dcsspRead(const int tid, casword_t volatile * addr) { casword_t r; while (1) { r = *addr; if (isDcssp(r)) { #ifdef USE_DEBUGCOUNTERS this->dcsspHelpCounter->inc(tid); #endif dcsspHelpOther(tid, (dcssptagptr_t) r); } else { return r; } } } template dcsspProvider::dcsspProvider(const int numProcesses) : NUM_PROCESSES(numProcesses) { #ifdef USE_DEBUGCOUNTERS dcsspHelpCounter = new debugCounter(NUM_PROCESSES); #endif DESC_INIT_ALL(dcsspDescriptors, DCSSP_MUTABLES_NEW, NUM_PROCESSES); for (int tid=0;tid dcsspProvider::~dcsspProvider() { #ifdef USE_DEBUGCOUNTERS delete dcsspHelpCounter; #endif } template casword_t dcsspProvider::readPtr(const int tid, casword_t volatile * addr) { casword_t r; r = dcsspRead(tid, addr); return r; } template casword_t dcsspProvider::readVal(const int tid, casword_t volatile * addr) { return ((casword_t) readPtr(tid, 
addr))>>DCSSP_LEFTSHIFT; } template void dcsspProvider::writePtr(casword_t volatile * addr, casword_t ptr) { //assert((*addr & DCSSP_TAGBIT) == 0); assert((ptr & DCSSP_TAGBIT) == 0); *addr = ptr; } template void dcsspProvider::writeVal(casword_t volatile * addr, casword_t val) { writePtr(addr, val< void dcsspProvider::initThread(const int tid) {} template void dcsspProvider::deinitThread(const int tid) {} template void dcsspProvider::debugPrint() { #ifdef USE_DEBUGCOUNTERS cout<<"dcssp helping : "<dcsspHelpCounter->getTotal()< #include #include #include "dcss_plus_impl.h" using namespace std; #define NUM_OPS 10000000 #define INCREMENT 1 #define FALSE_SHARING_ULL_FACTOR 24 #define FALSE_SHARING_PAD_BYTES 192 #define COUNTER(tid) (counters[(tid)*FALSE_SHARING_ULL_FACTOR]) int numProcesses = 0; volatile unsigned long long counters[MAX_TID_POW2*FALSE_SHARING_ULL_FACTOR]; volatile char padding[FALSE_SHARING_PAD_BYTES]; volatile unsigned long long faa; volatile bool start; volatile int running; // number of threads that are running dcsspProvider * prov; #ifndef KERNEL #define KERNEL test_kernel1 #endif #ifndef VALIDATE #define VALIDATE validate1 #endif //#define GET_FAA_FOR_TID(tid) ((faa >> ((tid)*(62/numProcesses))) & (numProcesses == 1 ? 0xffffffffffffffffULL : ((1ULL<<(62/numProcesses))-1))) void * test_kernel1(void * arg) { const int tid = *((int *) arg); //const unsigned long long numOps = min(1ULL<<20, 1ULL<<(62/numProcesses)-1); //const unsigned long long increment = 1ULL<<(tid*(62/numProcesses)); prov->initThread(tid); __sync_fetch_and_add(&running, 1); while (!start) { __sync_synchronize(); } //COUTATOMICTID("performing "<readVal(tid,(casword_t*)&(COUNTER(tid))); casword_t newval = (casword_t) oldval+1; if (DCSSP_SUCCESS == prov->dcsspVal(tid, (casword_t *) &faa, (casword_t) faa, (casword_t *) &COUNTER(tid), oldval, newval, deletedNodes)) { ++numSucc; __sync_fetch_and_add(&faa, INCREMENT); } #else ++numSucc; ++COUNTER(tid); __sync_fetch_and_add(&faa, INCREMENT); #endif } prov->deinitThread(tid); } bool validate1() { // compute checksum bool good = true; for (int i=0;ireadVal(i,(casword_t*)&(COUNTER(i))); if (c != NUM_OPS) { cout<<"ERROR: counters["<readVal(tid, (casword_t *) &faa); casword_t new2 = (casword_t) old2+1; if (DCSSP_SUCCESS == prov->dcsspVal(tid, (casword_t *) &COUNTER((tid+1)%numProcesses), old1, (casword_t *) &faa, old2, new2, deletedNodes)) { ++numSucc; ++COUNTER(tid); } } prov->deinitThread(tid); } bool validate2() { // compute checksum bool good = true; for (int i=0;i. * these three fields are defined by the TAGPTR_ macros below. 
*/ #ifndef WIDTH1_SEQ #define WIDTH1_SEQ 48 #endif #define OFFSET1_SEQ 11 #define MASK1_SEQ ((uintptr_t)((1LL<>OFFSET1_SEQ) #define TAGPTR1_OFFSET_STALE 0 /* UNUSED */ #define TAGPTR1_OFFSET_TID 1 #define TAGPTR1_MASK_STALE 0x1 /* UNUSED */ #define TAGPTR1_MASK_TID (((1<>TAGPTR1_OFFSET_TID)) #define TAGPTR1_UNPACK_PTR(tagptr) (&DESC1_ARRAY[TAGPTR1_UNPACK_TID((tagptr))]) #define TAGPTR1_NEW(tid, mutables) ((tagptr_t) (((UNPACK1_SEQ(mutables))<>TAGPTR1_OFFSET_TID) #define TAGPTR1_STATIC_DESC(id) ((tagptr_t) TAGPTR1_NEW(LAST_TID1-1-id, 0)) #define TAGPTR1_DUMMY_DESC(id) ((tagptr_t) TAGPTR1_NEW(LAST_TID1, id<>(offset)) #define MUTABLES1_WRITE_FIELD(fldMutables, snapMutables, val, mask, offset) { \ mutables_t __v = (fldMutables); \ while (UNPACK1_SEQ(__v) == UNPACK1_SEQ((snapMutables)) \ && MUTABLES1_UNPACK_FIELD(__v, (mask), (offset)) != (val) \ && !__sync_bool_compare_and_swap(&(fldMutables), __v, \ (__v & ~(mask)) | ((val)<<(offset)))) { \ __v = (fldMutables); \ } \ } #define MUTABLES1_WRITE_BIT(fldMutables, snapMutables, mask) { \ mutables_t __v = (fldMutables); \ while (UNPACK1_SEQ(__v) == UNPACK1_SEQ((snapMutables)) \ && !(__v&(mask)) \ && !__sync_bool_compare_and_swap(&(fldMutables), __v, (__v|(mask)))) { \ __v = (fldMutables); \ } \ } // WARNING: uses a GCC extension "({ })". to get rid of this, use an inline function. #define DESC1_SNAPSHOT(descDest, tagptr, sz) ({ \ DESC1_T *__src = TAGPTR1_UNPACK_PTR((tagptr)); \ memcpy((descDest), __src, (sz)); \ SOFTWARE_BARRIER; /* prevent compiler from reordering read of __src->mutables before (at least the reading portion of) the memcpy */ \ (UNPACK1_SEQ(__src->c.mutables) == UNPACK1_SEQ((tagptr))); \ }) #define DESC1_READ_FIELD(successBit, fldMutables, tagptr, mask, offset) ({ \ mutables_t __mutables = (fldMutables); \ successBit = (UNPACK1_SEQ(__mutables) == UNPACK1_SEQ(tagptr)); \ MUTABLES1_UNPACK_FIELD(__mutables, (mask), (offset)); \ }) #define DESC1_NEW(tid) &DESC1_ARRAY[(tid)]; { /* note: only the process invoking this following macro can change the sequence# */ \ SOFTWARE_BARRIER; \ uintptr_t __v = DESC1_ARRAY[(tid)].c.mutables; \ /* while (!__sync_bool_compare_and_swap(&DESC1_ARRAY[(tid)].mutables, __v, MUTABLES1_NEW(__v))) { \ __v = DESC1_ARRAY[(tid)].mutables; \ } \ }*/ \ DESC1_ARRAY[(tid)].c.mutables = MUTABLES1_NEW(__v); \ /*__sync_synchronize();*/ \ SOFTWARE_BARRIER; \ } #define DESC1_INITIALIZED(tid) \ SOFTWARE_BARRIER; \ DESC1_ARRAY[(tid)].c.mutables += (1<. * these three fields are defined by the TAGPTR_ macros below. */ #ifndef WIDTH_SEQ #define WIDTH_SEQ 48 #endif #define OFFSET_SEQ 14 #define MASK_SEQ ((uintptr_t)((1LL<>OFFSET_SEQ) #define TAGPTR_OFFSET_USER 0 #define TAGPTR_OFFSET_TID 3 #define TAGPTR_MASK_USER ((1<>TAGPTR_OFFSET_TID)) #define TAGPTR_UNPACK_PTR(descArray, tagptr) (&(descArray)[TAGPTR_UNPACK_TID((tagptr))]) #define TAGPTR_NEW(tid, mutables, userBits) ((tagptr_t) (((UNPACK_SEQ(mutables))<>TAGPTR_OFFSET_TID) #define TAGPTR_STATIC_DESC(id) ((tagptr_t) TAGPTR_NEW(LAST_TID-1-id, 0)) #define TAGPTR_DUMMY_DESC(id) ((tagptr_t) TAGPTR_NEW(LAST_TID, id<>(offset)) // TODO: make more efficient version "MUTABLES_CAS_BIT" // TODO: change sequence # unpacking to masking for quick comparison // note: if there is only one subfield besides seq#, then the third if-block is redundant, and you should just return false if the cas fails, since the only way the cas fails and the field being cas'd contains still old is if the sequence number has changed. 
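// the two CAS macros below atomically update a single sub-field of the packed
// mutables word, re-validating the sequence number against a snapshot on each
// attempt. as a rough stand-alone model of the field-packing arithmetic they
// rely on (an illustrative sketch only, with hypothetical helper names; not
// part of the original header):
//
//     #include <stdint.h>
//     static inline uint64_t field_get(uint64_t w, uint64_t mask, int off) {
//         return (w & mask) >> off;          // read one packed sub-field
//     }
//     static inline uint64_t field_set(uint64_t w, uint64_t v,
//                                      uint64_t mask, int off) {
//         return (w & ~mask) | (v << off);   // overwrite one packed sub-field
//     }
//     // one-shot CAS of a sub-field from oldv to newv; the real macros wrap
//     // this in a retry loop that distinguishes "sequence number changed"
//     // from "sub-field contents changed":
//     static inline int field_cas(volatile uint64_t *w, uint64_t snap,
//                                 uint64_t oldv, uint64_t newv,
//                                 uint64_t mask, int off) {
//         return __sync_bool_compare_and_swap(w,
//                 field_set(snap, oldv, mask, off),
//                 field_set(snap, newv, mask, off));
//     }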
#define MUTABLES_BOOL_CAS_FIELD(successBit, fldMutables, snapMutables, oldval, val, mask, offset) { \ mutables_t __v = (fldMutables); \ while (1) { \ if (UNPACK_SEQ(__v) != UNPACK_SEQ((snapMutables))) { \ (successBit) = false; \ break; \ } \ if ((successBit) = __sync_bool_compare_and_swap(&(fldMutables), \ (__v & ~(mask)) | ((oldval)<<(offset)), \ (__v & ~(mask)) | ((val)<<(offset)))) { \ break; \ } \ __v = (fldMutables); \ if (MUTABLES_UNPACK_FIELD(__v, (mask), (offset)) != (oldval)) { \ (successBit) = false; \ break; \ } \ } \ } #define MUTABLES_VAL_CAS_FIELD(failedBit, retval, fldMutables, snapMutables, oldval, val, mask, offset) { \ mutables_t __v = (fldMutables); \ while (1) { \ if (UNPACK_SEQ(__v) != UNPACK_SEQ((snapMutables))) { \ (failedBit) = true; /* version number has changed, CAS cannot occur */ \ break; \ } \ mutables_t __oldval = (__v & ~(mask)) | ((oldval)<<(offset)); \ (retval) = __sync_val_compare_and_swap(&(fldMutables), \ __oldval, \ (__v & ~(mask)) | ((val)<<(offset))); \ if ((retval) == __oldval) { /* CAS SUCCESS */ \ (retval) = MUTABLES_UNPACK_FIELD((retval), (mask), (offset)); /* return contents of subfield */ \ (failedBit) = false; \ break; \ } else { /* CAS FAILURE: should we retry? */ \ __v = (retval); /* save the value that caused our CAS to fail, in case we need to retry */ \ (retval) = MUTABLES_UNPACK_FIELD((retval), (mask), (offset)); /* return contents of subfield */ \ if ((retval) != (oldval)) { /* check if we failed because the subfield's contents do not match oldval */ \ (failedBit) = false; \ break; \ } \ /* subfield's contents DO match oldval, so we need to try again */ \ } \ } \ } // TODO: change sequence # unpacking to masking for quick comparison // note: MUTABLES_FAA_FIELD would be very similar to MUTABLES_BOOL_CAS_FIELD; i think one would simply delete the last if block and change the new val from (val)<mutables before (at least the reading portion of) the memcpy */ \ (UNPACK_SEQ(__src->mutables) == UNPACK_SEQ((tagptr))); \ }) #define DESC_READ_FIELD(successBit, fldMutables, tagptr, mask, offset) ({ \ mutables_t __mutables = (fldMutables); \ successBit = (__mutables & MASK_SEQ) == ((tagptr) & MASK_SEQ); \ MUTABLES_UNPACK_FIELD(__mutables, (mask), (offset)); \ }) #define DESC_NEW(descArray, macro_mutablesNew, tid) &(descArray)[(tid)]; { /* note: only the process invoking this following macro can change the sequence# */ \ SOFTWARE_BARRIER; \ mutables_t __v = (descArray)[(tid)].mutables; \ (descArray)[(tid)].mutables = macro_mutablesNew(__v); \ SOFTWARE_BARRIER; \ /*__sync_synchronize();*/ \ } #define DESC_INITIALIZED(descArray, tid) \ SOFTWARE_BARRIER; \ (descArray)[(tid)].mutables += (1< #include #include #ifndef error #define error(s) { \ std::cout<<"ERROR: "< #include #include #include using namespace std; template class allocator_bump : public allocator_interface { private: const int cachelines; // # cachelines needed to store an object of type T // for bump allocation from a contiguous chunk of memory T ** mem; // mem[tid*PREFETCH_SIZE_WORDS] = pointer to current array to perform bump allocation from int * memBytes; // memBytes[tid*PREFETCH_SIZE_WORDS] = size of mem in bytes T ** current; // current[tid*PREFETCH_SIZE_WORDS] = pointer to current position in array mem vector ** toFree; // toFree[tid] = pointer to vector of bump allocation arrays to free when this allocator is destroyed T* bump_memory_next(const int tid) { T* result = current[tid*PREFETCH_SIZE_WORDS]; current[tid*PREFETCH_SIZE_WORDS] = (T*) (((char*) 
current[tid*PREFETCH_SIZE_WORDS]) + (cachelines*BYTES_IN_CACHE_LINE)); return result; } int bump_memory_bytes_remaining(const int tid) { return (((char*) mem[tid*PREFETCH_SIZE_WORDS])+memBytes[tid*PREFETCH_SIZE_WORDS]) - ((char*) current[tid*PREFETCH_SIZE_WORDS]); } bool bump_memory_full(const int tid) { return (((char*) current[tid*PREFETCH_SIZE_WORDS])+cachelines*BYTES_IN_CACHE_LINE > ((char*) mem[tid*PREFETCH_SIZE_WORDS])+memBytes[tid*PREFETCH_SIZE_WORDS]); } // call this when mem is null, or doesn't contain enough space to allocate an object void bump_memory_allocate(const int tid) { mem[tid*PREFETCH_SIZE_WORDS] = (T*) malloc(1<<24); memBytes[tid*PREFETCH_SIZE_WORDS] = 1<<24; current[tid*PREFETCH_SIZE_WORDS] = mem[tid*PREFETCH_SIZE_WORDS]; toFree[tid]->push_back(mem[tid*PREFETCH_SIZE_WORDS]); // remember we allocated this to free it later #ifdef HAS_FUNCTION_aligned_alloc #else // align on cacheline boundary int mod = (int) (((long) mem[tid*PREFETCH_SIZE_WORDS]) % BYTES_IN_CACHE_LINE); if (mod > 0) { // we are ignoring the first mod bytes of mem, because if we // use them, we will not be aligning objects to cache lines. current[tid*PREFETCH_SIZE_WORDS] = (T*) (((char*) mem[tid*PREFETCH_SIZE_WORDS]) + BYTES_IN_CACHE_LINE - mod); } else { current[tid*PREFETCH_SIZE_WORDS] = mem[tid*PREFETCH_SIZE_WORDS]; } #endif assert((((long) current[tid*PREFETCH_SIZE_WORDS]) % BYTES_IN_CACHE_LINE) == 0); } public: template struct rebind { typedef allocator_bump<_Tp1> other; }; // reserve space for ONE object of type T T* allocate(const int tid) { // bump-allocate from a contiguous chunk of memory if (!mem[tid*PREFETCH_SIZE_WORDS] || bump_memory_full(tid)) { bump_memory_allocate(tid); MEMORY_STATS { this->debug->addAllocated(tid, memBytes[tid*PREFETCH_SIZE_WORDS] / cachelines / BYTES_IN_CACHE_LINE); VERBOSE DEBUG2 { // if ((this->debug->getAllocated(tid) % 2000) == 0) { // this->debugInterfaces->reclaim->debugPrintStatus(tid); // debugPrintStatus(tid); COUTATOMICTID("allocated "<<(memBytes[tid*PREFETCH_SIZE_WORDS] / cachelines / BYTES_IN_CACHE_LINE)/*this->debug->getAllocated(tid)*/<<" records of size "<pool->debugPrintStatus(tid); // COUTATOMIC(endl); // } } } } return bump_memory_next(tid); } void static deallocate(const int tid, T * const p) { // no op for this allocator; memory is freed only by the destructor. // however, we have to call the destructor for the object manually... p->~T(); } void deallocateAndClear(const int tid, blockbag * const bag) { // the bag is cleared, which makes it seem like we're leaking memory, // but it will be freed in the destructor as we release the huge // slabs of memory. 
bag->clearWithoutFreeingElements(); } void debugPrintStatus(const int tid) {} void initThread(const int tid) {} allocator_bump(const int numProcesses, debugInfo * const _debug) : allocator_interface(numProcesses, _debug) , cachelines((sizeof(T)+(BYTES_IN_CACHE_LINE-1))/BYTES_IN_CACHE_LINE){ VERBOSE DEBUG COUTATOMIC("constructor allocator_bump"<*[numProcesses]; for (int tid=0;tid(); } } ~allocator_bump() { VERBOSE COUTATOMIC("destructor allocator_bump"<NUM_PROCESSES;++tid) { int n = toFree[tid]->size(); for (int i=0;i using namespace std; template class allocator_interface { public: debugInfo * const debug; const int NUM_PROCESSES; template struct rebind { typedef allocator_interface<_Tp1> other; }; // allocate space for one object of type T T* allocate(const int tid); void deallocate(const int tid, T * const p); void deallocateAndClear(const int tid, blockbag * const bag); void initThread(const int tid); void debugPrintStatus(const int tid); allocator_interface(const int numProcesses, debugInfo * const _debug) : debug(_debug) , NUM_PROCESSES(numProcesses){ VERBOSE DEBUG std::cout<<"constructor allocator_interface"< #include #include using namespace std; //__thread long long currentAllocatedBytes = 0; //__thread long long maxAllocatedBytes = 0; template class allocator_new : public allocator_interface { public: template struct rebind { typedef allocator_new<_Tp1> other; }; // reserve space for ONE object of type T T* allocate(const int tid) { // allocate a new object MEMORY_STATS { this->debug->addAllocated(tid, 1); VERBOSE { if ((this->debug->getAllocated(tid) % 2000) == 0) { debugPrintStatus(tid); } } // currentAllocatedBytes += sizeof(T); // if (currentAllocatedBytes > maxAllocatedBytes) { // maxAllocatedBytes = currentAllocatedBytes; // } } return new T; //(T*) malloc(sizeof(T)); } void deallocate(const int tid, T * const p) { // note: allocators perform the actual freeing/deleting, since // only they know how memory was allocated. // pools simply call deallocate() to request that it is freed. // allocators do not invoke pool functions. 
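// note: with NO_FREE defined, the delete below is compiled out and this
// allocator deliberately leaks everything, so deallocate() only updates
// the debug counters.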
MEMORY_STATS { this->debug->addDeallocated(tid, 1); // currentAllocatedBytes -= sizeof(T); } #if !defined NO_FREE delete p; #endif } void deallocateAndClear(const int tid, blockbag * const bag) { #ifdef NO_FREE bag->clearWithoutFreeingElements(); #else while (!bag->isEmpty()) { T* ptr = bag->remove(); deallocate(tid, ptr); } #endif } void debugPrintStatus(const int tid) { // std::cout<debug->getAllocated(tid)<<" objects of size "<<(sizeof(T)); // std::cout<<" "; //// this->pool->debugPrintStatus(tid); // std::cout<(numProcesses, _debug) { VERBOSE DEBUG std::cout<<"constructor allocator_new"< #include #include #include #include using namespace std; //__thread long long currentAllocatedBytes = 0; //__thread long long maxAllocatedBytes = 0; template class allocator_new_segregated : public allocator_interface { private: void* (*allocfn)(size_t size); void (*freefn)(void *ptr); public: template struct rebind { typedef allocator_new_segregated<_Tp1> other; }; // reserve space for ONE object of type T T* allocate(const int tid) { // allocate a new object MEMORY_STATS { this->debug->addAllocated(tid, 1); VERBOSE { if ((this->debug->getAllocated(tid) % 2000) == 0) { debugPrintStatus(tid); } } // currentAllocatedBytes += sizeof(T); // if (currentAllocatedBytes > maxAllocatedBytes) { // maxAllocatedBytes = currentAllocatedBytes; // } } return (T*) allocfn(sizeof(T)); } void deallocate(const int tid, T * const p) { // note: allocators perform the actual freeing/deleting, since // only they know how memory was allocated. // pools simply call deallocate() to request that it is freed. // allocators do not invoke pool functions. MEMORY_STATS { this->debug->addDeallocated(tid, 1); // currentAllocatedBytes -= sizeof(T); } #if !defined NO_FREE p->~T(); // explicitly call destructor, since we lose automatic destructor calls when we bypass new/delete([]) freefn(p); #endif } void deallocateAndClear(const int tid, blockbag * const bag) { #if defined NO_FREE bag->clearWithoutFreeingElements(); #else while (!bag->isEmpty()) { T* ptr = bag->remove(); deallocate(tid, ptr); } #endif } void debugPrintStatus(const int tid) {} void initThread(const int tid) {} static void* dummy_thr(void *p) { return 0; } allocator_new_segregated(const int numProcesses, debugInfo * const _debug) : allocator_interface(numProcesses, _debug) { VERBOSE DEBUG std::cout<<"constructor allocator_new_segregated"< #include #include #include using namespace std; // this allocator only performs allocation once, at the beginning of the program. // define the following to specify how much memory should be allocated. #ifndef ALLOC_ONCE_MEMORY #define ALLOC_ONCE_MEMORY (1ULL<<32) /* default: 4 GB */ #endif #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) template class allocator_once : public allocator_interface { private: const int cachelines; // # cachelines needed to store an object of type T // for bump allocation from a contiguous chunk of memory T ** mem; // mem[tid] = pointer to current array to perform bump allocation from size_t * memBytes; // memBytes[tid*PREFETCH_SIZE_WORDS] = size of mem in bytes T ** current; // current[tid*PREFETCH_SIZE_WORDS] = pointer to current position in array mem T* bump_memory_next(const int tid) { T* result = current[tid*PREFETCH_SIZE_WORDS]; current[tid*PREFETCH_SIZE_WORDS] = (T*) (((char*) current[tid*PREFETCH_SIZE_WORDS]) + (cachelines*BYTES_IN_CACHE_LINE)); return result; } int bump_memory_bytes_remaining(const int tid) { return (((char*) mem[tid])+memBytes[tid*PREFETCH_SIZE_WORDS]) - ((char*) current[tid*PREFETCH_SIZE_WORDS]); } bool bump_memory_full(const int tid) { return (((char*) current[tid*PREFETCH_SIZE_WORDS])+cachelines*BYTES_IN_CACHE_LINE > ((char*) mem[tid])+memBytes[tid*PREFETCH_SIZE_WORDS]); } public: template struct rebind { typedef allocator_once<_Tp1> other; }; // reserve space for ONE object of type T T* allocate(const int tid) { if (bump_memory_full(tid)) return NULL; return bump_memory_next(tid); } void static deallocate(const int tid, T * const p) { // no op for this allocator; memory is freed only by the destructor. // however, we have to call the destructor for the object manually... p->~T(); } void deallocateAndClear(const int tid, blockbag * const bag) { // the bag is cleared, which makes it seem like we're leaking memory, // but it will be freed in the destructor as we release the huge // slabs of memory. bag->clearWithoutFreeingElements(); } void debugPrintStatus(const int tid) {} void initThread(const int tid) { // // touch each page of memory before our trial starts // long pagesize = sysconf(_SC_PAGE_SIZE); // int last = (int) (memBytes[tid*PREFETCH_SIZE_WORDS]/pagesize); // VERBOSE COUTATOMICTID("touching each page... memBytes="<(numProcesses, _debug) , cachelines((sizeof(T)+(BYTES_IN_CACHE_LINE-1))/BYTES_IN_CACHE_LINE) { VERBOSE DEBUG COUTATOMIC("constructor allocator_once"< 0) { // we are ignoring the first mod bytes of mem, because if we // use them, we will not be aligning objects to cache lines. current[tid*PREFETCH_SIZE_WORDS] = (T*) (((char*) mem[tid]) + BYTES_IN_CACHE_LINE - mod); } else { current[tid*PREFETCH_SIZE_WORDS] = mem[tid]; } assert((((long) current[tid*PREFETCH_SIZE_WORDS]) % BYTES_IN_CACHE_LINE) == 0); } } ~allocator_once() { long allocated = 0; for (int tid=0;tidNUM_PROCESSES;++tid) { allocated += (((char*) current[tid*PREFETCH_SIZE_WORDS]) - ((char*) mem[tid])); } VERBOSE COUTATOMIC("destructor allocator_once allocated="<NUM_PROCESSES;++tid) { delete mem[tid]; } delete[] mem; delete[] memBytes; delete[] current; } }; #endif /* ALLOC_ONCE_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/arraylist.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef ARRAYLIST_H #define ARRAYLIST_H #include #include #include #include "plaf.h" #include "globals.h" using namespace std; // this list allows multiple readers, but only ONE writer. 
// i don't know if it is linearizable; maybe linearize at __size.load()/store() template class AtomicArrayList { private: atomic_int __size; atomic_uintptr_t *data; public: const int capacity; AtomicArrayList(const int _capacity) : capacity(_capacity) { VERBOSE DEBUG COUTATOMIC("constructor AtomicArrayList capacity="<= 0 && ix < sz); if (ix != sz-1) data[ix].store(data[sz-1].load(memory_order_relaxed), memory_order_relaxed); __size.store(sz-1, memory_order_relaxed); // note: this must be seq_cst if membars are not manually added } inline void erase(T * const obj) { int ix = getIndex(obj); if (ix != -1) erase(ix); } inline int getIndex(T * const obj) { int sz = __size.load(memory_order_relaxed); // note: this must be seq_cst if membars are not manually added for (int i=0;i class ArrayList { private: int __size; T **data; public: const int capacity; ArrayList(const int _capacity) : capacity(_capacity) { __size = 0; data = new T*[capacity]; } ~ArrayList() { delete[] data; } inline T* get(const int ix) { return data[ix]; } inline int size() { return __size; } inline void add(T * const obj) { assert(__size < capacity); data[__size++] = obj; } inline void erase(const int ix) { assert(ix >= 0 && ix < __size); data[ix] = data[--__size]; } inline void erase(T * const obj) { int ix = getIndex(obj); if (ix != -1) erase(ix); } inline int getIndex(T * const obj) { for (int i=0;i<__size;++i) { if (data[i] == obj) return i; } return -1; } inline bool contains(T * const obj) { return (getIndex(obj) != -1); } inline void clear() { __size = 0; } inline bool isFull() { return __size == capacity; } inline bool isEmpty() { return __size == 0; } }; #endif /* ARRAYLIST_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/blockbag.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef BLOCKLIST_H #define BLOCKLIST_H #include #include #include "blockpool.h" #include "plaf.h" using namespace std; template class blockpool; template class blockbag; template class block; #include "lockfreeblockbag.h" // BLOCK_SIZE must be a power of two, or else the bitwise math is invalid. 
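// (for a power of two B, x & (B-1) equals x % B; e.g., with B = 256,
// 300 & 255 == 44 == 300 % 256. this identity is the bitwise math in question)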
#define BLOCK_SIZE (1<<8) template class block { // stack implemented as an array private: T * data[BLOCK_SIZE]; int size; public: block *next; block(block * const _next) : next(_next) { size = 0; } ~block() { assert(size == 0); } bool isFull() { return size == BLOCK_SIZE; } bool isEmpty() { return size == 0; } // precondition: !isFull() void push(T * const obj) { assert(size < BLOCK_SIZE); const int sz = size; //assert(interruptible[((long) ((int *) pthread_getspecific(pthreadkey)))*PREFETCH_SIZE_WORDS] == false); data[size] = obj; SOFTWARE_BARRIER; size = sz+1; } // precondition: !isEmpty() T* pop() { assert(size > 0); const int sz = size-1; size = sz; return data[sz]; } T* peek(const int ix) { assert(ix >= 0); //assert(ix < size); return data[ix]; } // warning: linear time bool contains(T* const obj) { for (int i=0;i 0); if (data[size-1] == obj) { --size; // erase last pushed item return; } // the things we want to remove are probably the oldest, // so we iterate forward (head of stack = data[size-1]) for (int i=0;i 0); if (ix != size-1) { data[ix] = data[size-1]; } SOFTWARE_BARRIER; --size; // erase last item } } void replace(const int ix, T* const obj) { assert(ix >= 0); assert(ix < size); assert(obj); data[ix] = obj; } int computeSize() { return size; } // this function is occasionally useful if, for instance, // you use a bump allocator, which hands out objects from // a huge slab of memory. // then, in the destructor for a data structure, we can clear // a block without worrying about leaking memory, // since we will just free the whole slab at once. void clearWithoutFreeingElements() { SOFTWARE_BARRIER; size = 0; SOFTWARE_BARRIER; } }; template class blockbag_iterator { private: blockbag * const bag; block * const head; block * curr; int ix; // long long reclaimCountStart; #ifdef BLOCKBAG_ITERATOR_COUNT_BLOCKS_TRAVERSED int sizeInBlocks; int blocksTraversed; #endif #ifdef BLOCKBAG_ITERATOR_COUNT_STEPS int steps; #endif public: block *getCurr() const { return curr; } int getIndex() const { return ix; } blockbag_iterator(block * const _head, blockbag * const _bag) : bag(_bag), head(_head) { #ifdef BLOCKBAG_ITERATOR_COUNT_STEPS steps = 0; #endif // reclaimCountStart = bag->getReclaimCount(); // assert((reclaimCountStart % 1) == 0); // if (reclaimCountStart % 1) { // // bag is currently being reclaimed. we cannot traverse it. // curr = NULL; // ix = -1; // } else { curr = head; ix = -1; if (curr) { ix = curr->computeSize(); // "linearize" here (*this)++; } // } #ifdef BLOCKBAG_ITERATOR_COUNT_BLOCKS_TRAVERSED sizeInBlocks = bag->sizeInBlocks; blocksTraversed = 0; #endif } inline T* operator*() const { #ifdef BLOCKBAG_ITERATOR_COUNT_STEPS if (ix < 0) std::cout<<"bag="<next; // race condition: if reclamation happens AND curr is freed along with too many other blocks to fit in the blockpool, then this access might fault // /****** start consistency check for concurrent iteration ******/ // assert(reclaimCountStart == bag->getReclaimCount()); // if (reclaimCountStart != bag->getReclaimCount()) { // // bag is being/has been reclaimed, so we cannot iterate // curr = NULL; // ix = -1; // return *this; // } // /******* end consistency check for concurrent iteration *******/ ix = (curr ? 
curr->computeSize()-1 : -1); // race condition: if reclamation happens AND curr is freed along with too many other blocks to fit in the blockpool, then this access might fault // /****** start consistency check for concurrent iteration ******/ // assert(reclaimCountStart == bag->getReclaimCount()); // if (reclaimCountStart != bag->getReclaimCount()) { // // bag is being/has been reclaimed, so we cannot iterate // curr = NULL; // ix = -1; // return *this; // } // /******* end consistency check for concurrent iteration *******/ } return *this; } void swap(block * const otherCurr, const int otherIx) { T * const temp = otherCurr->peek(otherIx); otherCurr->replace(otherIx, curr->peek(ix)); curr->replace(ix, temp); } // erases the current item void erase() { assert(curr); assert(!curr->isEmpty()); bool result = bag->erase(curr, ix); if (ix >= curr->computeSize()) { (*this)++; } if (result) { (*this)++; } } }; template inline bool operator==(const blockbag_iterator& a, const blockbag_iterator& b) { if (a.getCurr() != b.getCurr()) return false; if (a.getIndex() != b.getIndex()) return false; return true; } template inline bool operator!=(const blockbag_iterator& a, const blockbag_iterator& b) { return !(a == b); } // bag implemented with linked list whose nodes are blocks. // invariant: head and tail are never NULL // invariant: head is not full (computeSize() < BLOCK_SIZE) // invariant: all blocks except for the head are full // invariant: the bag is empty iff head is empty and head->next is null template class blockbag { private: int owner; volatile long long reclaimCount; // number of times this bag has been the oldest epoch bag and had its nodes reclaimed long debugFreed; public: int sizeInBlocks; private: block *head; block *tail; void validate() { // invariant: head and tail are never NULL assert(head); // invariant: head and tail are never NULL assert(tail); // invariant: head is not full (computeSize() < BLOCK_SIZE) assert(!head->isFull()); // invariant: all blocks except for the head are full block *curr = head->next; while (curr) { assert(curr->isFull()); curr = curr->next; } // invariant: sizeInBlocks is correct assert(sizeInBlocks == computeSizeInBlocks()); } blockpool * const pool; void debugPrintBag() { std::cout<<"("< * curr = head; while (curr) { std::cout<<" "<computeSize()<<"["<<((long)curr)<<"]"; curr = curr->next; } } int computeSizeInBlocks() { int result = 0; block *curr = head; while (curr) { ++result; curr = curr->next; } return result; } public: blockbag(const int tid, blockpool * const _pool) : pool(_pool) { // VERBOSE DEBUG std::cout<<"constructor blockbag"< * const temp = head; head = head->next; //DEBUG ++debugFreed; pool->deallocateBlock(temp); } // VERBOSE DEBUG std::cout<<" freed "< begin() { return blockbag_iterator(head, this); } blockbag_iterator end() { return blockbag_iterator(NULL, this); } void add(T * const obj) { DEBUG2 validate(); int oldsize; DEBUG2 oldsize = computeSize(); head->push(obj); if (head->isFull()) { int oldNumBlocks; DEBUG2 oldNumBlocks = computeSizeInBlocks(); block *newblock = pool->allocateBlock(head); ++sizeInBlocks; //DEBUG2 std::cout<<"((("<<((long)head)<<" full. 
prepending "<<((long)newblock)<<")))"; SOFTWARE_BARRIER; head = newblock; DEBUG2 assert(oldNumBlocks + 1 == computeSizeInBlocks()); DEBUG2 assert(sizeInBlocks == computeSizeInBlocks()); } DEBUG2 assert(oldsize + 1 == computeSize()); DEBUG2 validate(); } template void add(const int tid, T * const obj, lockfreeblockbag * const sharedBag, const int thresh, Alloc * const alloc) { DEBUG2 validate(); int oldsize; DEBUG2 oldsize = computeSize(); head->push(obj); if (head->isFull()) { int oldNumBlocks; DEBUG2 oldNumBlocks = computeSizeInBlocks(); block *newblock = pool->allocateBlock(head); ++sizeInBlocks; //DEBUG2 std::cout<<"((("<<((long)head)<<" full. prepending "<<((long)newblock)<<")))"; head = newblock; DEBUG2 assert(oldNumBlocks + 1 == computeSizeInBlocks()); DEBUG2 assert(sizeInBlocks == computeSizeInBlocks()); DEBUG2 assert(oldsize + 1 == computeSize()); if (sizeInBlocks > thresh) { block *b = removeFullBlock(); // returns NULL if freeBag has < 2 full blocks assert(b); sharedBag->addBlock(b); MEMORY_STATS alloc->debug->addGiven(tid, 1); //DEBUG2 COUTATOMIC(" thread "<tid<<" sharedBag("<<(sizeof(T)==sizeof(Node)?"Node":"SCXRecord")<<") now contains "<size()<<" blocks"<next == NULL && head->isEmpty(); } // precondition: !isEmpty, !curr->isEmpty() // returns true if a subsequent invocation of curr->peek(ix) will return // an item that was previously EARLIER in iterator order, and false otherwise. bool erase(block * const curr, const int ix) { assert(!isEmpty()); assert(!curr->isEmpty()); DEBUG2 validate(); if (head->isEmpty()) { // current block cannot be head, since head is empty assert(curr != head); // eliminate empty head block, since next block will now be non-full block * const temp = head; head = head->next; pool->deallocateBlock(temp); --sizeInBlocks; } assert(!head->isEmpty()); // case 1: curr is the new head if (curr == head) { // erase from head block head->erase(ix); DEBUG2 validate(); return false; // case 2: curr is not the head } else { assert(!head->isEmpty()); // we use head->pop() to retrieve // some object from the head block. // then, we replace the object to be erased // with the object taken from the head block. 
T* obj = head->pop(); curr->replace(ix, obj); DEBUG2 validate(); return true; } } // precondition: !isEmpty() T* remove() { assert(!isEmpty()); DEBUG2 validate(); int oldsize; DEBUG2 oldsize = computeSize(); T *result; if (head->isEmpty()) { result = head->next->pop(); int oldNumBlocks; DEBUG2 oldNumBlocks = computeSizeInBlocks(); block * const temp = head; head = head->next; pool->deallocateBlock(temp); --sizeInBlocks; DEBUG2 assert(oldNumBlocks - 1 == computeSizeInBlocks()); DEBUG2 assert(sizeInBlocks == computeSizeInBlocks()); DEBUG2 assert(oldsize - 1 == computeSize()); DEBUG2 validate(); return result; } else { result = head->pop(); DEBUG2 validate(); return result; } } ////////// not anymore // precondition: !isEmpty() template T* remove(const int tid, lockfreeblockbag * const sharedBag, Alloc * const alloc) { //assert(!isEmpty()); DEBUG2 validate(); int oldsize; DEBUG2 oldsize = computeSize(); T *result; if (head->isEmpty()) { if (head->next) { result = head->next->pop(); int oldNumBlocks; DEBUG2 oldNumBlocks = computeSizeInBlocks(); block * const temp = head; head = head->next; pool->deallocateBlock(temp); --sizeInBlocks; DEBUG2 assert(oldNumBlocks - 1 == computeSizeInBlocks()); DEBUG2 assert(sizeInBlocks == computeSizeInBlocks()); DEBUG2 assert(oldsize - 1 == computeSize()); // if (sizeInBlocks == 1) { // block *b = sharedBag->getBlock(); // if (b) { // addFullBlock(b); // //DEBUG this->debug->addTaken(tid, 1); // //DEBUG2 COUTATOMIC(" thread "<tid<<" took "<computeSize()<<" objects from sharedBag"<allocate(tid)); // } // /** end debug **/ // } // } // assert(sizeInBlocks > 1); DEBUG2 validate(); // MEMORY_STATS2 alloc->debug->addFromPool(tid, 1); return result; } else { block *b = sharedBag->getBlock(); if (b) { addFullBlock(b); MEMORY_STATS alloc->debug->addTaken(tid, 1); //DEBUG2 COUTATOMIC(" thread "<tid<<" took "<computeSize()<<" objects from sharedBag"<allocate(tid); /** begin debug **/ // allocate entire block worth of objects for (int i=0;iallocate(tid)); } /** end debug **/ assert(sizeInBlocks > 1); DEBUG2 validate(); return remove(/*tid, sharedBag, alloc*/); } } } else { // MEMORY_STATS2 alloc->debug->addFromPool(tid, 1); result = head->pop(); DEBUG2 validate(); return result; } } // removes and returns a full block if the list contains // at least two full blocks. 
otherwise, this returns NULL; block* removeFullBlock() { DEBUG2 validate(); int oldsize; DEBUG2 oldsize = computeSize(); int oldNumBlocks; DEBUG2 oldNumBlocks = computeSizeInBlocks(); block *second = head->next; if (second != NULL) { if (second->next != NULL) { assert(second->computeSize() == BLOCK_SIZE); head->next = second->next; second->next = NULL; // not technically necessary, but safer --sizeInBlocks; DEBUG2 assert(oldNumBlocks - 1 == computeSizeInBlocks()); DEBUG2 assert(oldsize - BLOCK_SIZE == computeSize()); DEBUG2 assert(sizeInBlocks == computeSizeInBlocks()); DEBUG2 validate(); return second; } } DEBUG2 assert(oldsize == computeSize()); DEBUG2 if (sizeInBlocks != computeSizeInBlocks()) { std::cout<<"sizeInBlocks="< using namespace std; #define MAX_BLOCK_POOL_SIZE 32 #ifndef VERBOSE #define VERBOSE if(0) #endif template class block; template class blockpool { private: block *pool[MAX_BLOCK_POOL_SIZE]; int poolSize; long debugAllocated; long debugPoolDeallocated; long debugPoolAllocated; long debugFreed; public: blockpool() { poolSize = 0; debugAllocated = 0; debugPoolAllocated = 0; debugPoolDeallocated = 0; debugFreed = 0; } ~blockpool() { VERBOSE DEBUG std::cout<<"destructor blockpool;"; for (int i=0;iisEmpty()); delete pool[i]; // warning: uses locks (for some allocators) } VERBOSE DEBUG std::cout<<" blocks allocated "<* allocateBlock(block * const next) { if (poolSize) { //DEBUG ++debugPoolAllocated; block *result = pool[--poolSize]; // pop a block off the stack *result = block(next); assert(result->next == next); assert(result->computeSize() == 0); assert(result->isEmpty()); return result; } else { //DEBUG ++debugAllocated; return new block(next); // warning: uses locks (for some allocators) } } void deallocateBlock(block * const b) { assert(b->isEmpty()); if (poolSize == MAX_BLOCK_POOL_SIZE) { //DEBUG ++debugFreed; // assert(poolSize < MAX_BLOCK_POOL_SIZE); // for the RQ benchmarks, we want to assert that we never free a block #ifndef NO_FREE delete b; // warning: uses locks (for some allocators) #endif } else { //DEBUG ++debugPoolDeallocated; pool[poolSize++] = b; } } }; #endif /* BLOCKPOOL_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/debug_info.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. 
* * Copyright (C) 2015 Trevor Brown * */ #ifndef DEBUG_INFO_H #define DEBUG_INFO_H #include "plaf.h" struct _memrecl_counters { volatile char padding1[PREFETCH_SIZE_BYTES]; long allocated; long deallocated; long fromPool; long toPool; // how many objects have been added to this pool long given; // how many blocks have been moved from this pool to a shared pool long taken; // how many blocks have been moved from a shared pool to this pool long retired; // how many objects have been retired volatile char padding2[PREFETCH_SIZE_BYTES]; }; class debugInfo { private: const int NUM_PROCESSES; _memrecl_counters c[MAX_TID_POW2]; public: void clear() { for (int tid=0;tid #include #include "plaf.h" using namespace std; class debugCounter { private: const int NUM_PROCESSES; volatile long long * data; // data[tid*PREFETCH_SIZE_WORDS] = count for thread tid (padded to avoid false sharing) public: void add(const int tid, const long long val) { data[tid*PREFETCH_SIZE_WORDS] += val; } void inc(const int tid) { add(tid, 1); } long long get(const int tid) { return data[tid*PREFETCH_SIZE_WORDS]; } long long getTotal() { long long result = 0; for (int tid=0;tid #include #define COUTATOMIC(coutstr) /*cout< #include #include #include "plaf.h" using namespace std; namespace hashset_namespace { // note: TABLE_SIZE must be a power of two for bitwise operations below to work #define TABLE_SIZE 32 #define FIRST_INDEX(key) (hash((key)) & (TABLE_SIZE-1)) #define NEXT_INDEX(ix) ((ix)+1 % TABLE_SIZE) #define EMPTY_CELL 0 template class hashset { private: bool cleared; K* keys[TABLE_SIZE]; inline int hash(K * const key) { // MurmurHash3's integer finalizer long long k = (long long) key; k ^= k >> 33; k *= 0xff51afd7ed558ccd; k ^= k >> 33; k *= 0xc4ceb9fe1a85ec53; k ^= k >> 33; return k; } int getIndex(K * const key) { int ix; for (ix=FIRST_INDEX(key) ; keys[ix] != EMPTY_CELL && keys[ix] != key ; ix=NEXT_INDEX(ix)) { assert(ix >= 0); assert(ix < TABLE_SIZE); } assert(ix >= 0); assert(ix < TABLE_SIZE); return ix; } public: hashset() { VERBOSE DEBUG std::cout<<"constructor hashset"< // class AtomicHashSet { // private: // int size; // NOT ATOMICALLY ACCESSIBLE BY OTHER THREADS THAN OWNER // bool cleared; // NOT ATOMICALLY ACCESSIBLE BY OTHER THREADS THAN OWNER // atomic_uintptr_t keys[TABLE_SIZE]; // inline int hash(K * const key) { // // MurmurHash3's integer finalizer // long long k = (long long) key; // k ^= k >> 33; // k *= 0xff51afd7ed558ccd; // k ^= k >> 33; // k *= 0xc4ceb9fe1a85ec53; // k ^= k >> 33; // return k; // } // int getIndex(K * const key) { // int ix; // for (ix=FIRST_INDEX(key) // ; keys[ix] != EMPTY_CELL && keys[ix] != key // ; ix=NEXT_INDEX(ix)) { // assert(ix >= 0); // assert(ix < TABLE_SIZE); // } // assert(ix >= 0); // assert(ix < TABLE_SIZE); // return ix; // } // public: // AtomicHashSet() { // VERBOSE DEBUG std::cout<<"constructor AtomicHashSet"< class hashset_new { private: int tableSize; K** keys; int __size; inline long hash(K * const key) { // MurmurHash3's integer finalizer long long k = (long long) key; k ^= k >> 33; k *= 0xff51afd7ed558ccd; k ^= k >> 33; k *= 0xc4ceb9fe1a85ec53; k ^= k >> 33; return k; } inline int getIndex(K * const key) { int ix = firstIndex(key); assert(ix >= 0); assert(ix < tableSize); while (true) { if (keys[ix] == EMPTY_CELL || keys[ix] == key) { return ix; } ix = nextIndex(ix); assert(ix >= 0); assert(ix < tableSize); } } inline int firstIndex(K * const key) { return (hash(key) & (tableSize-1)); } inline int nextIndex(const int ix) { return ((ix+1) & (tableSize-1)); 
} public: hashset_new(const int numberOfElements) { tableSize = 32; while (tableSize < numberOfElements*2) { tableSize *= 2; } VERBOSE DEBUG std::cout<<"constructor hashset_new capacity="< #include #include "blockbag.h" using namespace std; #ifndef VERBOSE #define VERBOSE if(0) #endif // lock free bag that operates on elements of the block type, // defined in blockbag.h. this class does NOT allocate or deallocate any memory. // instead, it simply chains blocks together using their next pointers. // the implementation is a stack, with push and pop at the head. // the aba problem is avoided using version numbers with a double-wide CAS. // any contention issues with using a simple stack and overhead issues with // double-wide CAS are unimportant, because operations on this bag only happen // once a process has filled up two blocks of objects and needs to hand one // off. thus, the number of operations on this class is several orders of // magnitude smaller than the number of operations on the binary search tree. template class lockfreeblockbag { private: struct tagged_ptr { block *ptr; long tag; }; std::atomic head; public: lockfreeblockbag() { VERBOSE DEBUG std::cout<<"constructor lockfreeblockbag lockfree="< *curr = head.load(memory_order_relaxed).ptr; int debugFreed = 0; while (curr) { block * const temp = curr; curr = curr->next; //DEBUG ++debugFreed; delete temp; } VERBOSE DEBUG std::cout<<"freed "<* getBlock() { while (true) { tagged_ptr expHead = head.load(memory_order_relaxed); if (expHead.ptr != NULL) { if (head.compare_exchange_weak( expHead, tagged_ptr({expHead.ptr->next, expHead.tag+1}))) { block *result = expHead.ptr; result->next = NULL; return result; } } else { return NULL; } } } void addBlock(block *b) { while (true) { tagged_ptr expHead = head.load(memory_order_relaxed); b->next = expHead.ptr; if (head.compare_exchange_weak( expHead, tagged_ptr({b, expHead.tag+1}))) { return; } } } // NOT thread safe int sizeInBlocks() { int result = 0; block *curr = head.load(memory_order_relaxed).ptr; while (curr) { ++result; curr = curr->next; } return result; } // thread safe, but concurrent operations are very likely to starve it long long size() { while (1) { long long result = 0; block *originalHead = head.load(memory_order_relaxed).ptr; block *curr = originalHead; while (curr) { result += curr->computeSize(); curr = curr->next; } if (head.load(memory_order_relaxed).ptr == originalHead) { return result; } } } }; #endif /* LOCKFREESTACK_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/pool_interface.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef POOL_INTERFACE_H #define POOL_INTERFACE_H #include #include "allocator_interface.h" #include "debug_info.h" #include "blockpool.h" #include "blockbag.h" using namespace std; template > class pool_interface { public: debugInfo * const debug; const int NUM_PROCESSES; blockpool **blockpools; // allocated (or not) and freed by descendants Alloc *alloc; template struct rebind { typedef pool_interface<_Tp1, Alloc> other; }; template struct rebind2 { typedef pool_interface<_Tp1, _Tp2> other; }; string getSizeString() { return ""; } // long long getSizeInNodes() { return 0; } /** * if the pool contains any object, then remove one from the pool * and return a pointer to it. otherwise, return NULL. 
*/ inline T* get(const int tid); inline void add(const int tid, T* ptr); inline void addMoveFullBlocks(const int tid, blockbag *bag); inline void addMoveAll(const int tid, blockbag *bag); inline int computeSize(const int tid); void debugPrintStatus(const int tid); pool_interface(const int numProcesses, Alloc * const _alloc, debugInfo * const _debug) : debug(_debug) , NUM_PROCESSES(numProcesses) , alloc(_alloc){ VERBOSE DEBUG std::cout<<"constructor pool_interface"<blockpools = new blockpool*[numProcesses]; for (int tid=0;tidblockpools[tid] = new blockpool(); } } ~pool_interface() { VERBOSE DEBUG std::cout<<"destructor pool_interface"<NUM_PROCESSES;++tid) { delete this->blockpools[tid]; } delete[] this->blockpools; } }; #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/pool_none.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef POOL_NOOP_H #define POOL_NOOP_H #include #include #include "blockbag.h" #include "blockpool.h" #include "pool_interface.h" #include "plaf.h" using namespace std; template > class pool_none : public pool_interface { public: template struct rebind { typedef pool_none<_Tp1, Alloc> other; }; template struct rebind2 { typedef pool_none<_Tp1, _Tp2> other; }; string getSizeString() { return "no pool"; } /** * if the freebag contains any object, then remove one from the freebag * and return a pointer to it. * if not, then retrieve a new object from Alloc */ inline T* get(const int tid) { MEMORY_STATS2 this->alloc->debug->addFromPool(tid, 1); return this->alloc->allocate(tid); } inline void add(const int tid, T* ptr) { this->alloc->deallocate(tid, ptr); } inline void addMoveFullBlocks(const int tid, blockbag *bag, block * const predecessor) { bag->clearWithoutFreeingElements(); // note: this will leak memory, but i believe it is only used by debraplus (which really should use a pool) } inline void addMoveFullBlocks(const int tid, blockbag *bag) { this->alloc->deallocateAndClear(tid, bag); // T* ptr; // while (ptr = bag->remove()) { // add(tid, ptr); // } } inline void addMoveAll(const int tid, blockbag *bag) { this->alloc->deallocateAndClear(tid, bag); // T* ptr; // while (ptr = bag->remove()) { // add(tid, ptr); // } } inline int computeSize(const int tid) { return 0; } void debugPrintStatus(const int tid) { } pool_none(const int numProcesses, Alloc * const _alloc, debugInfo * const _debug) : pool_interface(numProcesses, _alloc, _debug) { VERBOSE DEBUG std::cout<<"constructor pool_none"< #include #include #include "blockbag.h" #include "blockpool.h" #include "pool_interface.h" #include "plaf.h" #include "globals.h" using namespace std; #define POOL_THRESHOLD_IN_BLOCKS 10 template > class pool_perthread_and_shared : public pool_interface { private: lockfreeblockbag *sharedBag; // shared bag that we offload blocks on when we have too many in our freeBag blockbag **freeBag; // freeBag[tid] = bag of objects of type T that are ready to be reused by the thread with id tid // note: only does something if freeBag contains at least two full blocks inline bool tryGiveFreeObjects(const int tid) { if (freeBag[tid]->getSizeInBlocks() >= POOL_THRESHOLD_IN_BLOCKS) { block *b = freeBag[tid]->removeFullBlock(); // returns NULL if freeBag has < 2 full blocks assert(b); // if (b) { sharedBag->addBlock(b); MEMORY_STATS this->debug->addGiven(tid, 1); //DEBUG2 COUTATOMIC(" thread "<tid<<" 
sharedBag("<<(sizeof(T)==sizeof(Node)?"Node":"SCXRecord")<<") now contains "<size()<<" blocks"< *b = sharedBag->getBlock(); // if (b) { // freeBag[tid]->addFullBlock(b); // DEBUG this->debug->addTaken(tid, 1); // //DEBUG2 COUTATOMIC(" thread "<tid<<" took "<computeSize()<<" objects from sharedBag"< struct rebind { typedef pool_perthread_and_shared<_Tp1, Alloc> other; }; template struct rebind2 { typedef pool_perthread_and_shared<_Tp1, _Tp2> other; }; // long long getSizeInNodes() { // long long sum = 0; // for (int tid=0;tidNUM_PROCESSES;++tid) { // sum += freeBag[tid]->computeSize(); // } //// sum += sharedBag->sizeInBlocks() * BLOCK_SIZE; // return sum; // } string getSizeString() { stringstream ss; long long insharedbag = sharedBag->size(); long long infreebags = 0; for (int tid=0;tidNUM_PROCESSES;++tid) { infreebags += freeBag[tid]->computeSize(); } ss<alloc->debug->addFromPool(tid, 1); return freeBag[tid]->template remove(tid, sharedBag, this->alloc); } inline void add(const int tid, T* ptr) { MEMORY_STATS2 this->debug->addToPool(tid, 1); freeBag[tid]->add(tid, ptr, sharedBag, POOL_THRESHOLD_IN_BLOCKS, this->alloc); } inline void addMoveFullBlocks(const int tid, blockbag *bag, block * const predecessor) { // WARNING: THE FOLLOWING DEBUG COMPUTATION GETS THE WRONG NUMBER OF BLOCKS. MEMORY_STATS2 this->debug->addToPool(tid, (bag->getSizeInBlocks()-1)*BLOCK_SIZE); freeBag[tid]->appendMoveFullBlocks(bag, predecessor); while (tryGiveFreeObjects(tid)) {} } inline void addMoveFullBlocks(const int tid, blockbag *bag) { // WARNING: THE FOLLOWING DEBUG COMPUTATION GETS THE WRONG NUMBER OF BLOCKS. MEMORY_STATS2 this->debug->addToPool(tid, (bag->getSizeInBlocks()-1)*BLOCK_SIZE); freeBag[tid]->appendMoveFullBlocks(bag); while (tryGiveFreeObjects(tid)) {} } inline void addMoveAll(const int tid, blockbag *bag) { MEMORY_STATS2 this->debug->addToPool(tid, bag->computeSize()); freeBag[tid]->appendMoveAll(bag); while (tryGiveFreeObjects(tid)) {} } inline int computeSize(const int tid) { return freeBag[tid]->computeSize(); } void debugPrintStatus(const int tid) { // long free = computeSize(tid); // long share = sharedBag->sizeInBlocks(); // COUTATOMIC("free="<*[numProcesses]; for (int tid=0;tid(tid, this->blockpools[tid]); } sharedBag = new lockfreeblockbag(); } ~pool_perthread_and_shared() { VERBOSE DEBUG COUTATOMIC("destructor pool_perthread_and_shared"< *fullBlock; while ((fullBlock = sharedBag->getBlock()) != NULL) { while (!fullBlock->isEmpty()) { T * const ptr = fullBlock->pop(); this->alloc->deallocate(dummyTid, ptr); } this->blockpools[dummyTid]->deallocateBlock(fullBlock); } // clean up free bags for (int tid=0;tidNUM_PROCESSES;++tid) { this->alloc->deallocateAndClear(tid, freeBag[tid]); delete freeBag[tid]; } delete[] freeBag; delete sharedBag; } }; #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/reclaimer_debra.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. 
* * Copyright (C) 2015 Trevor Brown * */ #ifndef RECLAIM_EPOCH_H #define RECLAIM_EPOCH_H #include #include #include #include #include "blockbag.h" #include "plaf.h" #include "allocator_interface.h" #include "reclaimer_interface.h" using namespace std; template > class reclaimer_debra : public reclaimer_interface { protected: #define EPOCH_INCREMENT 2 #define BITS_EPOCH(ann) ((ann)&~(EPOCH_INCREMENT-1)) #define QUIESCENT(ann) ((ann)&1) #define GET_WITH_QUIESCENT(ann) ((ann)|1) #ifdef RAPID_RECLAMATION #define MIN_OPS_BEFORE_READ 1 //#define MIN_OPS_BEFORE_CAS_EPOCH 1 #else #define MIN_OPS_BEFORE_READ 20 //#define MIN_OPS_BEFORE_CAS_EPOCH 100 #endif #define NUMBER_OF_EPOCH_BAGS 9 #define NUMBER_OF_ALWAYS_EMPTY_EPOCH_BAGS 3 // for epoch based reclamation volatile long epoch; atomic_long *announcedEpoch; // announcedEpoch[tid*PREFETCH_SIZE_WORDS] // todo: figure out if volatile here would help processes notice changes more quickly. long *checked; // checked[tid*PREFETCH_SIZE_WORDS] = how far we've come in checking the announced epochs of other threads blockbag **epochbags; // epochbags[NUMBER_OF_EPOCH_BAGS*tid+0..NUMBER_OF_EPOCH_BAGS*tid+(NUMBER_OF_EPOCH_BAGS-1)] are epoch bags for thread tid. blockbag **currentBag; // pointer to current epoch bag for each process long *index; // index of currentBag in epochbags for each process // note: oldest bag is number (index+1)%NUMBER_OF_EPOCH_BAGS long *opsSinceRead; public: template struct rebind { typedef reclaimer_debra<_Tp1, Pool> other; }; template struct rebind2 { typedef reclaimer_debra<_Tp1, _Tp2> other; }; // inline int getOldestBlockbagIndexOffset(const int tid) { // long long min_val = LLONG_MAX; // int min_i = -1; // for (int i=0;igetReclaimCount(); // if (reclaimCount % 1) { // bag's contents are currently being freed // return i; // } // if (reclaimCount < min_val) { // min_val = reclaimCount; // min_i = i; // } // } // return min_i; // } // // inline set_of_bags getBlockbags() { // blockbag_iterator ** const output) { //// int cnt=0; //// for (int tid=0;tidNUM_PROCESSES*NUMBER_OF_EPOCH_BAGS}; // } // // inline void getOldestTwoBlockbags(const int tid, blockbag ** oldest, blockbag ** secondOldest) { // long long min_val = LLONG_MAX; // int min_i = -1; // for (int i=0;igetReclaimCount(); // if (reclaimCount % 1) { // bag's contents are currently being freed // min_i = i; // break; // } // if (reclaimCount < min_val) { // min_val = reclaimCount; // min_i = i; // } // } // if (min_i == -1) { // *oldest = *secondOldest = NULL; // } else { // *oldest = epochbags[tid*NUMBER_OF_EPOCH_BAGS + min_i]; // *secondOldest = epochbags[tid*NUMBER_OF_EPOCH_BAGS + ((min_i+1)%NUMBER_OF_EPOCH_BAGS)]; // } // } inline void getSafeBlockbags(const int tid, blockbag ** bags) { SOFTWARE_BARRIER; int ix = index[tid*PREFETCH_SIZE_WORDS]; bags[0] = epochbags[tid*NUMBER_OF_EPOCH_BAGS+ix]; bags[1] = epochbags[tid*NUMBER_OF_EPOCH_BAGS+((ix+NUMBER_OF_EPOCH_BAGS-1)%NUMBER_OF_EPOCH_BAGS)]; bags[2] = epochbags[tid*NUMBER_OF_EPOCH_BAGS+((ix+NUMBER_OF_EPOCH_BAGS-2)%NUMBER_OF_EPOCH_BAGS)]; bags[3] = NULL; SOFTWARE_BARRIER; // SOFTWARE_BARRIER; // // find first dangerous blockbag // long long min_val = LLONG_MAX; // int min_i = -1; // for (int i=0;igetReclaimCount(); // if (reclaimCount % 1) { // bag's contents are currently being freed // min_i = i; // break; // } // if (reclaimCount < min_val) { // min_val = reclaimCount; // min_i = i; // } // } // assert(min_i != -1); // min_i = (min_i + NUMBER_OF_ALWAYS_EMPTY_EPOCH_BAGS) % NUMBER_OF_EPOCH_BAGS; // // // process might free 
from bag at offset min_i, or the next one. // // the others are safe. // int i; // for (i=0;iNUM_PROCESSES;++tid) { for (int j=0;jcomputeSize(); } } return sum; } string getSizeString() { stringstream ss; ss< * const freeable = epochbags[NUMBER_OF_EPOCH_BAGS*tid + ((nextIndex+NUMBER_OF_ALWAYS_EMPTY_EPOCH_BAGS) % NUMBER_OF_EPOCH_BAGS)]; this->pool->addMoveFullBlocks(tid, freeable); // moves any full blocks (may leave a non-full block behind) SOFTWARE_BARRIER; index[tid*PREFETCH_SIZE_WORDS] = nextIndex; currentBag[tid*PREFETCH_SIZE_WORDS] = epochbags[NUMBER_OF_EPOCH_BAGS*tid + nextIndex]; } // objects reclaimed by this epoch manager. // returns true if the call rotated the epoch bags for thread tid // (and reclaimed any objects retired two epochs ago). // otherwise, the call returns false. inline bool leaveQuiescentState(const int tid, void * const * const reclaimers, const int numReclaimers) { SOFTWARE_BARRIER; // prevent any bookkeeping from being moved after this point by the compiler. bool result = false; // ver 1 long readEpoch = epoch; const long ann = announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); // // debug ver2 // const long ann = announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); // ++opsSinceRead[tid*PREFETCH_SIZE_WORDS]; // long readEpoch = ((opsSinceRead[tid*PREFETCH_SIZE_WORDS] % MIN_OPS_BEFORE_READ) == 0) ? epoch : BITS_EPOCH(ann); // if our announced epoch is different from the current epoch if (readEpoch != BITS_EPOCH(ann)) { // announce the new epoch, and rotate the epoch bags and // reclaim any objects retired two epochs ago. checked[tid*PREFETCH_SIZE_WORDS] = 0; //rotateEpochBags(tid); for (int i=0;i * const) reclaimers[i])->rotateEpochBags(tid); } result = true; } // note: readEpoch, when written to announcedEpoch[tid], // will set the state to non-quiescent and non-neutralized // incrementally scan the announced epochs of all threads int otherTid = checked[tid*PREFETCH_SIZE_WORDS]; if ((++opsSinceRead[tid*PREFETCH_SIZE_WORDS] % MIN_OPS_BEFORE_READ) == 0) { long otherAnnounce = announcedEpoch[otherTid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); if (BITS_EPOCH(otherAnnounce) == readEpoch || QUIESCENT(otherAnnounce)) { const int c = ++checked[tid*PREFETCH_SIZE_WORDS]; if (c >= this->NUM_PROCESSES /*&& c > MIN_OPS_BEFORE_CAS_EPOCH*/) { __sync_bool_compare_and_swap(&epoch, readEpoch, readEpoch+EPOCH_INCREMENT); } } } SOFTWARE_BARRIER; if (readEpoch != ann) { announcedEpoch[tid*PREFETCH_SIZE_WORDS].store(readEpoch, memory_order_relaxed); } return result; } inline void enterQuiescentState(const int tid) { const long ann = announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); announcedEpoch[tid*PREFETCH_SIZE_WORDS].store(GET_WITH_QUIESCENT(ann), memory_order_relaxed); } // for all schemes except reference counting inline void retire(const int tid, T* p) { currentBag[tid*PREFETCH_SIZE_WORDS]->add(p); DEBUG2 this->debug->addRetired(tid, 1); } inline void unretireLast(const int tid) { assert(false); // we do not use this, since it makes it harder to reason about iteration over blockbags when they shrink (aside from when their contents are being reclaimed, and we can determine this is the case by inspecting bag->getReclaimCount()...) 
currentBag[tid*PREFETCH_SIZE_WORDS]->remove(); } void debugPrintStatus(const int tid) { // assert(tid >= 0); // assert(tid < this->NUM_PROCESSES); if (tid == 0) { std::cout<<"global epoch counter="< * const _recoveryMgr = NULL) : reclaimer_interface(numProcesses, _pool, _debug, _recoveryMgr) { VERBOSE std::cout<<"constructor reclaimer_debra helping="<shouldHelp()<NUM_PROCESSES;++tid) { // move contents of all bags into pool for (int i=0;icomputeSize()<<" objects from epoch bag of tid="<pool->addMoveAll(tid, epochbags[NUMBER_OF_EPOCH_BAGS*tid+i]); delete epochbags[NUMBER_OF_EPOCH_BAGS*tid+i]; } } delete[] epochbags; delete[] index; delete[] opsSinceRead; delete[] currentBag; delete[] announcedEpoch; delete[] checked; } }; #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/reclaimer_debraplus.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef RECLAIM_EPOCH_CRASHRECOV_H #define RECLAIM_EPOCH_CRASHRECOV_H #include #include #include "plaf.h" #include "globals.h" #include "blockbag.h" #include "allocator_interface.h" #include "reclaimer_interface.h" #include "arraylist.h" #include "hashtable.h" #include "record_manager_single_type.h" using namespace std; using namespace hashset_namespace; template > class reclaimer_debraplus : public reclaimer_interface { private: #define EPOCH_INCREMENT 2 #define BITS_EPOCH(ann) ((ann)&~(EPOCH_INCREMENT-1)) #define QUIESCENT(ann) ((ann)&1) #define GET_WITH_QUIESCENT(ann) ((ann)|1) // the following threshold allows a process to accumulate about 768 objects in each epoch bag // (3*BLOCK_SIZE=768, but there are other things that inflate bag size slightly, such as // the fact that a thread can do n operations before it successfully neutralizes each thread // and can advance the epoch.) #define NEUTRALIZE_THRESHOLD_IN_BLOCKS 4 // maximum number of objects that can be simultaneously protected by calls to qProtect() #define MAX_PROTECT_EVEN_IF_QUIESCENT 7 #define MINIMUM_OPERATIONS_BEFORE_NEW_EPOCH_CR 100 #define NUMBER_OF_EPOCH_BAGS_CR 3 // for epoch based reclamation volatile long epoch; atomic_long *announcedEpoch; // announcedEpoch[tid*PREFETCH_SIZE_WORDS] = bits 1..end contain the last epoch seen by thread tid, and bit 0 indicates quiescence long *checked; // checked[tid*PREFETCH_SIZE_WORDS] = how far we've come in checking the announced epochs of other threads blockbag **epochbags; // epochbags[NUMBER_OF_EPOCH_BAGS*tid+0..NUMBER_OF_EPOCH_BAGS*tid+(NUMBER_OF_EPOCH_BAGS-1)] are epoch bags for thread tid. blockbag **currentBag; // pointer to current epoch bag for each process long *index; // index of currentBag in epochbags for each process // note: oldest bag is number (index+1)%NUMBER_OF_EPOCH_BAGS_CR // for hazard pointer component of this scheme; // each thread has a single hazard pointer that it uses to prevent // other threads from reclaiming its current scx record before it can // clean up after itself. AtomicArrayList **announce; // announce[tid] = pointer to set of hazard pointers for thread tid hashset_new **comparing; // comparing[tid] = set of announced hazard pointers for ALL threads, as collected by thread tid during it's last retire(tid, ...) call // number of blocks retired[tid] must contain before it is guaranteed to // contain at least 5*numProcesses*MAX_PROTECT_EVEN_IF_QUIESCENT items... // why 5*numProcesses*MAX_PROTECT_EVEN_IF_QUIESCENT items? 
// to get amortized constant scanning time per object, // the number of elements that retired[tid] must contain // before we scan hazard pointers to determine // which elements of retired[tid] can be deallocated // must be nk+Omega(nk), where // n = number of threads and // k = max number of hazard pointers a thread can hold at once // in this context, k=MAX_PROTECT_EVEN_IF_QUIESCENT, since a thread only obtains // a hazard pointer to the scx record it has most recently created, and // the nodes it points to. so, we just need some constant times // numProcesses*MAX_PROTECT_EVEN_IF_QUIESCENT. static const int scanThreshold = 4; sigset_t neutralizeSignalSet; inline bool neutralizeOther(const int tid, const int otherTid, const long currentEpoch, const long announceOther) { #ifdef SEND_CRASH_RECOVERY_SIGNALS assert(isQuiescent(tid)); assert(otherTid != tid); // if the epoch bag is too full, then we suspect otherTid has crashed... if (epochbags[NUMBER_OF_EPOCH_BAGS_CR*tid+index[tid*PREFETCH_SIZE_WORDS]]->getSizeInBlocks() >= NEUTRALIZE_THRESHOLD_IN_BLOCKS) { // neutralize otherTid by sending him a signal to make him // change what his next step will be, and force him to // throw away all pointers into the data structure, and // leaveQstate again before re-acquiring any pointers into // the data structure. this lets us reclaim memory without // waiting for him to progress. pthread_t otherPthread = this->recoveryMgr->getPthread(otherTid); int error = 0; // COUTATOMICTID("sending signal to tid "<recoveryMgr->neutralizeSignal)) { // should never happen for (int i=0;i<20;++i) COUTATOMICTID("######################################################"<recoveryMgr->neutralizeSignal<<")"< struct rebind { typedef reclaimer_debraplus<_Tp1, Pool> other; }; template struct rebind2 { typedef reclaimer_debraplus<_Tp1, _Tp2> other; }; inline static bool quiescenceIsPerRecordType() { return false; } inline static bool supportsCrashRecovery() { return true; } inline bool isQuiescent(const int tid) { //COUTATOMICTID("IS QUIESCENT EXECUTED"<contains(obj); // this is inefficient, but should only happen when recovering from being neutralized... } inline bool qProtect(const int tid, T * const obj, CallbackType notRetiredCallback, CallbackArg callbackArg, bool memoryBarrier = true) { TRACE COUTATOMICTID("reclaimer_debraplus::protectObjectEvenIfQuiescent(tid="<size(); DEBUG assert(__size < MAX_PROTECT_EVEN_IF_QUIESCENT); announce[tid]->add(obj); assert(announce[tid]->contains(obj)); DEBUG assert(announce[tid]->size() == __size+1); // if callbackArg = NULL, we assume notRetiredCallback is a noop. if (memoryBarrier) __sync_synchronize(); // prevent retired from being read before we set a hazard pointer to obj, and prevent any future reads of fields of obj from being moved before we announce obj. if (notRetiredCallback(callbackArg)) { TRACE COUTATOMICTID("notRetiredCallback returns true"<erase(obj); // note: this is inefficient, but it should never happen with regular use. DEBUG assert(__size == announce[tid]->size()); return false; } } inline void qUnprotectAll(const int tid) { TRACE COUTATOMICTID("reclaimer_debraplus::unprotectAllObjectsEvenIfQuiescent(tid="<clear(); assert(announce[tid]->size() == 0); } // rotate the epoch bags and reclaim any objects retired two epochs ago. inline void rotateEpochBags(const int tid) { assert(isQuiescent(tid)); // we rotate lists in constant time, and scan hazard pointers // when the blockbag from two epochs ago is larger than scanThreshold // (using an iterator with erase functionality). 
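// a worked instance of the threshold arithmetic above (illustrative numbers):
// with n = numProcesses = 8 threads and k = MAX_PROTECT_EVEN_IF_QUIESCENT = 7,
// at most nk = 56 objects are qProtected at any time; one scan of all
// announcements costs O(nk), so waiting until a bag holds c*nk objects
// (any constant c > 1) before scanning amortizes the scan to O(1) per object.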
// maybe in the future we could use bloom filters somehow to determine when no hazard pointer // can be present in a block, so we can reclaim the entire block in O(k) time...??? // (if we're willing to accept k full, unreclaimable blocks per thread, then we can avoid // working with individual elements altogether. we can simply check if each HP is in the bloom // filter for each of c*n blocks (for some constant c), and have some probability of being // able to reclaim (c-1)*n blocks. then, this procedure will be worst case O(n) time.) index[tid*PREFETCH_SIZE_WORDS] = (index[tid*PREFETCH_SIZE_WORDS]+1) % NUMBER_OF_EPOCH_BAGS_CR; blockbag * const freeable = epochbags[NUMBER_OF_EPOCH_BAGS_CR*tid+index[tid*PREFETCH_SIZE_WORDS]]; if (freeable->getSizeInBlocks() >= scanThreshold) { TRACE COUTATOMICTID("retiring... we have "<computeSize()<<" things waiting to be retired in this epoch bag..."); // hash all announcements comparing[tid]->clear(); assert(comparing[tid]->size() == 0); for (int otherTid=0; otherTid < this->NUM_PROCESSES; ++otherTid) { int sz = announce[otherTid]->size(); for (int ixHP = 0; ixHP < sz; ++ixHP) { T* hp = (T*) announce[otherTid]->get(ixHP); if (hp) { int oldSize; DEBUG2 oldSize = comparing[tid]->size(); comparing[tid]->insert((T*) hp); DEBUG2 assert(comparing[tid]->size() <= oldSize + 1); // might not increase size if comparing[tid] already contains this item... } } } // check if any nodes (from two epochs ago) are announced (qprotected) // and swap them to the front of the blockbag. // once all announced nodes are at the front of the blockbag, // we can free whole blocks in the remainder of the blockbag. blockbag_iterator it = freeable->begin(); blockbag_iterator nextswap = freeable->begin(); while (it != freeable->end()) { if (comparing[tid]->contains(*it)) { // a hazard pointers points to the item it.swap(nextswap.getCurr(), nextswap.getIndex()); nextswap++; } it++; } block * const curr = nextswap.getCurr(); if (curr) { this->pool->addMoveFullBlocks(tid, freeable, curr); } } currentBag[tid*PREFETCH_SIZE_WORDS] = freeable; assert(isQuiescent(tid)); } // invoke this at the beginning of each operation that accesses // objects reclaimed by this epoch manager. // returns true if the call rotated the epoch bags for thread tid // (and reclaimed any objects retired two epochs ago). // otherwise, the call returns false. // IMPLIES A FULL MEMORY BARRIER inline bool leaveQuiescentState(const int tid, void * const * const reclaimers, const int numReclaimers) { SOFTWARE_BARRIER; // prevent any bookkeeping from being moved after this point by the compiler. 
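        // recap of the announced-value encoding used below (illustrative):
        //   long ann = announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed);
        //   long e   = BITS_EPOCH(ann);  // epoch: bit 0 cleared (epoch advances by 2)
        //   bool q   = QUIESCENT(ann);   // bit 0 set means tid is quiescent
        // GET_WITH_QUIESCENT(e) sets bit 0 again when entering a quiescent state.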
bool result = false; long readEpoch = epoch; // multiple of EPOCH_INCREMENT assert(!QUIESCENT(readEpoch)); // if our announced epoch is different from the current epoch const long ann = announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); DEBUG2 if (!QUIESCENT(ann)) { COUTATOMICTID("NOT QUIESCENT"< * const) reclaimers[i])->rotateEpochBags(tid); } result = true; } // note: readEpoch, when written to announcedEpoch[tid], // will set the state to non-quiescent and non-neutralized // incrementally scan the announced epochs of all threads int otherTid = checked[tid*PREFETCH_SIZE_WORDS]; if (otherTid >= this->NUM_PROCESSES) { const int c = ++checked[tid*PREFETCH_SIZE_WORDS]; if (c > MINIMUM_OPERATIONS_BEFORE_NEW_EPOCH_CR) { __sync_bool_compare_and_swap(&epoch, readEpoch, readEpoch+EPOCH_INCREMENT); } } else { assert(otherTid >= 0); long otherAnnounce = announcedEpoch[otherTid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); if (BITS_EPOCH(otherAnnounce) == readEpoch || QUIESCENT(otherAnnounce) || neutralizeOther(tid, otherTid, readEpoch, otherAnnounce)) { const int c = ++checked[tid*PREFETCH_SIZE_WORDS]; if (c >= this->NUM_PROCESSES && c > MINIMUM_OPERATIONS_BEFORE_NEW_EPOCH_CR) { __sync_bool_compare_and_swap(&epoch, readEpoch, readEpoch+EPOCH_INCREMENT); } } } // it is important that we set the announcedEpoch last, because we must // not be neutralized during some of the preceding steps, or we may // corrupt the data structure. // (on x86/64, writes are not moved earlier in program order, so we don't need any membar before this write.) // (on another arch, we'd have to prevent this write from being moved before the write to checked[].) assert(isQuiescent(tid)); SOFTWARE_BARRIER; announcedEpoch[tid*PREFETCH_SIZE_WORDS].store(readEpoch, memory_order_relaxed); return result; } // IN A SCHEME THAT SUPPORTS CRASH RECOVERY, THIS IMPLIES A FULL MEMORY BARRIER IFF THIS MOVES THE THREAD FROM AN ACTIVE STATE TO A QUIESCENT STATE inline void enterQuiescentState(const int tid) { const long ann = announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed); announcedEpoch[tid*PREFETCH_SIZE_WORDS].store(GET_WITH_QUIESCENT(ann), memory_order_relaxed); assert(isQuiescent(tid)); } // for all schemes except reference counting inline void retire(const int tid, T* p) { assert(isQuiescent(tid)); currentBag[tid*PREFETCH_SIZE_WORDS]->add(p); DEBUG2 this->debug->addRetired(tid, 1); } void debugPrintStatus(const int tid) { // assert(tid >= 0); // assert(tid < this->NUM_PROCESSES); // long announce = BITS_EPOCH(announcedEpoch[tid*PREFETCH_SIZE_WORDS].load(memory_order_relaxed))/EPOCH_INCREMENT; // std::cout<<"announce="<recoveryMgr->neutralizeSignal)) { COUTATOMIC("error adding signal to signal set"<*[NUMBER_OF_EPOCH_BAGS_CR*numProcesses]; currentBag = new blockbag*[numProcesses*PREFETCH_SIZE_WORDS]; index = new long[numProcesses*PREFETCH_SIZE_WORDS]; announcedEpoch = new atomic_long[numProcesses*PREFETCH_SIZE_WORDS]; checked = new long[numProcesses*PREFETCH_SIZE_WORDS]; announce = new AtomicArrayList*[numProcesses]; comparing = new hashset_new*[numProcesses]; for (int tid=0;tid(tid, this->pool->blockpools[tid]); } currentBag[tid*PREFETCH_SIZE_WORDS] = epochbags[NUMBER_OF_EPOCH_BAGS_CR*tid]; index[tid*PREFETCH_SIZE_WORDS] = 0; announcedEpoch[tid*PREFETCH_SIZE_WORDS].store(GET_WITH_QUIESCENT(0), memory_order_relaxed); checked[tid*PREFETCH_SIZE_WORDS] = 0; announce[tid] = new AtomicArrayList(MAX_PROTECT_EVEN_IF_QUIESCENT); comparing[tid] = new hashset_new(numProcesses*MAX_PROTECT_EVEN_IF_QUIESCENT); } 
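        // layout note (illustrative): per-thread metadata is strided by
        // PREFETCH_SIZE_WORDS -- e.g. announcedEpoch[tid*PREFETCH_SIZE_WORDS] --
        // so that each thread's hot fields occupy a distinct cache line and
        // adjacent thread ids do not falsely share one.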
} ~reclaimer_debraplus() { VERBOSE DEBUG COUTATOMIC("destructor reclaimer_debraplus"<NUM_PROCESSES;++tid) { // move contents of all bags into pool for (int i=0;ipool->addMoveAll(tid, epochbags[NUMBER_OF_EPOCH_BAGS_CR*tid+i]); delete epochbags[NUMBER_OF_EPOCH_BAGS_CR*tid+i]; } delete comparing[tid]; delete announce[tid]; } delete[] announce; delete[] epochbags; delete[] index; delete[] currentBag; delete[] announcedEpoch; delete[] checked; delete[] comparing; } }; #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/reclaimer_hazardptr.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef RECLAIM_HAZARDPTR_STACK_H #define RECLAIM_HAZARDPTR_STACK_H #include #include #include #include #include "blockbag.h" #include "plaf.h" #include "allocator_interface.h" #include "hashtable.h" #include "reclaimer_interface.h" #include "arraylist.h" using namespace std; using namespace hashset_namespace; #define MAX_HAZARDPTRS_PER_THREAD 16 template > class reclaimer_hazardptr : public reclaimer_interface { private: AtomicArrayList **announce; // announce[tid] = set of announced hazard pointers for thread tid ArrayList **retired; // retired[tid] = set of retired objects for thread tid hashset_new **comparing; // comparing[tid] = set of announced hazard pointers for ALL threads, as collected by thread tid during it's last retire(tid, ...) call // number of elements that retired[tid] must contain // before we scan hazard pointers to determine // which elements of retired[tid] can be deallocated. // to get amortized constant scanning time per object, // this must be nk+Omega(nk), where // n = number of threads and // k = max number of hazard pointers a thread can hold at once const int scanThreshold; public: template struct rebind { typedef reclaimer_hazardptr<_Tp1, Pool> other; }; template struct rebind2 { typedef reclaimer_hazardptr<_Tp1, _Tp2> other; }; inline static bool shouldHelp() { return false; } bool isProtected(const int tid, T * const obj) { return announce[tid]->contains(obj); } bool static isQProtected(const int tid, T * const obj) { return false; } inline static bool isQuiescent(const int tid) { return true; } // for hazard pointers (and counting references from threads) inline bool protect(const int tid, T * const obj, CallbackType notRetiredCallback, CallbackArg callbackArg, bool memoryBarrier = true) { TRACE std::cout<<"reclaimer_hazardptr::protect(tid="<size(); // DEBUG if (sizeof(T) < 80 /* is a node */) assert(!announce[tid]->contains(obj)); announce[tid]->add(obj); if (memoryBarrier) __sync_synchronize(); // prevent retired from being read before we set a hazard pointer to obj DEBUG2 assert(isProtected(tid, obj)); //announce[tid]->contains(obj)); DEBUG2 assert(size + 1 == announce[tid]->size()); // SOFTWARE_BARRIER; if (notRetiredCallback(callbackArg)) { // SOFTWARE_BARRIER; TRACE std::cout<<"notRetiredCallback returns true"<size() <= MAX_HAZARDPTRS_PER_THREAD); DEBUG2 assert(isProtected(tid, obj)); // SOFTWARE_BARRIER; assert(isProtected(tid, obj)); return true; } else { TRACE std::cout<<"notRetiredCallback returns false"<size(); announce[tid]->erase(obj); // DEBUG if (sizeof(T) < 80 /* is a node */) assert(!announce[tid]->contains(obj)); DEBUG2 assert(size - 1 == announce[tid]->size()); // SOFTWARE_BARRIER; } inline bool qProtect(const int tid, T * const obj, CallbackType notRetiredCallback, CallbackArg 
callbackArg, bool memoryBarrier = true) { TRACE std::cout<<"reclaimer_debraplus::qProtect(tid="<clear(); // __sync_synchronize(); // announce[tid]->clearWithoutFreeingElements(); DEBUG2 assert(announce[tid]->size() == 0); DEBUG2 assert(announce[tid]->isEmpty()); // SOFTWARE_BARRIER; } inline static bool leaveQuiescentState(const int tid, void * const * const reclaimers, const int numReclaimers) { TRACE std::cout<<"reclaimer_hazardptr::leaveQuiescentState(tid="< 0) { int c = x % base; if (c < 10) os<<(char)(c+(int)'0'); else if (c < 10+26) os<<(char)(c-10+(int)'a'); else os<<(char)(c-10-26+(int)'A'); x /= base; } return os.str(); } inline void retire(const int tid, T* p) { TRACE std::cout<<"reclaimer_hazardptr::retire(tid="<debug->addRetired(tid, 1); retired[tid]->add(p); // if the retired bag is sufficiently large if (retired[tid]->isFull()) { // __sync_synchronize(); // not necessary, since there is a membar implied by the update cas between here and the marked bit that makes the retired predicate return true... (it follows that the retired predicate for a node u will see marked and return true if it executes when we are performing retire(u).) // TRACE std::cout<<"retiring... we have "<size()<<" things waiting to be retired (#hps="<size()<<")..."; // // hash all announcements // int totalSize = 0; // int sizes[MAX_TID_POW2]; // for (int otherTid=0; otherTid < this->NUM_PROCESSES; ++otherTid) { // sizes[otherTid] = announce[tid]->size(); // totalSize += sizes[otherTid]; // } // hashset_new hset = hashset_new(totalSize); // for (int otherTid=0; otherTid < this->NUM_PROCESSES; ++otherTid) { // for (int i=0;iget(i)); // } // } // // // iterate over all items in retired[tid] // TRACE std::cout<<"retiring... we have "<size()<<" things waiting to be retired (#hps="<size()<<", totalSize="<size();++ix) { // TRACE std::cout<<" "<get(ix))<<"="<<(hset.contains(retired[tid]->get(ix))?"1":"0"); // if (!hset.contains(retired[tid]->get(ix))) { // // no hazard pointers point to the item, so we send it to the pool // this->pool->add(tid, retired[tid]->get(ix)); // // now we remove the item from retired[tid] and // // adjust ix to continue where we left off // retired[tid]->erase(ix); // --ix; // } // } // TRACE std::cout<<" afterwards, we have "<size()<<" things waiting to be retired..."<size()<<" things waiting to be retired (THIS thread #hps="<size()<<")..."; // for (int ix=0;ixsize();) { // // check if retired[tid]->data[ix] is in any set of hazard pointers // bool found = false; // for (int otherTid=0;otherTidNUM_PROCESSES;++otherTid) { // int sz = announce[otherTid]->size(); // for (int ixHP=0;ixHPget(ix) == announce[otherTid]->get(ixHP)) { // found = true; // // break out of both loops // otherTid = this->NUM_PROCESSES; // break; // } // } // } // if (!found) { // // no hazard pointers point to the item, so we send it to the pool // this->pool->add(tid, retired[tid]->get(ix)); // // now we remove the item from retired[tid] // retired[tid]->erase(ix); // } else { // ++ix; // we didn't erase, so we need to move on to the next element // } // } // TRACE std::cout<<" afterwards, we have "<size()<<" things waiting to be retired..."<size()<<" things waiting to be retired (THIS thread #hps="<size()<<")..."; // hash all announcements comparing[tid]->clear(); assert(comparing[tid]->size() == 0); for (int otherTid=0; otherTid < this->NUM_PROCESSES; ++otherTid) { int sz = announce[otherTid]->size(); assert(sz < MAX_HAZARDPTRS_PER_THREAD); for (int ixHP=0;ixHPsize(); 
comparing[tid]->insert(announce[otherTid]->get(ixHP)); DEBUG2 assert(comparing[tid]->size() <= oldSize + 1); // might not increase size if comparing[tid] already contains this item... } } for (int ix=0;ixsize();) { // check if retired[tid]->data[ix] is in any set of hazard pointers if (!comparing[tid]->contains(retired[tid]->get(ix))) { // no hazard pointers point to the item, so we send it to the pool this->pool->add(tid, retired[tid]->get(ix)); // now we remove the item from retired[tid] retired[tid]->erase(ix); } else { ++ix; // we didn't erase, so we need to move on to the next element } } TRACE std::cout<<" afterwards, we have "<size()<<" things waiting to be retired..."<isFull()); } } void debugPrintStatus(const int tid) { // assert(tid >= 0); // assert(tid < this->NUM_PROCESSES); } reclaimer_hazardptr(const int numProcesses, Pool *_pool, debugInfo * const _debug, RecoveryMgr * const _recoveryMgr = NULL) : scanThreshold(5*numProcesses*MAX_HAZARDPTRS_PER_THREAD), reclaimer_interface(numProcesses, _pool, _debug, _recoveryMgr) { VERBOSE DEBUG std::cout<<"constructor reclaimer_hazardptr"<*[numProcesses]; retired = new ArrayList*[numProcesses]; comparing = new hashset_new*[numProcesses]; for (int tid=0;tid(MAX_HAZARDPTRS_PER_THREAD); retired[tid] = new ArrayList(scanThreshold); comparing[tid] = new hashset_new(numProcesses*MAX_HAZARDPTRS_PER_THREAD); } } ~reclaimer_hazardptr() { VERBOSE DEBUG std::cout<<"destructor reclaimer_hazardptr"<NUM_PROCESSES;++tid) { int sz = retired[tid]->size(); for (int ix=0;ixpool->add(tid, retired[tid]->get(ix)); } delete announce[tid]; delete retired[tid]; delete comparing[tid]; } delete[] announce; delete[] retired; delete[] comparing; } }; // end class #endif ================================================ FILE: datastructures/trevor_brown_abtree/common/recordmgr/reclaimer_interface.h ================================================ /** * C++ record manager implementation (PODC 2015) by Trevor Brown. * * Copyright (C) 2015 Trevor Brown * */ #ifndef RECLAIM_INTERFACE_H #define RECLAIM_INTERFACE_H #include "recovery_manager.h" #include "pool_interface.h" #include "globals.h" #include #include using namespace std; template struct set_of_bags { blockbag * const * const bags; const int numBags; }; template > class reclaimer_interface { public: #ifndef __CYGWIN__ RecoveryMgr * recoveryMgr; #endif debugInfo * const debug; const int NUM_PROCESSES; Pool *pool; template struct rebind { typedef reclaimer_interface<_Tp1, Pool> other; }; template struct rebind2 { typedef reclaimer_interface<_Tp1, _Tp2> other; }; long long getSizeInNodes() { return 0; } string getSizeString() { return ""; } inline static bool quiescenceIsPerRecordType() { return true; } inline static bool shouldHelp() { return true; } // FOR DEBUGGING PURPOSES inline static bool supportsCrashRecovery() { return false; } inline bool isProtected(const int tid, T * const obj); inline bool isQProtected(const int tid, T * const obj); inline static bool isQuiescent(const int tid) { COUTATOMICTID("reclaimer_interface::isQuiescent(tid) is not implemented!"< must be idempotent, * and must unprotect all objects protected by calls to protectObject. * it must NOT unprotect any object protected by a call to * protectObjectEvenAfterRestart. 
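 *
 * caller-side shape of the protect()/retire() interface (illustrative;
 * `recmgr`, readChild() and restartOperation() are hypothetical):
 *
 *   Node *n = readChild(parent);
 *   if (!recmgr->protect(tid, n, callbackReturnTrue, NULL))
 *       restartOperation();       // n was already retired; try again
 *   // ... n is safe to dereference until the next enterQuiescentState ...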
*/ inline void enterQuiescentState(const int tid); inline bool leaveQuiescentState(const int tid, void * const * const reclaimers, const int numReclaimers); inline void rotateEpochBags(const int tid); // for all schemes except reference counting inline void retire(const int tid, T* p); inline void unretireLast(const int tid) {} inline void initThread(const int tid) {} inline void deinitThread(const int tid) {} void debugPrintStatus(const int tid); reclaimer_interface(const int numProcesses, Pool *_pool, debugInfo * const _debug, RecoveryMgr * const _recoveryMgr = NULL) #ifndef __CYGWIN__ : recoveryMgr(_recoveryMgr) #endif , debug(_debug) , NUM_PROCESSES(numProcesses) , pool(_pool) { VERBOSE DEBUG COUTATOMIC("constructor reclaimer_interface"< #include #include "pool_interface.h" #include "reclaimer_interface.h" using namespace std; template > class reclaimer_none : public reclaimer_interface { private: public: template struct rebind { typedef reclaimer_none<_Tp1, Pool> other; }; template struct rebind2 { typedef reclaimer_none<_Tp1, _Tp2> other; }; string getSizeString() { return "no reclaimer"; } inline static bool shouldHelp() { return true; } inline static bool isQuiescent(const int tid) { return true; } inline static bool isProtected(const int tid, T * const obj) { return true; } inline static bool isQProtected(const int tid, T * const obj) { return false; } // for hazard pointers (and reference counting) inline static bool protect(const int tid, T * const obj, CallbackType notRetiredCallback, CallbackArg callbackArg, bool memoryBarrier = true) { return true; } inline static void unprotect(const int tid, T * const obj) {} inline static bool qProtect(const int tid, T * const obj, CallbackType notRetiredCallback, CallbackArg callbackArg, bool memoryBarrier = true) { return true; } inline static void qUnprotectAll(const int tid) {} // rotate the epoch bags and reclaim any objects retired two epochs ago. inline static void rotateEpochBags(const int tid) { } // invoke this at the beginning of each operation that accesses // objects reclaimed by this epoch manager. // returns true if the call rotated the epoch bags for thread tid // (and reclaimed any objects retired two epochs ago). // otherwise, the call returns false. 
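    // caller-side shape of the quiescence bracket (illustrative; doOperation
    // is a hypothetical data structure operation):
    //   recmgr->leaveQuiescentState(tid, reclaimers, numReclaimers);
    //   doOperation(tid);                  // may touch reclaimable records
    //   recmgr->enterQuiescentState(tid);  // all protections may lapse here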
inline static bool leaveQuiescentState(const int tid, void * const * const reclaimers, const int numReclaimers) { return false; } inline static void enterQuiescentState(const int tid) { } // for all schemes except reference counting inline static void retire(const int tid, T* p) { } void debugPrintStatus(const int tid) { } // set_of_bags getBlockbags() { // set_of_bags empty = {.bags = NULL, .numBags = 0}; // return empty; // } // // void getOldestTwoBlockbags(const int tid, blockbag ** oldest, blockbag ** secondOldest) { // *oldest = *secondOldest = NULL; // } // // int getOldestBlockbagIndexOffset(const int tid) { // return -1; // } void getSafeBlockbags(const int tid, blockbag ** bags) { bags[0] = NULL; } reclaimer_none(const int numProcesses, Pool *_pool, debugInfo * const _debug, RecoveryMgr * const _recoveryMgr = NULL) : reclaimer_interface(numProcesses, _pool, _debug, _recoveryMgr) { VERBOSE DEBUG std::cout<<"constructor reclaimer_none"< #include #include #include #include "blockbag.h" #include "plaf.h" #ifdef USE_DEBUGCOUNTERS #include "debugcounter.h" #endif #include "allocator_interface.h" #include "reclaimer_interface.h" #ifdef BST #include "node.h" #include "scxrecord.h" #elif defined KCAS_MAXK #include "kcas.h" #else #error ONLY SUPPORTS BST(main.cpp) and KCAS(ubench.cpp) #endif using namespace std; #include template M get_member_type(M T::*); template T get_class_type(M T::*); template constexpr std::size_t offset_of() { return reinterpret_cast(&(((T*)0)->*M)); } #define OFFSET_OF(m) offset_of() #define comma , __thread long long rcuthrFreesNode = 0; // for RCU THREADS ONLY __thread long long rcuthrFreesDescriptor = 0; // for RCU THREADS ONLY long long freesNode = 0; long long freesDescriptor = 0; #if defined BST || defined BST_THROWAWAY void rcuCallback_Node(struct rcu_head *rcu) { Node * n = (Node *) (((char*) rcu) - OFFSET_OF( &Node::rcuHeadField)); if (++rcuthrFreesNode == 1<<10) { __sync_fetch_and_add(&freesNode, rcuthrFreesNode); rcuthrFreesNode = 0; } free(n); } #ifdef BST_THROWAWAY void rcuCallback_SCXRecord(struct rcu_head *rcu) { SCXRecord * n = (SCXRecord *) (((char*) rcu) - OFFSET_OF( &SCXRecord::rcuHeadField)); if (++rcuthrFreesDescriptor == 1<<10) { __sync_fetch_and_add(&freesDescriptor, rcuthrFreesDescriptor); rcuthrFreesDescriptor = 0; } free(n); } #endif #elif defined KCAS_MAXK void rcuCallback_kcasdesc(struct rcu_head *rcu) { kcasdesc_t * n = (kcasdesc_t *) (((char*) rcu) - OFFSET_OF( &kcasdesc_t::rcuHeadField)); if (++rcuthrFreesDescriptor == 1<<10) { __sync_fetch_and_add(&freesDescriptor, rcuthrFreesDescriptor); rcuthrFreesDescriptor = 0; } free(n); } void rcuCallback_rdcssdesc(struct rcu_head *rcu) { rdcssdesc_t * n = (rdcssdesc_t *) (((char*) rcu) - OFFSET_OF( &rdcssdesc_t::rcuHeadField)); if (++rcuthrFreesNode == 1<<10) { __sync_fetch_and_add(&freesNode, rcuthrFreesNode); rcuthrFreesNode = 0; } free(n); } #endif //template //void rcuCallback(struct rcu_head *rcu) { // T * n = (T *) (((char *) rcu) - OFFSET_OF(&T::rcuHeadField)); // free(n); //} __thread bool calledRCULock = false; __thread bool rcuInitialized = false; template > class reclaimer_rcu : public reclaimer_interface { protected: public: template struct rebind { typedef reclaimer_rcu<_Tp1, Pool> other; }; template struct rebind2 { typedef reclaimer_rcu<_Tp1, _Tp2> other; }; long long getSizeInNodes() { long long sum = 0; return sum; } string getSizeString() { stringstream ss; ss<rcuHeadField, rcuCallback); #if defined BST || defined BST_THROWAWAY if (sizeof(*p) == sizeof(Node)) { 
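        // the callback below recovers the enclosing object from its embedded
        // rcu_head via the OFFSET_OF machinery defined above -- the usual
        // container_of idiom (illustrative):
        //   Node *n = (Node *) (((char *) rcu) - OFFSET_OF(&Node::rcuHeadField));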
call_rcu(&p->rcuHeadField, rcuCallback_Node); #ifdef BST_THROWAWAY } else if (sizeof(*p) == sizeof(SCXRecord)) { call_rcu(&p->rcuHeadField, rcuCallback_SCXRecord); #endif } #elif defined KCAS_MAXK if (sizeof(*p) == sizeof(kcasdesc_t)) { call_rcu(&p->rcuHeadField, rcuCallback_kcasdesc); } else if (sizeof(*p) == sizeof(rdcssdesc_t)) { call_rcu(&p->rcuHeadField, rcuCallback_rdcssdesc); } #endif } void debugPrintStatus(const int tid) { if (freesNode) std::cout<<"freesNode="< #include #include #include using namespace std; inline CallbackReturn callbackReturnTrue(CallbackArg arg) { return true; } // compile time check for duplicate template parameters // compare first with rest to find any duplicates template void check_duplicates(void) {} template void check_duplicates(void) { if (typeid(T) == typeid(First)) { throw logic_error("duplicate template arguments provided to RecordManagerSet"); } check_duplicates(); } // base case: empty template // this is a compile time check for invalid arguments template class RecordManagerSet { public: RecordManagerSet(const int numProcesses, RecoveryMgr * const _recoveryMgr) {} template record_manager_single_type * get(T * const recordType) { throw logic_error("invalid type passed to RecordManagerSet::get()"); return NULL; } void clearCounters(void) {} void registerThread(const int tid) {} void unregisterThread(const int tid) {} void printStatus() {} inline void qUnprotectAll(const int tid) {} inline void getReclaimers(const int tid, void ** const reclaimers, int index) {} inline void enterQuiescentState(const int tid) {} inline void leaveQuiescentStateForEach(const int tid) {} inline void leaveQuiescentState(const int tid, const bool callForEach) {} }; // "recursive" case template class RecordManagerSet : RecordManagerSet { record_manager_single_type * const mgr; public: RecordManagerSet(const int numProcesses, RecoveryMgr * const _recoveryMgr) : RecordManagerSet(numProcesses, _recoveryMgr) , mgr(new record_manager_single_type(numProcesses, _recoveryMgr)) { //cout<<"RecordManagerSet with First="<enterQuiescentState(tid); } else { // only call enterQuiescentState for one object type // std::cout<<"setting quiescent state for just one record type: "<get((RecordTypesFirst *) NULL)->enterQuiescentState(tid); } } inline void leaveQuiescentState(const int tid) { // assert(isQuiescent(tid)); // VERBOSE DEBUG2 COUTATOMIC("record_manager_single_type::leaveQuiescentState(tid="< class record_manager_single_type { protected: typedef Record* record_pointer; typedef typename Alloc::template rebind::other classAlloc; typedef typename Pool::template rebind2::other classPool; typedef typename Reclaim::template rebind2::other classReclaim; public: classAlloc *alloc; classPool *pool; classReclaim *reclaim; const int NUM_PROCESSES; debugInfo debugInfoRecord; RecoveryMgr * const recoveryMgr; record_manager_single_type(const int numProcesses, RecoveryMgr * const _recoveryMgr) : NUM_PROCESSES(numProcesses), debugInfoRecord(debugInfo(numProcesses)), recoveryMgr(_recoveryMgr) { VERBOSE DEBUG COUTATOMIC("constructor record_manager_single_type"<initThread(tid); reclaim->initThread(tid); // enterQuiescentState(tid); } void deinitThread(const int tid) { reclaim->deinitThread(tid); } inline void clearCounters() { debugInfoRecord.clear(); } inline static bool shouldHelp() { // FOR DEBUGGING PURPOSES return Reclaim::shouldHelp(); } inline bool isProtected(const int tid, record_pointer obj) { return reclaim->isProtected(tid, obj); } // for hazard pointers (and reference counting) inline bool 
protect(const int tid, record_pointer obj, CallbackType notRetiredCallback, CallbackArg callbackArg, bool hintMemoryBarrier = true) { return reclaim->protect(tid, obj, notRetiredCallback, callbackArg, hintMemoryBarrier); } inline void unprotect(const int tid, record_pointer obj) { reclaim->unprotect(tid, obj); } // warning: qProtect must be reentrant and lock-free (=== async-signal-safe) inline bool qProtect(const int tid, record_pointer obj, CallbackType notRetiredCallback, CallbackArg callbackArg, bool hintMemoryBarrier = true) { return reclaim->qProtect(tid, obj, notRetiredCallback, callbackArg, hintMemoryBarrier); } inline void qUnprotectAll(const int tid) { assert(!Reclaim::supportsCrashRecovery() || isQuiescent(tid)); reclaim->qUnprotectAll(tid); } inline bool isQProtected(const int tid, record_pointer obj) { return reclaim->isQProtected(tid, obj); } inline static bool supportsCrashRecovery() { return Reclaim::supportsCrashRecovery(); } inline static bool quiescenceIsPerRecordType() { return Reclaim::quiescenceIsPerRecordType(); } inline bool isQuiescent(const int tid) { return reclaim->isQuiescent(tid); } // for epoch based reclamation inline void enterQuiescentState(const int tid) { // VERBOSE DEBUG2 COUTATOMIC("record_manager_single_type::enterQuiescentState(tid="<enterQuiescentState(tid); } inline void leaveQuiescentState(const int tid, void * const * const reclaimers, const int numReclaimers) { // assert(isQuiescent(tid)); reclaim->leaveQuiescentState(tid, reclaimers, numReclaimers); } // for all schemes except reference counting inline void retire(const int tid, record_pointer p) { assert(!Reclaim::supportsCrashRecovery() || isQuiescent(tid)); reclaim->retire(tid, p); } // for algs that retire before the linearization point of a deletion inline void unretireLast(const int tid) { assert(!Reclaim::supportsCrashRecovery() || isQuiescent(tid)); reclaim->unretireLast(tid); } // for all schemes inline record_pointer allocate(const int tid) { assert(!Reclaim::supportsCrashRecovery() || isQuiescent(tid)); return pool->get(tid); } inline void deallocate(const int tid, record_pointer p) { assert(!Reclaim::supportsCrashRecovery() || isQuiescent(tid)); pool->add(tid, p); } void printStatus(void) { long long allocated = debugInfoRecord.getTotalAllocated(); long long allocatedBytes = allocated * sizeof(Record); long long deallocated = debugInfoRecord.getTotalDeallocated(); long long recycled = debugInfoRecord.getTotalFromPool() - allocated; COUTATOMIC("recmgr status for objects of size "<getSizeString()<getSizeString()<getSizeString().c_str()))<debugPrintStatus(tid); } COUTATOMIC(endl); // for (int tid=0;tidNUM_PROCESSES;++tid) { // COUTATOMIC("thread "<debugPrintStatus(tid); // // COUTATOMIC(" "); // //COUTATOMIC("allocated "<debugPrintStatus(tid); // COUTATOMIC(" "); // pool->debugPrintStatus(tid); // COUTATOMIC(" "); // COUTATOMIC("(given="<enterQuiescentState((tid)); \ (finishedbool) = recoverAnyAttemptedSCX((tid), -1); \ recordmgr->recoveryMgr->unblockCrashRecoverySignal(); \ } else #define CHECKPOINT_AND_RUN_QUERY(tid) \ if (MasterRecordMgr::supportsCrashRecovery() && sigsetjmp(setjmpbuffers[(tid)], 0)) { \ recordmgr->enterQuiescentState((tid)); \ recordmgr->recoveryMgr->unblockCrashRecoverySignal(); \ } else #endif // warning: this crash recovery code will only work if you've created a SINGLE instance of bst during an execution. // there are ways to make it work for multiple instances; i just haven't done that. 
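// call-site shape for the checkpoint macros above (illustrative; the
// surrounding retry loop and attemptUpdate() are hypothetical):
//
//   bool finished = false;
//   while (!finished) {
//       CHECKPOINT_AND_RUN_UPDATE(tid, finished) {
//           finished = attemptUpdate(tid);
//       }
//   }
//
// sigsetjmp() records a restart point; if the thread is neutralized, the
// signal handler siglongjmp()s back to it, the recovery branch re-enters a
// quiescent state and unblocks the signal, and the loop retries the update.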
template void crashhandler(int signum, siginfo_t *info, void *uctx) { MasterRecordMgr * const recordmgr = (MasterRecordMgr * const) ___singleton; #ifdef SIGHANDLER_IDENTIFY_USING_PTHREAD_GETSPECIFIC int tid = (int) ((long) pthread_getspecific(pthreadkey)); #endif TRACE COUTATOMICTID("received signal "<isQuiescent(tid)) { #ifdef PERFORM_RESTART_IN_SIGHANDLER recordmgr->enterQuiescentState(tid); #ifdef USE_DEBUGCOUNTERS DEBUG countLongjmp.inc(tid); #endif __sync_synchronize(); #ifdef CRASH_RECOVERY_USING_SETJMP siglongjmp(setjmpbuffers[tid], 1); #endif #endif } // otherwise, i simply continue my operation as if nothing happened. // this lets me behave nicely when it would be dangerous for me to be // restarted (being in a Q state is analogous to having interrupts // disabled in an operating system kernel; however, whereas disabling // interrupts blocks other processes' progress, being in a Q state // implies that you cannot block the progress of any other thread.) } template class RecoveryMgr { public: const int NUM_PROCESSES; const int neutralizeSignal; inline int getTidInefficient(const pthread_t me) { int tid = -1; for (int i=0;i; // specify signal handler ___act.sa_flags = SA_RESTART | SA_SIGINFO; // restart any interrupted sys calls instead of silently failing sigfillset(&___act.sa_mask); // block signals during handler if (sigaction(_neutralizeSignal, &___act, NULL)) { COUTATOMIC("ERROR: could not register signal handler for signal "<<_neutralizeSignal<dtime == TIMESTAMP_NOT_SET) ; true; }) #else #define WAIT_FOR_DTIME(node) ({ false; }) #endif #include #include #include "rq_debugging.h" #include "dcss_plus_impl.h" template inline bool contains(T ** nullTerminatedArray, T * element) { for (int i=0;nullTerminatedArray[i];++i) { if (nullTerminatedArray[i] == element) return true; } return false; } template inline bool contains(T * array, const int numElements, T element) { for (int i=0;i class RQProvider { private: struct __rq_thread_data { #define __RQ_THREAD_DATA_SIZE 1024 #define MAX_NODES_DELETED_ATOMICALLY 8 #define CODE_COVERAGE_MAX_PATHS 11 union { struct { // anonymous struct inside anonymous union means we don't need to type anything special to access these variables long long rq_lin_time; HashList * hashlist; #ifdef COUNT_CODE_PATH_EXECUTIONS long long codePathExecutions[CODE_COVERAGE_MAX_PATHS]; #endif volatile char padding0[PREFETCH_SIZE_BYTES]; void * announcements[MAX_NODES_DELETED_ATOMICALLY]; int numAnnouncements; }; char bytes[__RQ_THREAD_DATA_SIZE]; // avoid false sharing (note: anon struct above contains around 96 bytes) }; } __attribute__((aligned(__RQ_THREAD_DATA_SIZE))); #ifdef COUNT_CODE_PATH_EXECUTIONS #define COUNT_CODE_PATH(path) { assert((path) < CODE_COVERAGE_MAX_PATHS); (++threadData[tid].codePathExecutions[(path)]); } long long codePathExecutions[CODE_COVERAGE_MAX_PATHS]; #else #define COUNT_CODE_PATH(path) #endif #define TIMESTAMP_NOT_SET 0 #define HASHLIST_INIT_CAPACITY_POW2 (1<<8) const int NUM_PROCESSES; volatile char padding0[PREFETCH_SIZE_BYTES]; volatile long long timestamp = 1; volatile char padding1[PREFETCH_SIZE_BYTES]; __rq_thread_data * threadData; #define NODE_DELETED_BEFORE_RQ 0 #define NODE_DELETED_AFTER_RQ 1 #define NODE_NOT_DELETED_BY_THREAD -1 dcsspProvider * prov; DataStructure * ds; RecordManager * const recmgr; int init[MAX_TID_POW2] = {0,}; public: RQProvider(const int numProcesses, DataStructure * ds, RecordManager * recmgr) : NUM_PROCESSES(numProcesses), ds(ds), recmgr(recmgr) { prov = new dcsspProvider(numProcesses); threadData = new 
__rq_thread_data[numProcesses]; DEBUG_INIT_RQPROVIDER(numProcesses); #ifdef COUNT_CODE_PATH_EXECUTIONS for (int i=0;ideinitThread(tid); // threadData[tid].hashlist->destroy(); // delete threadData[tid].hashlist; // } prov->debugPrint(); delete prov; delete[] threadData; DEBUG_DEINIT_RQPROVIDER(NUM_PROCESSES); } // invoke before a given thread can invoke any functions on this object void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; prov->initThread(tid); threadData[tid].hashlist = new HashList(); threadData[tid].hashlist->init(HASHLIST_INIT_CAPACITY_POW2); threadData[tid].numAnnouncements = 0; for (int i=0;ideinitThread(tid); threadData[tid].hashlist->destroy(); delete threadData[tid].hashlist; #ifdef COUNT_CODE_PATH_EXECUTIONS for (int i=0;iitime = TIMESTAMP_NOT_SET; node->dtime = TIMESTAMP_NOT_SET; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any initialization of addr // with invocations of rq_write_addr // // NOTE: this CANNOT be used on fields that might be concurrently being modified // by an invocation of rq_linearize_update_at_write or // rq_linearize_update_at_cas template inline void write_addr(const int tid, T volatile * const addr, const T val) { if (is_pointer::value) { prov->writePtr((casword_t *) addr, (casword_t) val); } else { prov->writeVal((casword_t *) addr, (casword_t) val); } } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any reads of addr with // invocations of rq_read_addr template inline T read_addr(const int tid, T volatile * const addr) { return (T) ((is_pointer::value) ? prov->readPtr(tid, (casword_t *) addr) : prov->readVal(tid, (casword_t *) addr)); } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run some time BEFORE the physical deletion of a node // whose key has ALREADY been logically deleted. void announce_physical_deletion(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { threadData[tid].announcements[threadData[tid].numAnnouncements+i] = deletedNodes[i]; } SOFTWARE_BARRIER; threadData[tid].numAnnouncements += i; assert(threadData[tid].numAnnouncements <= MAX_NODES_DELETED_ATOMICALLY); SOFTWARE_BARRIER; } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node failed. void physical_deletion_failed(const int tid, NodeType * const * const deletedNodes) { for (int i=0;deletedNodes[i];++i) { --threadData[tid].numAnnouncements; } assert(threadData[tid].numAnnouncements >= 0); } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node succeeded. void physical_deletion_succeeded(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { recmgr->retire(tid, deletedNodes[i]); } SOFTWARE_BARRIER; // ensure nodes are placed in the epoch bag BEFORE they are removed from announcements. 
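        // call-sequence recap for this announcement protocol (illustrative;
        // casSucceeded stands for the data structure's own deletion CAS):
        //   announce_physical_deletion(tid, deletedNodes);
        //   if (casSucceeded) physical_deletion_succeeded(tid, deletedNodes);
        //   else              physical_deletion_failed(tid, deletedNodes);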
        threadData[tid].numAnnouncements -= i;
        assert(threadData[tid].numAnnouncements >= 0);
    }

private:
    inline void set_insertion_timestamps(
            const int tid,
            const long long ts,
            NodeType * const * const insertedNodes,
            NodeType * const * const deletedNodes) {
        // set insertion timestamps
        // for each i_node in insertedNodes
        for (int i_nodeix=0;insertedNodes[i_nodeix];++i_nodeix) {
            insertedNodes[i_nodeix]->itime = ts;
        }
    }

    inline void set_deletion_timestamps(
            const int tid,
            const long long ts,
            NodeType * const * const insertedNodes,
            NodeType * const * const deletedNodes) {
        // set deletion timestamps
        // for each d_node in deletedNodes
        for (int d_nodeix=0;deletedNodes[d_nodeix];++d_nodeix) {
            deletedNodes[d_nodeix]->dtime = ts;
        }
    }

public:
    // replace the linearization point of an update that inserts or deletes nodes
    // with an invocation of this function if the linearization point is a WRITE
    template <typename T>
    inline T linearize_update_at_write(
            const int tid,
            T volatile * const lin_addr,
            const T& lin_newval,
            NodeType * const * const insertedNodes,
            NodeType * const * const deletedNodes) {
        if (!logicalDeletion) {
            // physical deletion will happen at the same time as logical deletion
            announce_physical_deletion(tid, deletedNodes);
        }
        casword_t old1;
        while (true) {
            old1 = (casword_t) timestamp;
            casword_t old2 = (is_pointer<T>::value)
                    ? (casword_t) prov->readPtr(tid, (casword_t *) lin_addr)
                    : (casword_t) prov->readVal(tid, (casword_t *) lin_addr);
            casword_t new2 = (casword_t) lin_newval;
            dcsspresult_t result = (is_pointer<T>::value)
                    ? prov->dcsspPtr(tid, (casword_t *) &timestamp, old1, (casword_t *) lin_addr, old2, new2, (void **) insertedNodes, (void **) deletedNodes)
                    : prov->dcsspVal(tid, (casword_t *) &timestamp, old1, (casword_t *) lin_addr, old2, new2, (void **) insertedNodes, (void **) deletedNodes);
            if (result.status == DCSSP_SUCCESS) {
                break;
            }
        }
        //DELAY_UP_TO(10000);
        set_insertion_timestamps(tid, old1 /* timestamp */, insertedNodes, deletedNodes);
        set_deletion_timestamps(tid, old1 /* timestamp */, insertedNodes, deletedNodes);
        // discard the payloads (insertedNodes and deletedNodes) in this thread's descriptor
        // so other threads can't access them far in the future if we become QUIESCENT and sleep for a long time
        // (must be performed after setting itimes and dtimes, but before enterQuiescentState)
        prov->discardPayloads(tid);
        if (!logicalDeletion) {
            // physical deletion will happen at the same time as logical deletion
            physical_deletion_succeeded(tid, deletedNodes);
        }
#if defined USE_RQ_DEBUGGING
        DEBUG_RECORD_UPDATE_CHECKSUM(tid, old1 /* timestamp */, insertedNodes, deletedNodes, ds);
#endif
        return lin_newval;
    }

    // replace the linearization point of an update that inserts or deletes nodes
    // with an invocation of this function if the linearization point is a CAS
    template <typename T>
    inline T linearize_update_at_cas(
            const int tid,
            T volatile * const lin_addr,
            const T& lin_oldval,
            const T& lin_newval,
            NodeType * const * const insertedNodes,
            NodeType * const * const deletedNodes) {
        if (!logicalDeletion) {
            // physical deletion will happen at the same time as logical deletion
            announce_physical_deletion(tid, deletedNodes);
        }
        casword_t old2 = (casword_t) lin_oldval;
        casword_t new2 = (casword_t) lin_newval;
        dcsspresult_t result;
        while (true) {
            casword_t old1 = (casword_t) timestamp;
            result = (is_pointer<T>::value)
                    ? prov->dcsspPtr(tid, (casword_t *) &timestamp, old1 /* timestamp */, (casword_t *) lin_addr, old2, new2, (void **) insertedNodes, (void **) deletedNodes)
                    : prov->dcsspVal(tid, (casword_t *) &timestamp, old1 /* timestamp */, (casword_t *) lin_addr, old2, new2, (void **) insertedNodes, (void **) deletedNodes);
            if (result.status == DCSSP_SUCCESS) {
                //DELAY_UP_TO(1000);
                set_insertion_timestamps(tid, old1 /* timestamp */, insertedNodes, deletedNodes);
                set_deletion_timestamps(tid, old1 /* timestamp */, insertedNodes, deletedNodes);
                // discard the payloads (insertedNodes and deletedNodes) in this thread's descriptor
                // so other threads can't access them far in the future if we become QUIESCENT and sleep for a long time
                // (must be performed after setting itimes and dtimes, but before enterQuiescentState)
                prov->discardPayloads(tid);
                if (!logicalDeletion) {
                    // physical deletion will happen at the same time as logical deletion
                    physical_deletion_succeeded(tid, deletedNodes);
                }
#if defined USE_RQ_DEBUGGING
                DEBUG_RECORD_UPDATE_CHECKSUM(tid, old1 /* timestamp */, insertedNodes, deletedNodes, ds);
#endif
                return lin_oldval;
            } else if (result.status == DCSSP_FAILED_ADDR2) {
                // failed due to original CAS's failure (NOT due to the timestamp changing)
                if (!logicalDeletion) {
                    // physical deletion will happen at the same time as logical deletion
                    physical_deletion_failed(tid, deletedNodes);
                }
                break;
            }
        }
        assert(result.status == DCSSP_FAILED_ADDR2);
        assert(old2 != result.failed_val);
        return (T) result.failed_val;
    }

    // invoke at the start of each traversal
    inline void traversal_start(const int tid) {
        threadData[tid].hashlist->clear();
        threadData[tid].rq_lin_time = __sync_add_and_fetch(&timestamp, 1); // linearize rq here!
    }

private:
    // invoke each time a traversal visits a node with a key in the desired range:
    // if the node belongs in the range query, it will be placed in rqResult[index]
    inline int __traversal_try_add(const int tid, NodeType * const node, K * const outputKeys, V * const outputValues, const K& lo, const K& hi, bool foundDuringTraversal) {
        // rqResultKeys should have space for MAX_KEYS_PER_NODE keys, AT LEAST
        // in the following, rather than having deeply nested if-else blocks,
        // we return asap, and list facts that must be true if we didn't return
        assert(foundDuringTraversal || !logicalDeletion || ds->isLogicallyDeleted(tid, node)); // TODO: ensure this makes sense when called with announced nodes
        long long itime = node->itime;
        if (itime != TIMESTAMP_NOT_SET && node->itime >= threadData[tid].rq_lin_time) return 0; // node was inserted after the range query
        // fact: either itime was not set above, or node was inserted before rq

        ///////////////////////// HANDLE UNKNOWN ITIME /////////////////////////
        // TODO: try adding a bit of spinning before falling back to the full lock-free solution
        // determine if any other process inserted, or is trying to insert node, and, if so, when
        for (int otherTid=0; (itime = node->itime) == TIMESTAMP_NOT_SET && otherTid<NUM_PROCESSES; ++otherTid) {
            tagptr_t tagptr = prov->getDescriptorTagptr(otherTid);
            // try to get a snapshot of otherTid's dcssp descriptor
            dcsspdesc_t snap;
            if (!prov->getDescriptorSnapshot(tagptr, &snap)) {
                // we failed to obtain a snapshot, which means that while getDescriptorSnapshot()
                // was running, the process finished one dcssp, and started a new dcssp.
                continue; // goto check next process
                // if the finished dcssp inserted node, then before the next dcssp by the
                // same process, node->itime is set. so, we check whether itime is set.
            }
            // fact: we obtained a snapshot
            if (!SNAPSHOT_CONTAINS_INSERTED_NODE(snap, node)) continue; // goto check next process
            // fact: otherTid is trying/tried to insert node
            int state = MUTABLES_UNPACK_FIELD(snap.mutables, DCSSP_MUTABLES_MASK_STATE, DCSSP_MUTABLES_OFFSET_STATE);
            if (state == DCSSP_STATE_FAILED) {
                // the operation described by snap did not insert node, so either this process
                // did not insert it, or the process inserted it in a PREVIOUS operation,
                // so it must have already set itime as appropriate
                continue; // goto check next process
            } else if (state == DCSSP_STATE_SUCCEEDED) {
                // the dcssp operation finished, and inserted node. to determine WHEN it inserted
                // node, we look at the argument old1 to the dcssp, which contains the timestamp
                // when the dcssp took place. (observe that this is the value process otherTid
                // would write to node->itime)
                if (snap.old1 >= threadData[tid].rq_lin_time) return 0; // node was inserted after rq
                break; // process inserted node at time snap.old1, BEFORE the RQ
            }
            // fact: state is UNDECIDED
            // now we try to help
            casword_t addr2 = *snap.addr2;
            if (addr2 == tagptr) {
                // addr2 indeed points to the dcssp descriptor. the linearization point of the
                // dcssp operation occurs after this step, so the dcssp might have been
                // linearized, but not yet had its state set.
                prov->helpProcess(tid, otherTid); // we need to know what its final state will be to determine whether it successfully inserted node. so, we HELP otherTid finish its dcssp.
            }
            // note: the following all happens in BOTH the cases where addr2 == tagptr and where
            // addr2 != tagptr, except there is some extra work if addr2 != tagptr and
            // state2 != SUCCEEDED. i've folded the two cases together simply for compactness /
            // less repetition.
            // then, we reread the state
            bool valid = false;
            dcsspdesc_t * ptr = prov->getDescriptorPtr(tagptr);
            int state2 = DESC_READ_FIELD(valid, ptr->mutables, tagptr, DCSSP_MUTABLES_MASK_STATE, DCSSP_MUTABLES_OFFSET_STATE);
            if (!valid) continue; // goto check next process
            // the read of the state field was invalid, which means that the dcssp operation has
            // terminated, and the next dcssp operation by otherTid has begun. since the next
            // dcssp operation has begun, and each high-level data structure operation performs
            // only one successful dcssp (in a call to linearize_update_at_...), if this dcssp
            // that finished in fact inserted node, then the next dcssp would be part of the next
            // high-level operation. thus, if node was inserted by the finished dcssp, then the
            // high-level operation that performed this dcssp will already have set node->itime.
            // fact: the read of state was valid
            if (state2 == DCSSP_STATE_SUCCEEDED) {
                // we are in case (b) described above. the dcssp operation finished, and inserted
                // node. to determine WHEN it inserted node, we look at the argument old1 to the
                // dcssp, which contains the timestamp when the dcssp took place. (observe that
                // this is the value process otherTid would write to node->itime)
                if (snap.old1 >= threadData[tid].rq_lin_time) return 0; // node was inserted after rq
                break; // process inserted node at time snap.old1, BEFORE the RQ
            } else { // undecided or failed
                continue; // goto check next process
                // we are in case (a) or case (c) described above. so, otherTid did NOT insert node.
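                // Illustrative numbers for the two exits above (assuming this thread's
                // rq_lin_time is 50): if the dcssp ran with snap.old1 == 53, the
                // insertion happened after the query's clock bump, so the node is
                // excluded (return 0); if snap.old1 == 47, the node was already
                // inserted when the query linearized, so we stop searching (break)
                // and go on to check dtime.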
            }
        }
        if (itime != TIMESTAMP_NOT_SET && itime >= threadData[tid].rq_lin_time) return 0; // node was inserted after rq

        /////////////// HANDLE LOGICAL DELETION AND CHECK DTIME ////////////////

        long long dtime = TIMESTAMP_NOT_SET;
        if (!logicalDeletion && foundDuringTraversal) goto tryAddToRQ; // no logical deletion. since node was inserted before the range query, and the traversal encountered it, it must have been deleted AFTER the traversal encountered it.
        // fact: no logical deletion ==> did not find node during traversal
        dtime = node->dtime;
        if (dtime != TIMESTAMP_NOT_SET && dtime < threadData[tid].rq_lin_time) return 0;
        // fact: either dtime was not set above, or node was deleted after rq
        if (logicalDeletion && !ds->isLogicallyDeleted(tid, node)) goto tryAddToRQ; // if logical deletion is used with marking, the fact that node was inserted before the range query, and that the traversal encountered node, is NOT enough to argue that node was in the data structure when the traversal started. why? when the traversal encountered node, it might have already been marked. so, we check if node is marked. if not, then the node has not yet been deleted.
        // fact: logical deletion ==> node has been logically deleted

        ///////////////////////// HANDLE UNKNOWN DTIME /////////////////////////

        // determine if any other process is trying/tried to delete node
        for (int otherTid=0; (dtime = node->dtime) == TIMESTAMP_NOT_SET && otherTid<NUM_PROCESSES; ++otherTid) {
            tagptr_t tagptr = prov->getDescriptorTagptr(otherTid);
            // try to get a snapshot of otherTid's dcssp descriptor
            dcsspdesc_t snap;
            if (!prov->getDescriptorSnapshot(tagptr, &snap)) {
                // we failed to obtain a snapshot, which means that while getDescriptorSnapshot()
                // was running, the process finished one dcssp, and started a new dcssp.
                continue; // goto check next process
                // if the finished dcssp deleted node, then before the next dcssp by the same
                // process, node->dtime is set (and it will be seen after the loop).
            }
            // fact: we obtained a snapshot
            if (!SNAPSHOT_CONTAINS_DELETED_NODE(snap, node)) continue; // goto check next process
            // fact: otherTid is trying/tried to delete node
            // we must determine whether otherTid's dcssp operation (whose descriptor we obtained
            // a snapshot of) has been linearized, and whether it was successful.
            // we use the following facts.
            // (1) the dcssp descriptor has a state that is initially UNDECIDED, and becomes
            //     SUCCEEDED or FAILED after the dcssp has been linearized.
            // (2) a dcssp that succeeds or fails changes *snap.addr2 to tagptr, then reads
            //     *snap.addr1 and linearizes, then sets its state to SUCCEEDED or FAILED, then
            //     changes *snap.addr2 from tagptr to another value.
            // (3) once *snap.addr2 has been changed from tagptr to another value, it can never
            //     again contain tagptr.
            // (4) each high-level data structure operation invokes dcssp only via
            //     linearize_update_at_write or linearize_update_at_cas, and only performs one
            //     invocation of linearize_update_at_write or one /successful/ invocation of
            //     linearize_update_at_cas (and possibly many unsuccessful invocations of
            //     linearize_update_at_cas).
            // (5) if a dcssp operation deletes node, then before the next dcssp by the same
            //     process, node->dtime is set.
            // so, we check the state of the dcssp operation. if it is SUCCEEDED or FAILED, we
            // have our answer. but, if the state is UNDECIDED, the dcssp may or may not have
            // been linearized.
            // in the latter case, to determine whether it has been linearized, we would like to
            // HELP the dcssp operation to complete.
// note, however, that the help procedure for the dcssp algorithm can be invoked only if otherTid has already changed *snap.addr2 to tagptr. // thus, we must determine whether otherTid has changed *snap.addr2 to tagptr, before we can help the dcssp operation. // so, we read *snap.addr2. if we see that it contains tagptr, then we can help the dcssp. // otherwise, one of the following must be true: // (a) otherTid has not yet changed *snap.addr2 to tagptr, or // (b) otherTid changed *snap.addr2 to tagptr, then it (or a helper) changed its state to SUCCEEDED, then changed *snap.addr2 to a different value (never again to contain tagptr), or // (c) otherTid changed *snap.addr2 to tagptr, then it (or a helper) changed its state to FAILED, then changed *snap.addr2 to a different value (never again to contain tagptr). // in case (a), we know that the dcssp has not yet been linearized. // in case (b), the dcssp has been linearized, has state SUCCEEDED, and deleted node. // in case (c), the dcssp has been linearized, has state FAILED, and did NOT delete node. // so, after reading *snap.addr2, we read the dcssp operation's state again to determine which case has occurred. int state = MUTABLES_UNPACK_FIELD(snap.mutables, DCSSP_MUTABLES_MASK_STATE, DCSSP_MUTABLES_OFFSET_STATE); if (state == DCSSP_STATE_FAILED) { // the operation described by snap did not insert/delete node, so either this process did not insert/delete it, or the process inserted/deleted it in a PREVIOUS operation, so it must have already set itime/dtime as appropriate continue; // goto check next process } else if (state == DCSSP_STATE_SUCCEEDED) { // the dcssp operation finished, and deleted node. to determine WHEN it deleted node, we look at the argument old1 to the dcssp, which contains the timestamp when the dcssp took place. (observe that this is the value process otherTid would write to node->dtime) if (WAIT_FOR_DTIME(node)) { // the following assertions are thread safe ONLY if WAIT_FOR_DTIME actually waits! (which is true only if it returns true, which is true only if RQ_LOCKFREE_WAITS_FOR_DTIME is defined) assert(snap.old1 <= node->dtime); assert((snap.old1 >= threadData[tid].rq_lin_time) == (node->dtime >= threadData[tid].rq_lin_time)); assert(foundDuringTraversal || node->dtime == snap.old1); assert(!foundDuringTraversal || node->dtime == snap.old1); } if (snap.old1 < threadData[tid].rq_lin_time) return 0; // node was deleted before rq goto tryAddToRQ; // node was deleted by this process after rq } // fact: state is UNDECIDED // maya: in logicalDeletion, since the node is marked the DCSS was successful and only the dtime is not yet set. Thus, UNDECIDED means that otherThread did not mark the node, it was some other thread and we can continue. if (logicalDeletion) continue; // goto check next process // now we try to help casword_t addr2 = *snap.addr2; if (addr2 == tagptr) { // TODO: prove it is impossible to execute this block with logical deletion (idea: since node is marked, either (1) otherTid marked it earlier with a DCSS whose state is SUCCEEDED, or (2) someone else marked node, so otherTid cannot successfully CAS addr2.) // addr2 indeed points to the dcssp descriptor. the linearization point of the dcssp operation occurs after this step, so the dcssp might have been linearized, but not yet had its state set. prov->helpProcess(tid, otherTid); // we need to know what its final state will be to determine whether it successfully deleted node. so, we HELP otherTid finish its dcssp. 
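            // The addr2-then-state reread that follows can be summarized as a small
            // decision table (illustrative; cases (a)-(c) are the ones defined above):
            //
            //     *snap.addr2     reread of state        conclusion
            //     == tagptr       (help, then reread)    state is now final
            //     != tagptr       SUCCEEDED              case (b): deleted at snap.old1
            //     != tagptr       UNDECIDED or FAILED    case (a)/(c): otherTid did not
            //                                            delete node; check next process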
            }
            // note: the following all happens in BOTH the cases where addr2 == tagptr and where
            // addr2 != tagptr, except there is some extra work if addr2 != tagptr and
            // state2 != SUCCEEDED. i've folded the two cases together simply for compactness /
            // less repetition.
            // then, we reread the state
            bool valid = false;
            dcsspdesc_t * ptr = prov->getDescriptorPtr(tagptr);
            int state2 = DESC_READ_FIELD(valid, ptr->mutables, tagptr, DCSSP_MUTABLES_MASK_STATE, DCSSP_MUTABLES_OFFSET_STATE);
            if (!valid) continue; // goto check next process
            // the read of the state field was invalid, which means that the dcssp operation has
            // terminated, and the next dcssp operation by otherTid has begun. since the next
            // dcssp operation has begun, and each high-level data structure operation performs
            // only one successful dcssp (in a call to linearize_update_at_...), if this dcssp
            // that finished in fact deleted node, then the next dcssp would be part of the next
            // high-level operation. thus, if node was deleted by the finished dcssp, then the
            // high-level operation that performed this dcssp will already have set node->dtime.
            // fact: the read of state was valid
            if (state2 == DCSSP_STATE_SUCCEEDED) {
                // we are in case (b) described above. the dcssp operation finished, and deleted
                // node. to determine WHEN it deleted node, we look at the argument old1 to the
                // dcssp, which contains the timestamp when the dcssp took place. (observe that
                // this is the value process otherTid would write to node->dtime)
                if (snap.old1 >= threadData[tid].rq_lin_time) goto tryAddToRQ; // node was deleted by this process after rq
                return 0; // do not add to rq
            } else { // undecided or failed
                continue; // goto check next process
                // we are in case (a) or case (c) described above. so, otherTid did NOT delete node.
            }
        }
        if (dtime == TIMESTAMP_NOT_SET) {
            assert(!logicalDeletion);
            assert(!foundDuringTraversal);
            goto tryAddToRQ; // no process deleted node before the range query
        }
        COUNT_CODE_PATH(9);
        if (dtime >= threadData[tid].rq_lin_time) goto tryAddToRQ;
        return 0; // do not add to rq

        ///////////////////// TRY TO ADD NODE'S KEYS TO RQ /////////////////////
        // note: this way of organizing this decision tree favors trees with fat multi-key nodes, because getKeys is delayed as long as possible.

tryAddToRQ:
        // fetch the node's keys that are in the set
        int cnt = ds->getKeys(tid, node, outputKeys, outputValues);
        assert(cnt < RQ_DEBUGGING_MAX_KEYS_PER_NODE);
        if (cnt == 0) return 0; // node doesn't contain any keys that are in the set and in the desired range
        // note: in the following loop, we shift keys in the outputKeys array left to eliminate any that ultimately should not be added to the range query
        int numNewKeys = 0;
        for (int i=0;i<cnt;++i) {
            if (!ds->isInRange(outputKeys[i], lo, hi)) goto doNotAddToRQ; // key is NOT in the desired range
            if (threadData[tid].hashlist->contains(outputKeys[i])) goto doNotAddToRQ; // key is already in the range query
            outputKeys[numNewKeys] = outputKeys[i]; // save this as a new key added to the RQ
            outputValues[numNewKeys] = outputValues[i];
            ++numNewKeys;
doNotAddToRQ: (0);
        }
        return numNewKeys;
    }

    inline void traversal_try_add(const int tid, NodeType * const node, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi, bool foundDuringTraversal) {
        //#if defined MICROBENCH && !defined NDEBUG
        //    assert(*startIndex < RQSIZE); // note: this assert is a hack.
it should be *startIndex < size of rqResultKeys // if (*startIndex > RQSIZE) { // cout<<"ERROR: *startIndex="<<(*startIndex)<<" is unexpectedly greater than or equal to RQSIZE="<insert(rqResultKeys[(*startIndex)++]); } // note: the above increments startIndex #if defined MICROBENCH assert(*startIndex <= RQSIZE); #endif } public: inline void traversal_try_add(const int tid, NodeType * const node, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { traversal_try_add(tid, node, rqResultKeys, rqResultValues, startIndex, lo, hi, true); } // invoke at the end of each traversal: // any nodes that were deleted during the traversal, // and were consequently missed during the traversal, // are placed in rqResult[index] void traversal_end(const int tid, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { // todo: possibly optimize by skipping entire blocks if there are many keys to skip (does not seem to be justifiable for 4 work threads and 4 range query threads) SOFTWARE_BARRIER; long long end_timestamp = timestamp; SOFTWARE_BARRIER; #if 0 vector nodes; // collect nodes announced by other processes for (int otherTid=0;otherTid * all_bags[NUM_PROCESSES*NUMBER_OF_EPOCH_BAGS+1]; vector> all_iterators; int numIterators = 0; for (int otherTid=0;otherTid * thread_bags[NUMBER_OF_EPOCH_BAGS+1]; recmgr->get((NodeType *) NULL)->reclaim->getSafeBlockbags(otherTid, thread_bags); for (int i=0;thread_bags[i];++i) { all_bags[numIterators] = thread_bags[i]; all_iterators.push_back(thread_bags[i]->begin()); ++numIterators; } } // collect nodes in epoch bags int numVisitedInEpochBags = 0; for (int ix = 0; ix < numIterators; ++ix) { for (; all_iterators[ix] != all_bags[ix]->end(); all_iterators[ix]++) { NodeType * node = (*all_iterators[ix]); nodes.push_back(node); ++numVisitedInEpochBags; long long dtime = node->dtime; if (dtime != TIMESTAMP_NOT_SET && dtime > end_timestamp) continue; if (!(logicalDeletion && canRetireNodesLogicallyDeletedByOtherProcesses)) { // if we cannot retire nodes that are logically deleted // by other processes, then we always retire nodes in // order of increasing dtime values. // so, the blockbag will be ordered, which means that, // if dtime is before the RQ, then all remaining nodes // in this bag were deleted before the RQ. // so, in this case, we skip to the next bag. 
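            // A concrete instance of this skip rule (illustrative numbers only):
            // suppose rq_lin_time == 100 and end_timestamp == 120, and a bag is
            // visited newest-first with dtimes 130, 110, 95, 80. then 130 is skipped
            // (deleted after the traversal ended, so the traversal itself saw the
            // node), 110 is a candidate (deleted during the query window, so it may
            // have been missed), and 95 ends the bag: it and everything after it were
            // deleted before the query linearized. the continue/break checks below
            // implement exactly this behavior.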
if (dtime != TIMESTAMP_NOT_SET && dtime < threadData[tid].rq_lin_time) break; } } } // visit collected nodes for (auto it = nodes.begin(); it != nodes.end(); it++) { NodeType * node = *it; traversal_try_add(tid, node, rqResultKeys, startIndex, lo, hi, false); } #else // collect nodes announced by other processes for (int otherTid=0;otherTid * all_bags[NUM_PROCESSES*NUMBER_OF_EPOCH_BAGS+1]; vector> all_iterators; int numIterators = 0; for (int otherTid=0;otherTid * thread_bags[NUMBER_OF_EPOCH_BAGS+1]; recmgr->get((NodeType *) NULL)->reclaim->getSafeBlockbags(otherTid, thread_bags); for (int i=0;thread_bags[i];++i) { all_bags[numIterators] = thread_bags[i]; all_iterators.push_back(thread_bags[i]->begin()); ++numIterators; } } int numSkippedInEpochBags = 0; int numVisitedInEpochBags = 0; for (int ix = 0; ix < numIterators; ++ix) { for (; all_iterators[ix] != all_bags[ix]->end(); all_iterators[ix]++) { NodeType * node = (*all_iterators[ix]); assert(node); ++numVisitedInEpochBags; ++numSkippedInEpochBags; long long dtime = node->dtime; if (dtime != TIMESTAMP_NOT_SET && dtime > end_timestamp) continue; --numSkippedInEpochBags; if (!(logicalDeletion && canRetireNodesLogicallyDeletedByOtherProcesses)) { // if we cannot retire nodes that are logically deleted // by other processes, then we always retire nodes in // order of increasing dtime values. // so, the blockbag will be ordered, which means that, // if dtime is before the RQ, then all remaining nodes // in this bag were deleted before the RQ. // so, in this case, we skip to the next bag. if (dtime != TIMESTAMP_NOT_SET && dtime < threadData[tid].rq_lin_time) break; } traversal_try_add(tid, node, rqResultKeys, rqResultValues, startIndex, lo, hi, false); } } #endif #if defined MICROBENCH && !defined NDEBUG if (*startIndex > RQSIZE) { cout<<"ERROR: *startIndex="<<(*startIndex)<<" is unexpectedly greater than or equal to RQSIZE="< #include #include using namespace std; #define MAX_NUM_RQ_IN_EXECUTION (1<<20) #ifdef RQ_VISITED_IN_BAGS_HISTOGRAM #include string twoDigits(int x) { stringstream ss; if (x >= 0 && x < 10) { ss<<"0"; } ss< void printLogarithmicHistogram(T * valuesOverTime, int numValues) { constexpr int numBits = sizeof(T)*8; int histogram[numBits+1]; memset(histogram, 0, sizeof(histogram)); T sum = 0; int cntNonZero = 0; for (int i=0;i 1) { v >>= 1; ++pow2; } assert(pow2 <= numBits); ++histogram[pow2]; } for (int i=0;i<=numBits;++i) { if (histogram[i] > 0) { cout<<" (2^"< #define CSV_OUTPUT_FILE "data.csv" std::ofstream ofs; #define MAX_RQ_SIZE (1<<16) __thread int numRQs[MAX_RQ_SIZE+1]; int totalNumRQs[MAX_RQ_SIZE+1]; #endif inline void DEBUG_RECORD_RQ_VISITED(const int tid, const long long ts, const int numVisited) { #ifdef RQ_VISITED_IN_BAGS_HISTOGRAM if (ts >= MAX_NUM_RQ_IN_EXECUTION) return; threadNumNodesVisitedInBags[tid][ts] = numVisited; #endif } inline void DEBUG_RECORD_RQ_SIZE(const int size) { #ifdef RQ_HISTOGRAM ++numRQs[size]; #endif } template inline void DEBUG_RECORD_UPDATE_CHECKSUM(const int tid, const long long timestamp, Node * const * const insertedNodes, Node * const * const deletedNodes, DataStructure * const ds) { #ifdef RQ_VALIDATION if (timestamp >= MAX_NUM_RQ_IN_EXECUTION) { return; // cout << "timestamp is: " << timestamp << endl; // error("timestamp > MAX_NUM_RQ_IN_EXECUTION"); } for (int i=0;insertedNodes[i];++i) { K outputKeys[RQ_DEBUGGING_MAX_KEYS_PER_NODE]; V outputValues[RQ_DEBUGGING_MAX_KEYS_PER_NODE]; int cnt = ds->getKeys(tid, insertedNodes[i], outputKeys, outputValues); assert(cnt <= 
RQ_DEBUGGING_MAX_KEYS_PER_NODE); for (int j=0;jgetKeys(tid, deletedNodes[i], outputKeys, outputValues); assert(cnt <= RQ_DEBUGGING_MAX_KEYS_PER_NODE); for (int j=0;j inline void DEBUG_RECORD_RQ_CHECKSUM(const int tid, const long long timestamp, K const * const rqResult, const int len) { #ifdef RQ_VALIDATION if (timestamp >= MAX_NUM_RQ_IN_EXECUTION) return; //if (timestamp >= MAX_NUM_RQ_IN_EXECUTION) error("timestamp > MAX_NUM_RQ_IN_EXECUTION"); // compute checksum long long checksum = 0; for (int i=0;i 0) { cout<<"RQ VALIDATION TOTAL FAILURES: "< #include #include #include #include #define MAX_HTM_ATTEMPTS 30 #define dosum(src) ({ \ long long __sum = 0; \ for (int __i=0;__i class RQProvider { private: struct __rq_thread_data { #define __RQ_THREAD_DATA_SIZE 1024 union { struct { // anonymous struct inside anonymous union means we don't need to type anything special to access these variables long long rq_lin_time; HashList * hashlist; volatile char padding0[PREFETCH_SIZE_BYTES]; void * announcements[MAX_NODES_DELETED_ATOMICALLY+1]; int numAnnouncements; volatile char padding1[PREFETCH_SIZE_BYTES]; // prevent false sharing between htm debugging stats below and announcements // htm debugging stuff int commitWriter; int abortWriter; int commitReader; int abortReader; int fallback; }; char bytes[__RQ_THREAD_DATA_SIZE]; // avoid false sharing }; } __attribute__((aligned(__RQ_THREAD_DATA_SIZE))); #define TIMESTAMP_NOT_SET 0 #define HASHLIST_INIT_CAPACITY_POW2 (1<<8) const int NUM_PROCESSES; volatile char padding0[PREFETCH_SIZE_BYTES]; volatile long long timestamp = 1; volatile char padding1[PREFETCH_SIZE_BYTES]; RWLock rwlock; volatile char padding2[PREFETCH_SIZE_BYTES]; __rq_thread_data * threadData; DataStructure * ds; RecordManager * const recmgr; int init[MAX_TID_POW2] = {0,}; public: RQProvider(const int numProcesses, DataStructure * ds, RecordManager * recmgr) : NUM_PROCESSES(numProcesses), ds(ds), recmgr(recmgr) { threadData = new __rq_thread_data[numProcesses]; DEBUG_INIT_RQPROVIDER(numProcesses); } ~RQProvider() { cout<<"writer commits : "<destroy(); // delete threadData[tid].hashlist; // } delete[] threadData; DEBUG_DEINIT_RQPROVIDER(NUM_PROCESSES); } // invoke before a given thread can perform any rq_functions void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; threadData[tid].hashlist = new HashList(); threadData[tid].hashlist->init(HASHLIST_INIT_CAPACITY_POW2); threadData[tid].numAnnouncements = 0; for (int i=0;idestroy(); delete threadData[tid].hashlist; DEBUG_DEINIT_THREAD(tid); } // invoke whenever a new node is created/initialized inline void init_node(const int tid, NodeType * const node) { node->itime = TIMESTAMP_NOT_SET; node->dtime = TIMESTAMP_NOT_SET; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any initialization of addr // with invocations of rq_write_addr template inline void write_addr(const int tid, T volatile * const addr, const T val) { *addr = val; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any reads of addr with // invocations of rq_read_addr template inline T read_addr(const int tid, T volatile * const addr) { return *addr; } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run some time BEFORE the physical deletion of a node // whose key has ALREADY been logically deleted. 
void announce_physical_deletion(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { threadData[tid].announcements[threadData[tid].numAnnouncements+i] = deletedNodes[i]; } SOFTWARE_BARRIER; threadData[tid].numAnnouncements += i; assert(threadData[tid].numAnnouncements <= MAX_NODES_DELETED_ATOMICALLY); SOFTWARE_BARRIER; } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node failed. void physical_deletion_failed(const int tid, NodeType * const * const deletedNodes) { for (int i=0;deletedNodes[i];++i) { --threadData[tid].numAnnouncements; } assert(threadData[tid].numAnnouncements >= 0); } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node succeeded. void physical_deletion_succeeded(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { recmgr->retire(tid, deletedNodes[i]); } SOFTWARE_BARRIER; // ensure nodes are placed in the epoch bag BEFORE they are removed from announcements. threadData[tid].numAnnouncements -= i; assert(threadData[tid].numAnnouncements >= 0); } private: inline void set_insertion_timestamps( const int tid, const long long ts, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { // set insertion timestamps // for each i_node in insertedNodes for (int i_nodeix=0;insertedNodes[i_nodeix];++i_nodeix) { insertedNodes[i_nodeix]->itime = ts; } } inline void set_deletion_timestamps( const int tid, const long long ts, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { // set deletion timestamps // for each d_node in deletedNodes for (int d_nodeix=0;deletedNodes[d_nodeix];++d_nodeix) { deletedNodes[d_nodeix]->dtime = ts; } } public: // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a WRITE template inline T linearize_update_at_write( const int tid, T volatile * const lin_addr, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion announce_physical_deletion(tid, deletedNodes); } // htm path long long ts; int limit = MAX_HTM_ATTEMPTS; while (limit--) { while (rwlock.isWriteLocked()) {} if (XBEGIN() == _XBEGIN_STARTED) { if (rwlock.isWriteLocked()) XABORT(1); ts = timestamp; *lin_addr = lin_newval; // original linearization point XEND(); ++threadData[tid].commitReader; goto committed; } else ++threadData[tid].abortReader; } // fallback path ++threadData[tid].fallback; rwlock.readLock(); ts = timestamp; *lin_addr = lin_newval; // original linearization point rwlock.readUnlock(); committed: set_insertion_timestamps(tid, ts, insertedNodes, deletedNodes); set_deletion_timestamps(tid, ts, insertedNodes, deletedNodes); if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_succeeded(tid, deletedNodes); } #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif return lin_newval; } // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a CAS template inline T linearize_update_at_cas( const int tid, T volatile * const 
lin_addr, const T& lin_oldval, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion announce_physical_deletion(tid, deletedNodes); } // htm path long long ts; T res; int limit = MAX_HTM_ATTEMPTS; while (limit--) { while (rwlock.isWriteLocked()) {} if (XBEGIN() == _XBEGIN_STARTED) { if (rwlock.isWriteLocked()) XABORT(1); ts = timestamp; res = __sync_val_compare_and_swap(lin_addr, lin_oldval, lin_newval); // original linearization point // res = *lin_addr; // manually implement CAS // if (res == lin_oldval) *lin_addr = lin_newval; XEND(); ++threadData[tid].commitReader; goto committed; } else ++threadData[tid].abortReader; } // fallback path ++threadData[tid].fallback; rwlock.readLock(); ts = timestamp; res = __sync_val_compare_and_swap(lin_addr, lin_oldval, lin_newval); rwlock.readUnlock(); committed: if (res == lin_oldval){ set_insertion_timestamps(tid, ts, insertedNodes, deletedNodes); set_deletion_timestamps(tid, ts, insertedNodes, deletedNodes); if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_succeeded(tid, deletedNodes); } #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif } else { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_failed(tid, deletedNodes); } } return res; } // invoke at the start of each traversal inline void traversal_start(const int tid) { threadData[tid].hashlist->clear(); // // htm path // int limit = MAX_HTM_ATTEMPTS; // while (limit--) { // while (rwlock.isLocked()) {} // if (XBEGIN() == _XBEGIN_STARTED) { // if (rwlock.isLocked()) XABORT(1); // threadData[tid].rq_lin_time = ++timestamp; // linearization point of range query (at the write to timestamp) // XEND(); // ++threadData[tid].commitWriter; // goto committed; // } else ++threadData[tid].abortWriter; // } //committed: // fallback path rwlock.writeLock(); threadData[tid].rq_lin_time = ++timestamp; // linearization point of range query (at the write to timestamp) rwlock.writeUnlock(); } private: // invoke each time a traversal visits a node with a key in the desired range: // if the node belongs in the range query, it will be placed in rqResult[index] inline int __traversal_try_add(const int tid, NodeType * const node, NodeType ** const nodeSource, K * const outputKeys, V * const outputValues, const K& lo, const K& hi, bool foundDuringTraversal) { // rqResultKeys should have space for MAX_KEYS_PER_NODE keys, AT LEAST // in the following, rather than having deeply nested if-else blocks, // we return asap, and list facts that must be true if we didn't return assert(foundDuringTraversal || !logicalDeletion || ds->isLogicallyDeleted(tid, node)); long long itime = TIMESTAMP_NOT_SET; while (itime == TIMESTAMP_NOT_SET) { itime = node->itime; } if (node->itime >= threadData[tid].rq_lin_time) return 0; // node was inserted after the range query // fact: node was inserted before the range query bool logicallyDeleted = (logicalDeletion && ds->isLogicallyDeleted(tid, node)); long long dtime = TIMESTAMP_NOT_SET; if (!logicalDeletion && foundDuringTraversal) goto tryAddToRQ; // no logical deletion. since node was inserted before the range query, and the traversal encountered it, it must have been deleted AFTER the traversal encountered it. 
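        // The XBEGIN/XEND fast path in linearize_update_at_write/cas above follows the
        // standard RTM lock-elision recipe. A self-contained sketch using the raw Intel
        // intrinsics rather than this project's macros (illustrative only; assumes an
        // RWLock with the isWriteLocked/readLock/readUnlock interface used here):
        //
        //     #include <immintrin.h>
        //     long long elided_write(volatile long long * ts, long volatile * addr,
        //                            long newval, RWLock * lock, int attempts) {
        //         while (attempts--) {
        //             while (lock->isWriteLocked()) {}          // avoid the lemming effect
        //             if (_xbegin() == _XBEGIN_STARTED) {
        //                 if (lock->isWriteLocked()) _xabort(1); // subscribe to the lock
        //                 long long t = *ts;
        //                 *addr = newval;                        // original linearization point
        //                 _xend();
        //                 return t;
        //             }
        //         }
        //         lock->readLock();                              // non-speculative fallback
        //         long long t = *ts;
        //         *addr = newval;
        //         lock->readUnlock();
        //         return t;
        //     }
        //
        // Reading the lock inside the transaction adds it to the read set, so a range
        // query acquiring the write lock aborts all in-flight speculative updates.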
// fact: no logical deletion ==> did not find node during traversal dtime = node->dtime; if (dtime != TIMESTAMP_NOT_SET) { if (dtime < threadData[tid].rq_lin_time) return 0; // node was deleted before the range query goto tryAddToRQ; } // fact: dtime was not set above if (logicalDeletion && !logicallyDeleted) goto tryAddToRQ; // if logical deletion is used with marking, the fact that node was inserted before the range query, and that the traversal encountered node, is NOT enough to argue that node was in the data structure when the traversal started. why? when the traversal encountered node, it might have already been marked. so, we check if node is marked. if not, then the node has not yet been deleted. // fact: if there is logical deletion, then the node has now been deleted ///////////////////////// HANDLE UNKNOWN DTIME ///////////////////////// // if we are executing this because node was ANNOUNCED by a process, // as something that MIGHT soon be deleted (if nodeSource != NULL), // then node might not ever actually be deleted, // so we can't spin forever on dtime. if (nodeSource != NULL) { while (dtime == TIMESTAMP_NOT_SET && *nodeSource == node) { dtime = node->dtime; } if (dtime == TIMESTAMP_NOT_SET) { // above loop exited because the process removed its announcement to this node! // if the process deleted the node, then it removed the // announcement AFTER setting dtime. // so we reread dtime one more time, to figure out whether // the process actually deleted the node. SOFTWARE_BARRIER; // prevent read of dtime from happening before last read of *nodeSource dtime = node->dtime; if (dtime == TIMESTAMP_NOT_SET) { // since dtime is not set, the process did NOT delete the node. // so, either a DIFFERENT process deleted it, // or it was found during the data structure traversal. // if another process deleted it, then we will find it // either in that process' announcements, or in a limbo bag. return 0; } // the node has been deleted, and dtime is set, so we check dtime below. } } else { while (dtime == TIMESTAMP_NOT_SET) { dtime = node->dtime; } } if (dtime < threadData[tid].rq_lin_time) return 0; // node was deleted before the range query // fact: node was inserted before the rq and deleted after it ///////////////////// TRY TO ADD NODE'S KEYS TO RQ ///////////////////// // note: this way of organizing this decision tree favors trees with fat multi-key nodes, because getKeys is delayed as long as possible. tryAddToRQ: // fetch the node's keys that are in the set int cnt = ds->getKeys(tid, node, outputKeys, outputValues); assert(cnt < RQ_DEBUGGING_MAX_KEYS_PER_NODE); if (cnt == 0) return 0; // node doesn't contain any keys that are in the set // TODO: properly assert that getKeys doesn't run out of bounds on outputKeys[...] (i'm quite certain it doesn't, currently, though.) 
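        // Why the post-barrier reread of dtime above is sound (a sketch of the
        // handshake, using only the ordering already present in this file): a deleting
        // process executes (1) set node->dtime, (2) retire node, (3) remove its
        // announcement; this reader executes (a) observe the announcement gone,
        // (b) reread node->dtime. if the process really deleted node, step (1)
        // happened before step (3), so once (a) occurs, (b) must observe dtime set.
        // hence a dtime still unset at (b) proves this process did not delete node.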
// note: in the following loop, we shift keys in the outputKeys array left to eliminate any that ultimately should not be added to the range query int numNewKeys = 0; for (int i=0;iisInRange(outputKeys[i], lo, hi)) goto doNotAddToRQ; // key is NOT in the desired range if (threadData[tid].hashlist->contains(outputKeys[i])) goto doNotAddToRQ; // key is already in the range query outputKeys[numNewKeys] = outputKeys[i]; // save this as a new key added to the RQ outputValues[numNewKeys] = outputValues[i]; ++numNewKeys; doNotAddToRQ: (0); } return numNewKeys; } inline void traversal_try_add(const int tid, NodeType * const node, NodeType ** const nodeSource, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi, bool foundDuringTraversal) { //#if defined MICROBENCH && !defined NDEBUG // assert(*startIndex < 2*RQSIZE); // note: this assert is a hack. it should be *startIndex < size of rqResultKeys // if (*startIndex >= RQSIZE) { // cout<<"ERROR: *startIndex="<<(*startIndex)<<" is unexpectedly greater than or equal to RQSIZE="<insert(rqResultKeys[(*startIndex)++]); } // note: the above increments startIndex #if defined MICROBENCH assert(*startIndex <= RQSIZE); #endif } public: inline void traversal_try_add(const int tid, NodeType * const node, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { traversal_try_add(tid, node, NULL, rqResultKeys, rqResultValues, startIndex, lo, hi, true); } // invoke at the end of each traversal: // any nodes that were deleted during the traversal, // and were consequently missed during the traversal, // are placed in rqResult[index] void traversal_end(const int tid, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { // todo: possibly optimize by skipping entire blocks if there are many keys to skip (does not seem to be justifiable for 4 work threads and 4 range query threads) SOFTWARE_BARRIER; long long end_timestamp = timestamp; SOFTWARE_BARRIER; // collect nodes announced by other processes for (int otherTid=0;otherTid * all_bags[NUM_PROCESSES*NUMBER_OF_EPOCH_BAGS+1]; vector> all_iterators; int numIterators = 0; for (int otherTid=0;otherTid * thread_bags[NUMBER_OF_EPOCH_BAGS+1]; recmgr->get((NodeType *) NULL)->reclaim->getSafeBlockbags(otherTid, thread_bags); for (int i=0;thread_bags[i];++i) { all_bags[numIterators] = thread_bags[i]; all_iterators.push_back(thread_bags[i]->begin()); ++numIterators; } } int numSkippedInEpochBags = 0; int numVisitedInEpochBags = 0; for (int ix = 0; ix < numIterators; ++ix) { for (; all_iterators[ix] != all_bags[ix]->end(); all_iterators[ix]++) { NodeType * node = (*all_iterators[ix]); assert(node); ++numVisitedInEpochBags; ++numSkippedInEpochBags; long long dtime = node->dtime; if (dtime != TIMESTAMP_NOT_SET && dtime > end_timestamp) continue; --numSkippedInEpochBags; if (!(logicalDeletion && canRetireNodesLogicallyDeletedByOtherProcesses)) { // if we cannot retire nodes that are logically deleted // by other processes, then we always retire nodes in // order of increasing dtime values. // so, the blockbag will be ordered, which means that, // if dtime is before the RQ, then all remaining nodes // in this bag were deleted before the RQ. // so, in this case, we skip to the next bag. 
if (dtime != TIMESTAMP_NOT_SET && dtime < threadData[tid].rq_lin_time) break; } traversal_try_add(tid, node, NULL, rqResultKeys, rqResultValues, startIndex, lo, hi, false); } } #if defined MICROBENCH && !defined NDEBUG if (*startIndex > RQSIZE) { cout<<"ERROR: *startIndex="<<(*startIndex)<<" is unexpectedly greater than or equal to RQSIZE="< #include #include #include // the following define enables an optimization that i'm not sure is correct. //#define COLLECT_ANNOUNCEMENTS_FAST template class RQProvider { private: struct __rq_thread_data { #define __RQ_THREAD_DATA_SIZE 1024 union { struct { // anonymous struct inside anonymous union means we don't need to type anything special to access these variables long long rq_lin_time; HashList * hashlist; volatile char padding0[PREFETCH_SIZE_BYTES]; void * announcements[MAX_NODES_DELETED_ATOMICALLY+1]; int numAnnouncements; }; char bytes[__RQ_THREAD_DATA_SIZE]; // avoid false sharing }; } __attribute__((aligned(__RQ_THREAD_DATA_SIZE))); #define TIMESTAMP_NOT_SET 0 #define HASHLIST_INIT_CAPACITY_POW2 (1<<8) const int NUM_PROCESSES; volatile char padding0[PREFETCH_SIZE_BYTES]; volatile long long timestamp = 1; volatile char padding1[PREFETCH_SIZE_BYTES]; RWLock rwlock; volatile char padding2[PREFETCH_SIZE_BYTES]; __rq_thread_data * threadData; DataStructure * ds; RecordManager * const recmgr; int init[MAX_TID_POW2] = {0,}; public: RQProvider(const int numProcesses, DataStructure * ds, RecordManager * recmgr) : NUM_PROCESSES(numProcesses), ds(ds), recmgr(recmgr) { threadData = new __rq_thread_data[numProcesses]; DEBUG_INIT_RQPROVIDER(numProcesses); } ~RQProvider() { // for (int tid=0;tiddestroy(); // delete threadData[tid].hashlist; // } delete[] threadData; DEBUG_DEINIT_RQPROVIDER(NUM_PROCESSES); } // invoke before a given thread can perform any rq_functions void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; threadData[tid].hashlist = new HashList(); threadData[tid].hashlist->init(HASHLIST_INIT_CAPACITY_POW2); threadData[tid].numAnnouncements = 0; for (int i=0;idestroy(); delete threadData[tid].hashlist; DEBUG_DEINIT_THREAD(tid); } // invoke whenever a new node is created/initialized inline void init_node(const int tid, NodeType * const node) { node->itime = TIMESTAMP_NOT_SET; node->dtime = TIMESTAMP_NOT_SET; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any initialization of addr // with invocations of rq_write_addr template inline void write_addr(const int tid, T volatile * const addr, const T val) { *addr = val; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any reads of addr with // invocations of rq_read_addr template inline T read_addr(const int tid, T volatile * const addr) { return *addr; } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run some time BEFORE the physical deletion of a node // whose key has ALREADY been logically deleted. 
void announce_physical_deletion(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { threadData[tid].announcements[threadData[tid].numAnnouncements+i] = deletedNodes[i]; } SOFTWARE_BARRIER; threadData[tid].numAnnouncements += i; assert(threadData[tid].numAnnouncements <= MAX_NODES_DELETED_ATOMICALLY); SOFTWARE_BARRIER; } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node failed. void physical_deletion_failed(const int tid, NodeType * const * const deletedNodes) { for (int i=0;deletedNodes[i];++i) { --threadData[tid].numAnnouncements; #ifdef COLLECT_ANNOUNCEMENTS_FAST threadData[tid].announcements[threadData[tid].numAnnouncements] = NULL; #endif } assert(threadData[tid].numAnnouncements >= 0); } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node succeeded. void physical_deletion_succeeded(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { recmgr->retire(tid, deletedNodes[i]); } SOFTWARE_BARRIER; // ensure nodes are placed in the epoch bag BEFORE they are removed from announcements. threadData[tid].numAnnouncements -= i; assert(threadData[tid].numAnnouncements >= 0); } private: inline void set_insertion_timestamps( const int tid, const long long ts, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { // set insertion timestamps // for each i_node in insertedNodes for (int i_nodeix=0;insertedNodes[i_nodeix];++i_nodeix) { insertedNodes[i_nodeix]->itime = ts; } } inline void set_deletion_timestamps( const int tid, const long long ts, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { // set deletion timestamps // for each d_node in deletedNodes for (int d_nodeix=0;deletedNodes[d_nodeix];++d_nodeix) { deletedNodes[d_nodeix]->dtime = ts; } } public: // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a WRITE template inline T linearize_update_at_write( const int tid, T volatile * const lin_addr, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion announce_physical_deletion(tid, deletedNodes); } rwlock.readLock(); long long ts = timestamp; *lin_addr = lin_newval; // original linearization point rwlock.readUnlock(); set_insertion_timestamps(tid, ts, insertedNodes, deletedNodes); set_deletion_timestamps(tid, ts, insertedNodes, deletedNodes); if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_succeeded(tid, deletedNodes); } #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif return lin_newval; } // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a CAS template inline T linearize_update_at_cas( const int tid, T volatile * const lin_addr, const T& lin_oldval, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion announce_physical_deletion(tid, deletedNodes); } 
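        // The lock discipline below inverts the usual reader/writer roles: updates
        // share the lock in READ mode (so they run concurrently), and each range query
        // takes it in WRITE mode just long enough to bump the clock. A minimal sketch
        // of the idea (illustrative, not this class's code):
        //
        //     RWLock lock; volatile long long ts = 1;
        //     long long do_update(long volatile * a, long v) {
        //         lock.readLock();          // many updates may hold this concurrently
        //         long long t = ts;         // every update in this epoch sees the same ts
        //         *a = v;                   // original linearization point
        //         lock.readUnlock();
        //         return t;                 // stamp itime/dtime with t
        //     }
        //     long long rq_begin() {
        //         lock.writeLock();         // waits for all in-flight updates to drain
        //         long long t = ++ts;       // rq_lin_time
        //         lock.writeUnlock();
        //         return t;
        //     }
        //
        // No update can straddle the increment, so the stamps totally order updates
        // against queries without any per-update CAS on the clock.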
rwlock.readLock(); long long ts = timestamp; T res = __sync_val_compare_and_swap(lin_addr, lin_oldval, lin_newval); rwlock.readUnlock(); if (res == lin_oldval){ set_insertion_timestamps(tid, ts, insertedNodes, deletedNodes); set_deletion_timestamps(tid, ts, insertedNodes, deletedNodes); if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_succeeded(tid, deletedNodes); } #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif } else { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_failed(tid, deletedNodes); } } return res; } // invoke at the start of each traversal inline void traversal_start(const int tid) { threadData[tid].hashlist->clear(); rwlock.writeLock(); threadData[tid].rq_lin_time = ++timestamp; // linearization point of range query (at the write to timestamp) rwlock.writeUnlock(); } private: // invoke each time a traversal visits a node with a key in the desired range: // if the node belongs in the range query, it will be placed in rqResult[index] inline int __traversal_try_add(const int tid, NodeType * const node, NodeType ** const nodeSource, K * const outputKeys, V * const outputValues, const K& lo, const K& hi, bool foundDuringTraversal) { // rqResultKeys should have space for MAX_KEYS_PER_NODE keys, AT LEAST // in the following, rather than having deeply nested if-else blocks, // we return asap, and list facts that must be true if we didn't return assert(foundDuringTraversal || !logicalDeletion || ds->isLogicallyDeleted(tid, node)); long long itime = TIMESTAMP_NOT_SET; while (itime == TIMESTAMP_NOT_SET) { itime = node->itime; } if (node->itime >= threadData[tid].rq_lin_time) return 0; // node was inserted after the range query // fact: node was inserted before the range query bool logicallyDeleted = (logicalDeletion && ds->isLogicallyDeleted(tid, node)); long long dtime = TIMESTAMP_NOT_SET; if (!logicalDeletion && foundDuringTraversal) goto tryAddToRQ; // no logical deletion. since node was inserted before the range query, and the traversal encountered it, it must have been deleted AFTER the traversal encountered it. // fact: no logical deletion ==> did not find node during traversal dtime = node->dtime; if (dtime != TIMESTAMP_NOT_SET) { if (dtime < threadData[tid].rq_lin_time) return 0; // node was deleted before the range query goto tryAddToRQ; } // fact: dtime was not set above if (logicalDeletion && !logicallyDeleted) goto tryAddToRQ; // if logical deletion is used with marking, the fact that node was inserted before the range query, and that the traversal encountered node, is NOT enough to argue that node was in the data structure when the traversal started. why? when the traversal encountered node, it might have already been marked. so, we check if node is marked. if not, then the node has not yet been deleted. // fact: if there is logical deletion, then the node has now been deleted ///////////////////////// HANDLE UNKNOWN DTIME ///////////////////////// // if we are executing this because node was ANNOUNCED by a process, // as something that MIGHT soon be deleted (if nodeSource != NULL), // then node might not ever actually be deleted, // so we can't spin forever on dtime. if (nodeSource != NULL) { while (dtime == TIMESTAMP_NOT_SET && *nodeSource == node) { dtime = node->dtime; } if (dtime == TIMESTAMP_NOT_SET) { // above loop exited because the process removed its announcement to this node! 
// if the process deleted the node, then it removed the // announcement AFTER setting dtime. // so we reread dtime one more time, to figure out whether // the process actually deleted the node. SOFTWARE_BARRIER; // prevent read of dtime from happening before last read of *nodeSource dtime = node->dtime; if (dtime == TIMESTAMP_NOT_SET) { // since dtime is not set, the process did NOT delete the node. // so, either a DIFFERENT process deleted it, // or it was found during the data structure traversal. // if another process deleted it, then we will find it // either in that process' announcements, or in a limbo bag. return 0; } // the node has been deleted, and dtime is set, so we check dtime below. } } else { while (dtime == TIMESTAMP_NOT_SET) { dtime = node->dtime; } } if (dtime < threadData[tid].rq_lin_time) return 0; // node was deleted before the range query // fact: node was inserted before the rq and deleted after it ///////////////////// TRY TO ADD NODE'S KEYS TO RQ ///////////////////// // note: this way of organizing this decision tree favors trees with fat multi-key nodes, because getKeys is delayed as long as possible. tryAddToRQ: // fetch the node's keys that are in the set int cnt = ds->getKeys(tid, node, outputKeys, outputValues); assert(cnt < RQ_DEBUGGING_MAX_KEYS_PER_NODE); if (cnt == 0) return 0; // node doesn't contain any keys that are in the set // TODO: properly assert that getKeys doesn't run out of bounds on outputKeys[...] (i'm quite certain it doesn't, currently, though.) // note: in the following loop, we shift keys in the outputKeys array left to eliminate any that ultimately should not be added to the range query int numNewKeys = 0; for (int i=0;iisInRange(outputKeys[i], lo, hi)) goto doNotAddToRQ; // key is NOT in the desired range if (threadData[tid].hashlist->contains(outputKeys[i])) goto doNotAddToRQ; // key is already in the range query outputKeys[numNewKeys] = outputKeys[i]; // save this as a new key added to the RQ outputValues[numNewKeys] = outputValues[i]; ++numNewKeys; doNotAddToRQ: (0); } return numNewKeys; } inline void traversal_try_add(const int tid, NodeType * const node, NodeType ** const nodeSource, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi, bool foundDuringTraversal) { //#if defined MICROBENCH && !defined NDEBUG // assert(*startIndex < 2*RQSIZE); // note: this assert is a hack. 
it should be *startIndex < size of rqResultKeys // if (*startIndex >= RQSIZE) { // cout<<"ERROR: *startIndex="<<(*startIndex)<<" is unexpectedly greater than or equal to RQSIZE="<insert(rqResultKeys[(*startIndex)++]); } // note: the above increments startIndex #if defined MICROBENCH assert(*startIndex <= RQSIZE); #endif } public: inline void traversal_try_add(const int tid, NodeType * const node, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { traversal_try_add(tid, node, NULL, rqResultKeys, rqResultValues, startIndex, lo, hi, true); } // invoke at the end of each traversal: // any nodes that were deleted during the traversal, // and were consequently missed during the traversal, // are placed in rqResult[index] void traversal_end(const int tid, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { // todo: possibly optimize by skipping entire blocks if there are many keys to skip (does not seem to be justifiable for 4 work threads and 4 range query threads) SOFTWARE_BARRIER; long long end_timestamp = timestamp; SOFTWARE_BARRIER; // collect nodes announced by other processes #ifdef COLLECT_ANNOUNCEMENTS_FAST int numCollected = 0; NodeType * collectedAnnouncement[NUM_PROCESSES*MAX_NODES_DELETED_ATOMICALLY]; NodeType ** announcementSource[NUM_PROCESSES*MAX_NODES_DELETED_ATOMICALLY]; #endif for (int otherTid=0;otherTid * all_bags[NUM_PROCESSES*NUMBER_OF_EPOCH_BAGS+1]; vector> all_iterators; int numIterators = 0; for (int otherTid=0;otherTid * thread_bags[NUMBER_OF_EPOCH_BAGS+1]; recmgr->get((NodeType *) NULL)->reclaim->getSafeBlockbags(otherTid, thread_bags); for (int i=0;thread_bags[i];++i) { all_bags[numIterators] = thread_bags[i]; all_iterators.push_back(thread_bags[i]->begin()); ++numIterators; } } #ifdef COLLECT_ANNOUNCEMENTS_FAST // try to add nodes collected from process announcements to the RQ for (int i=0;iend(); all_iterators[ix]++) { NodeType * node = (*all_iterators[ix]); assert(node); ++numVisitedInEpochBags; ++numSkippedInEpochBags; long long dtime = node->dtime; if (dtime != TIMESTAMP_NOT_SET && dtime > end_timestamp) continue; --numSkippedInEpochBags; if (!(logicalDeletion && canRetireNodesLogicallyDeletedByOtherProcesses)) { // if we cannot retire nodes that are logically deleted // by other processes, then we always retire nodes in // order of increasing dtime values. // so, the blockbag will be ordered, which means that, // if dtime is before the RQ, then all remaining nodes // in this bag were deleted before the RQ. // so, in this case, we skip to the next bag. 
if (dtime != TIMESTAMP_NOT_SET && dtime < threadData[tid].rq_lin_time) break; } traversal_try_add(tid, node, NULL, rqResultKeys, rqResultValues, startIndex, lo, hi, false); } } #if defined MICROBENCH && !defined NDEBUG if (*startIndex > RQSIZE) { cout<<"ERROR: *startIndex="<<(*startIndex)<<" is unexpectedly greater than or equal to RQSIZE="< #include #include #include "snapcollector.h" template class RQProvider { private: struct __rq_thread_data { #define __RQ_THREAD_DATA_SIZE 1024 union { struct { // anonymous struct inside anonymous union means we don't need to type anything special to access these variables long long rq_lin_time; SnapCollector * currentSnapCollector; SnapCollector * snapCollectorToRetire; }; char bytes[__RQ_THREAD_DATA_SIZE]; // avoid false sharing }; } __attribute__((aligned(__RQ_THREAD_DATA_SIZE))); const int NUM_PROCESSES; volatile long long timestamp = 1; pthread_rwlock_t rwlock; __rq_thread_data * threadData; DataStructure * const ds; RecordManager * const recmgr; volatile char padding[PREFETCH_SIZE_BYTES]; SnapCollector * volatile snapPointer; int init[MAX_TID_POW2] = {0,}; public: RQProvider(const int numProcesses, DataStructure * ds, RecordManager * recmgr) : NUM_PROCESSES(numProcesses), ds(ds), recmgr(recmgr) { assert(logicalDeletion); // Timnat's iterator algorithm REQUIRES logical deletion! if (pthread_rwlock_init(&rwlock, NULL)) error("could not init rwlock"); threadData = new __rq_thread_data[numProcesses]; const int dummyTid = 0; recmgr->initThread(dummyTid); // must initialize record manager before allocating!! initThread(dummyTid); // initialize dummy snap collector snapPointer = recmgr->template allocate >(dummyTid); #ifdef __HANDLE_STATS GSTATS_APPEND(dummyTid, extra_type1_allocated_addresses, ((long long) snapPointer)%(1<<12)); #endif snapPointer->init(dummyTid, numProcesses, recmgr, ds->KEY_MIN, ds->KEY_MAX+1); snapPointer->BlockFurtherPointers(dummyTid, recmgr); snapPointer->Deactivate(NULL, NULL, NULL); snapPointer->BlockFurtherReports(); DEBUG_INIT_RQPROVIDER(numProcesses); } ~RQProvider() { if (pthread_rwlock_destroy(&rwlock)) error("could not destroy rwlock"); delete[] threadData; snapPointer->retire(0 /* dummy tid */, recmgr); DEBUG_DEINIT_RQPROVIDER(NUM_PROCESSES); } // invoke before a given thread can perform any rq_functions void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; threadData[tid].rq_lin_time = 0; threadData[tid].currentSnapCollector = NULL; threadData[tid].snapCollectorToRetire = NULL; DEBUG_INIT_THREAD(tid); } // invoke once a given thread will no longer perform any rq_ functions void deinitThread(const int tid) { if (!init[tid]) return; else init[tid] = !init[tid]; DEBUG_DEINIT_THREAD(tid); } // invoke whenever a new node is created/initialized inline void init_node(const int tid, NodeType * const node) {} // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any initialization of addr // with invocations of rq_write_addr template inline void write_addr(const int tid, T volatile * const addr, const T val) { *addr = val; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any reads of addr with // invocations of rq_read_addr template inline T read_addr(const int tid, T volatile * const addr) { return *addr; } /** * Added function only for Timnat's SnapCollector. * This must be invoked just before the return statement of every search. 
*/ inline void search_report_target_key(const int tid, const K key, NodeType * const node) { SnapCollector * sc = snapPointer; if (sc->IsActive()) { ReportType type = ds->isLogicallyDeleted(tid, node) ? ReportType::Remove : ReportType::Add; sc->Report(tid, node, type, key, recmgr); } SOFTWARE_BARRIER; } /** * Added function only for Timnat's SnapCollector. * This must be invoked just before the return statement of every insertion * that does not modify the data structure. */ inline void insert_readonly_report_target_key(const int tid, NodeType * const node) { SnapCollector * sc = snapPointer; if (sc->IsActive()) { if (!ds->isLogicallyDeleted(tid, node)) { sc->Report(tid, node, ReportType::Add, node->key, recmgr); } } SOFTWARE_BARRIER; } /** * Added function only for Timnat's SnapCollector. * This can be invoked to determine if the current SnapCollector is active. */ inline bool traversal_is_active(const int tid) { return threadData[tid].currentSnapCollector->IsActive(); } private: inline void delete_report_target_key(const int tid, NodeType * const node) { if (node) { SnapCollector * sc = snapPointer; if (sc->IsActive()) { sc->Report(tid, node, ReportType::Remove, node->key, recmgr); } SOFTWARE_BARRIER; } } public: // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run some time BEFORE the physical deletion of a node // whose key has ALREADY been logically deleted. inline void announce_physical_deletion(const int tid, NodeType * const * const deletedNodes) { assert(!deletedNodes[0] || !deletedNodes[1]); delete_report_target_key(tid, deletedNodes[0]); } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node failed. inline void physical_deletion_failed(const int tid, NodeType * const * const deletedNodes) {} // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node succeeded. 
inline void physical_deletion_succeeded(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { recmgr->retire(tid, deletedNodes[i]); } } // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a WRITE template inline T linearize_update_at_write( const int tid, T volatile * const lin_addr, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { assert((insertedNodes[0] && !deletedNodes[0]) || (!insertedNodes[0] && deletedNodes[0])); #ifdef RQ_USE_TIMESTAMPS if (pthread_rwlock_rdlock(&rwlock)) error("could not read-lock rwlock"); long long ts = timestamp; #else long long ts = 1; #endif *lin_addr = lin_newval; // original linearization point #ifdef RQ_USE_TIMESTAMPS if (pthread_rwlock_unlock(&rwlock)) error("could not read-unlock rwlock"); #endif if (insertedNodes[0]) insert_readonly_report_target_key(tid, insertedNodes[0]); if (deletedNodes[0]) delete_report_target_key(tid, deletedNodes[0]); #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif return lin_newval; } // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a CAS template inline T linearize_update_at_cas( const int tid, T volatile * const lin_addr, const T& lin_oldval, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { assert((insertedNodes[0] && !deletedNodes[0]) || (!insertedNodes[0] && deletedNodes[0])); #ifdef RQ_USE_TIMESTAMPS if (pthread_rwlock_rdlock(&rwlock)) error("could not read-lock rwlock"); long long ts = timestamp; #else long long ts = 1; #endif T res = __sync_val_compare_and_swap(lin_addr, lin_oldval, lin_newval); #ifdef RQ_USE_TIMESTAMPS if (pthread_rwlock_unlock(&rwlock)) error("could not read-unlock rwlock"); #endif if (res == lin_oldval){ if (insertedNodes[0]) insert_readonly_report_target_key(tid, insertedNodes[0]); if (deletedNodes[0]) delete_report_target_key(tid, deletedNodes[0]); #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif } return res; } // invoke at the start of each traversal inline void traversal_start(const int tid) { #if !defined(RQ_USE_TIMESTAMPS) threadData[tid].rq_lin_time = 1; #endif threadData[tid].currentSnapCollector = snapPointer; SOFTWARE_BARRIER; if (!threadData[tid].currentSnapCollector->IsActive()) { SnapCollector * candidate = recmgr->template allocate >(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type1_allocated_addresses, ((long long) candidate)%(1<<12)); #endif candidate->init(tid, NUM_PROCESSES, recmgr, ds->KEY_MIN, ds->KEY_MAX+1); if (__sync_bool_compare_and_swap(&snapPointer, threadData[tid].currentSnapCollector, candidate)) { // delay retiring until later, because we've started accepting reports, // and we don't want to waste time while we are accepting reports, // because we don't want to receive many reports... 
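// NOTE (added commentary): exactly one thread wins the CAS on snapPointer
// above and becomes responsible for retiring the old, deactivated collector;
// that retirement is deferred (via snapCollectorToRetire) to traversal_end,
// for the reason explained above. Losing threads retire their unused
// candidate instead and re-read the newly published collector.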
threadData[tid].snapCollectorToRetire = threadData[tid].currentSnapCollector; threadData[tid].currentSnapCollector = candidate; } else { candidate->retire(tid, recmgr); threadData[tid].currentSnapCollector = snapPointer; } } // usleep(200000); } inline NodeType * traversal_try_add(const int tid, NodeType * const node, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { SnapCollector * sc = threadData[tid].currentSnapCollector; return sc->AddNode(tid, node, node->key, recmgr); } // invoke at the end of each traversal: // any nodes that were deleted during the traversal, // and were consequently missed during the traversal, // are placed in rqResult[index] void traversal_end(const int tid, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { SnapCollector * sc = threadData[tid].currentSnapCollector; sc->BlockFurtherPointers(tid, recmgr); SOFTWARE_BARRIER; sc->Deactivate(NULL, NULL, NULL); sc->BlockFurtherReports(); SOFTWARE_BARRIER; sc->Prepare(tid, recmgr); NodeType * curr = NULL; while ((curr = sc->GetNext(tid))) { if (curr->key < lo) continue; if (curr->key > hi) break; rqResultKeys[*startIndex] = curr->key; rqResultValues[*startIndex] = curr->val; ++*startIndex; } #if defined MICROBENCH assert(*startIndex <= RQSIZE); #endif #ifdef SNAPCOLLECTOR_PRINT_RQS // for (int i=0;i<*startIndex;++i) { // cout<<" "<retire(tid, recmgr); threadData[tid].snapCollectorToRetire = NULL; } } }; #endif /* RQ_RWLOCK_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/rq/rq_unsafe.h ================================================ /* * File: rq_unsafe.h * Author: trbot * * Created on May 15, 2017, 5:06 PM */ #ifndef RQ_UNSAFE_H #define RQ_UNSAFE_H #include "rq_debugging.h" #include #include #ifndef casword_t #define casword_t uintptr_t #endif template class RQProvider { private: struct __rq_thread_data { #define __RQ_THREAD_DATA_SIZE 1024 union { struct { // anonymous struct inside anonymous union means we don't need to type anything special to access these variables long long rq_lin_time; }; char bytes[__RQ_THREAD_DATA_SIZE]; // avoid false sharing }; } __attribute__((aligned(__RQ_THREAD_DATA_SIZE))); #define TIMESTAMP_NOT_SET 0 const int NUM_PROCESSES; volatile char padding0[PREFETCH_SIZE_BYTES]; volatile long long timestamp = 1; volatile char padding1[PREFETCH_SIZE_BYTES]; RWLock rwlock; volatile char padding2[PREFETCH_SIZE_BYTES]; __rq_thread_data * threadData; DataStructure * ds; RecordManager * const recmgr; int init[MAX_TID_POW2] = {0,}; public: RQProvider(const int numProcesses, DataStructure * ds, RecordManager * recmgr) : NUM_PROCESSES(numProcesses), ds(ds), recmgr(recmgr) { threadData = new __rq_thread_data[numProcesses]; DEBUG_INIT_RQPROVIDER(numProcesses); } ~RQProvider() { delete[] threadData; DEBUG_DEINIT_RQPROVIDER(NUM_PROCESSES); } // invoke before a given thread can perform any rq_functions void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; DEBUG_INIT_THREAD(tid); } // invoke once a given thread will no longer perform any rq_ functions void deinitThread(const int tid) { if (!init[tid]) return; else init[tid] = !init[tid]; DEBUG_DEINIT_THREAD(tid); } // invoke whenever a new node is created/initialized inline void init_node(const int tid, NodeType * const node) {} // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any initialization of addr 
// with invocations of rq_write_addr template inline void write_addr(const int tid, T volatile * const addr, const T val) { *addr = val; } // for each address addr that is modified by rq_linearize_update_at_write // or rq_linearize_update_at_cas, you must replace any reads of addr with // invocations of rq_read_addr template inline T read_addr(const int tid, T volatile * const addr) { return *addr; } // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run some time BEFORE the physical deletion of a node // whose key has ALREADY been logically deleted. inline void announce_physical_deletion(const int tid, NodeType * const * const deletedNodes) {} // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node failed. inline void physical_deletion_failed(const int tid, NodeType * const * const deletedNodes) {} // IF DATA STRUCTURE PERFORMS LOGICAL DELETION // run AFTER performing announce_physical_deletion, // if the cas that was trying to physically delete node succeeded. inline void physical_deletion_succeeded(const int tid, NodeType * const * const deletedNodes) { int i; for (i=0;deletedNodes[i];++i) { recmgr->retire(tid, deletedNodes[i]); } } // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a WRITE template inline T linearize_update_at_write( const int tid, T volatile * const lin_addr, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion announce_physical_deletion(tid, deletedNodes); } #ifdef RQ_USE_TIMESTAMPS rwlock.readLock(); long long ts = timestamp; #else long long ts = 1; #endif *lin_addr = lin_newval; // original linearization point #ifdef RQ_USE_TIMESTAMPS rwlock.readUnlock(); #endif if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_succeeded(tid, deletedNodes); } #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif return lin_newval; } // replace the linearization point of an update that inserts or deletes nodes // with an invocation of this function if the linearization point is a CAS template inline T linearize_update_at_cas( const int tid, T volatile * const lin_addr, const T& lin_oldval, const T& lin_newval, NodeType * const * const insertedNodes, NodeType * const * const deletedNodes) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion announce_physical_deletion(tid, deletedNodes); } #ifdef RQ_USE_TIMESTAMPS rwlock.readLock(); long long ts = timestamp; #else long long ts = 1; #endif T res = __sync_val_compare_and_swap(lin_addr, lin_oldval, lin_newval); // original linearization point #ifdef RQ_USE_TIMESTAMPS rwlock.readUnlock(); #endif if (res == lin_oldval) { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_succeeded(tid, deletedNodes); } #if defined USE_RQ_DEBUGGING DEBUG_RECORD_UPDATE_CHECKSUM(tid, ts, insertedNodes, deletedNodes, ds); #endif } else { if (!logicalDeletion) { // physical deletion will happen at the same time as logical deletion physical_deletion_failed(tid, deletedNodes); } } return res; } // invoke at the start of each traversal inline void traversal_start(const int tid) { #ifdef RQ_USE_TIMESTAMPS 
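// NOTE (added commentary): under RQ_USE_TIMESTAMPS, updates hold this rwlock
// in READ mode around their linearization points (see
// linearize_update_at_write/_cas above), so acquiring it in WRITE mode here
// lets the range query increment the shared timestamp at an instant when no
// update is between reading the timestamp and performing its linearizing
// write/CAS.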
rwlock.writeLock(); threadData[tid].rq_lin_time = ++timestamp; // linearization point of range query (at the write to timestamp) rwlock.writeUnlock(); #endif } // invoke each time a traversal visits a node with a key in the desired range: // if the node belongs in the range query, it will be placed in rqResult[index] inline void traversal_try_add(const int tid, NodeType * const node, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { int start = (*startIndex); int keysInNode = ds->getKeys(tid, node, rqResultKeys+start, rqResultValues+start); assert(keysInNode < RQ_DEBUGGING_MAX_KEYS_PER_NODE); if (keysInNode == 0) return; int location = start; for (int i=start;iisInRange(rqResultKeys[i], lo, hi)){ rqResultKeys[location] = rqResultKeys[i]; rqResultValues[location] = rqResultValues[i]; ++location; } } *startIndex = location; #if defined MICROBENCH assert(*startIndex <= RQSIZE); #endif } // invoke at the end of each traversal: // any nodes that were deleted during the traversal, // and were consequently missed during the traversal, // are placed in rqResult[index] inline void traversal_end(const int tid, K * const rqResultKeys, V * const rqResultValues, int * const startIndex, const K& lo, const K& hi) { DEBUG_RECORD_RQ_SIZE(*startIndex); DEBUG_RECORD_RQ_CHECKSUM(tid, threadData[tid].rq_lin_time, rqResultKeys, *startIndex); } }; #endif /* RQ_UNSAFE_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/rq/snapcollector/reportitem.h ================================================ /* * File: reportitem.h * Author: trbot * * Created on June 21, 2017, 4:47 PM */ #ifndef REPORTITEM_H #define REPORTITEM_H enum ReportType {Add, Remove}; static int getOrdinalForReportType(ReportType t) { return (t == ReportType::Add); } class ReportItem { public: void * node; ReportType t; ReportItem * volatile next; int key; int id; ReportItem() {} void init(void * node, ReportType t, int key) { this->node = node; this->t = t; next = NULL; this->key = key; id = 0; } }; class CompactReportItem { public: void * node; ReportType t; int key; int id; CompactReportItem() {} void init(void * node, ReportType t, int key) { this->node = node; this->t = t; this->key = key; id = 0; } }; struct { bool operator()(CompactReportItem * a, CompactReportItem * b) const { if (a->key != b->key) return a->key < b->key; if (a->node != b->node) return (long long) a->node < (long long) b->node; return getOrdinalForReportType(a->t) < getOrdinalForReportType(b->t); } } compareCRI; #endif /* REPORTITEM_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/rq/snapcollector/snapcollector.h ================================================ /* * File: snapcollector.h * Author: trbot * * Created on June 21, 2017, 4:57 PM */ #ifndef SNAPCOLLECTOR_H #define SNAPCOLLECTOR_H #include #include #include #include "reportitem.h" #include template class SnapCollector { public: int NUM_THREADS; class NodeWrapper { public: NodeType * node; NodeWrapper * volatile next; K key; NodeWrapper() {} void init(K key) { this->node = NULL; this->next = NULL; this->key = key; } void init(NodeType * node, K key) { this->node = node; this->next = NULL; this->key = key; } }; private: ReportItem * volatile * reportHeads; ReportItem * volatile * reportTails; NodeWrapper * volatile head; NodeWrapper * volatile tail; ReportItem * blocker; volatile bool active; // variables used for aggregating reports after they are collected void ** 
currLocations; int * currRepLocations; std::vector * volatile gAllReports; K KEY_MAX; K KEY_MIN; private: inline bool isBlocker(NodeWrapper const * const wrapper) { if (wrapper) { K key = wrapper->key; NodeType * node = wrapper->node; K key2 = KEY_MAX; return (key == key2 && node == NULL); } return false; } template inline void __retireAllReports(const int tid, std::vector * v, RecordManager * recmgr) { if (v == NULL) return; for (auto it = v->begin(); it != v->end(); it++) { // retire compact report items recmgr->retire(tid, *it); } } template inline void __deallocateAllReports(const int tid, std::vector * v, RecordManager * recmgr) { if (v == NULL) return; for (auto it = v->begin(); it != v->end(); it++) { // deallocate compact report items recmgr->deallocate(tid, *it); } delete v; } public: template void init(const int tid, const int numProcesses, RecordManager * const recmgr, const K _KEY_MIN, const K _KEY_MAX) { this->KEY_MIN = _KEY_MIN; this->KEY_MAX = _KEY_MAX; this->NUM_THREADS = numProcesses; this->reportHeads = new ReportItem * volatile [NUM_THREADS*PREFETCH_SIZE_WORDS]; this->reportTails = new ReportItem * volatile [NUM_THREADS*PREFETCH_SIZE_WORDS]; // head = new NodeWrapper(std::numeric_limits::min()) this->head = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type2_allocated_addresses, ((long long) head)%(1<<12)); #endif this->head->init(this->KEY_MIN); this->tail = this->head; // oldTail = NULL; // blocker = new ReportItem(NULL, ReportType::Add, -1) this->blocker = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type3_allocated_addresses, ((long long) blocker)%(1<<12)); #endif this->blocker->init(NULL, ReportType::Add, -1); this->active = true; this->currLocations = new void * [NUM_THREADS*PREFETCH_SIZE_WORDS]; this->currRepLocations = new int[NUM_THREADS*PREFETCH_SIZE_WORDS]; this->gAllReports = NULL; for (int i=0;ireportHeads[i*PREFETCH_SIZE_WORDS] = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type3_allocated_addresses, ((long long) reportHeads[i*PREFETCH_SIZE_WORDS])%(1<<12)); #endif this->reportHeads[i*PREFETCH_SIZE_WORDS]->init(NULL, ReportType::Add, -1); // sentinel head. this->reportTails[i*PREFETCH_SIZE_WORDS] = this->reportHeads[i*PREFETCH_SIZE_WORDS]; this->currLocations[i*PREFETCH_SIZE_WORDS] = NULL; this->currRepLocations[i*PREFETCH_SIZE_WORDS] = 0; } } ~SnapCollector() { if (reportHeads) delete[] reportHeads; if (reportTails) delete[] reportTails; if (currLocations) delete[] currLocations; if (currRepLocations) delete[] currRepLocations; if (gAllReports) delete gAllReports; } template void retire(const int tid, RecordManager * recmgr) { // retire report items for (int i=0;iretire(tid, curr); // blocker can exist in many per-thread lists, but we only want to retire it once, below. 
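// NOTE (added commentary): BlockFurtherReports() appends the SAME shared
// blocker sentinel to every per-thread report list, so each list traversal
// here must stop at (and not retire) the blocker; it is retired once, below.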
curr = curr->next; } } // retire blocker recmgr->retire(tid, blocker); // if a thread has changed tail to point to a "blocker," then // threads may have appended node wrappers to the blocker, // so we have to retire any such node wrappers NodeWrapper * curr = this->tail; if (isBlocker(curr)) { while (curr != NULL) { recmgr->retire(tid, curr); curr = curr->next; } } // retire node wrappers curr = head; while (curr != NULL) { // && curr != tail /*&& curr != oldTail*/) { recmgr->retire(tid, curr); curr = curr->next; } // retire the contents of gAllReports __retireAllReports(tid, gAllReports, recmgr); // retire snap collector recmgr->retire(tid, this); } // TIMNAT: Implemented according to the optimization in A.3: // TIMNAT: Only accept nodes whose key is higher than the last, and return the last node. template NodeType * AddNode(const int tid, NodeType * node, K key, RecordManager * recmgr) { NodeWrapper * last = tail; if (last->key >= key) // TIMNAT: trying to add an out of place node. return last->node; // advance tail pointer if needed if (last->next != NULL) { if (last == tail) __sync_bool_compare_and_swap(&tail, last, last->next); return tail->node; } NodeWrapper * newNode = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type2_allocated_addresses, ((long long) newNode)%(1<<12)); #endif newNode->init(node, key); if (__sync_bool_compare_and_swap(&last->next, NULL, newNode)) { __sync_bool_compare_and_swap(&tail, last, newNode); return node; } else { recmgr->deallocate(tid, newNode); return tail->node; } } template void Report(int tid, NodeType * Node, ReportType t, K key, RecordManager * recmgr) { ReportItem * reportTail = reportTails[tid*PREFETCH_SIZE_WORDS]; ReportItem * newItem = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type3_allocated_addresses, ((long long) newItem)%(1<<12)); #endif newItem->init(Node, t, key); if (__sync_bool_compare_and_swap(&reportTail->next, NULL, newItem)) { reportTails[tid*PREFETCH_SIZE_WORDS] = newItem; } else { recmgr->deallocate(tid, newItem); } } bool IsActive() { // __sync_synchronize(); // SOFTWARE_BARRIER; bool result = active; // SOFTWARE_BARRIER; return result; } template void BlockFurtherPointers(const int tid, RecordManager * recmgr) { NodeWrapper * blocker = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type2_allocated_addresses, ((long long) blocker)%(1<<12)); #endif blocker->init(NULL, KEY_MAX); #if 1 while (true) { NodeWrapper * old = this->tail; if (isBlocker(old)) { // old is a blocker, so no need to add our own blocker recmgr->deallocate(tid, blocker); return; } if (__sync_bool_compare_and_swap(&this->tail, old, blocker)) { return; } } #else tail = blocker; #endif } /** * note: the parameters are used for the timestamping mechanism of the * test harness. they are NOT inherently needed by the snap collector. */ void Deactivate(pthread_rwlock_t * const rwlock, volatile long long * timestamp, long long * rq_lin_time) { #ifdef RQ_USE_TIMESTAMPS if (pthread_rwlock_wrlock(rwlock)) error("could not write-lock rwlock"); active = false; // range query is linearized here *timestamp = *timestamp + 1; *rq_lin_time = *timestamp; //++(*timestamp); //std::cout<<"timestamp="<<*timestamp<next == NULL) __sync_bool_compare_and_swap(&reportTail->next, NULL, blocker); // assert cas succeeded OR reportTail->next == blocker } } private: // TIMNAT: What follows is functions that are used to work with the snapshot while it is // TIMNAT: already taken. 
These functions are used to iterate over the nodes of the snapshot. template void AddReports(const int tid, std::vector * allReports, ReportItem * curr, RecordManager * recmgr) { curr = curr->next; while (curr != NULL && curr != blocker) { CompactReportItem * newItem = recmgr->template allocate(tid); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, extra_type4_allocated_addresses, ((long long) newItem)%(1<<12)); #endif newItem->init(curr->node, curr->t, curr->key); allReports->push_back(newItem); curr = curr->next; } } public: // An optimization: sort the reports and nodes. template void Prepare(int tid, RecordManager * recmgr) { currLocations[tid*PREFETCH_SIZE_WORDS] = head; currRepLocations[tid*PREFETCH_SIZE_WORDS] = 0; if (gAllReports != NULL) return; std::vector * allReports = new std::vector(); for (int i = 0; i < NUM_THREADS; i++) { AddReports(tid, allReports, reportHeads[i*PREFETCH_SIZE_WORDS], recmgr); if (gAllReports != NULL) { // failed to publish allReports -- clean it up __deallocateAllReports(tid, allReports, recmgr); return; } } assert(!active); #ifdef SNAPCOLLECTOR_PRINT_RQS std::cout<<"this="<<(long long) this<<" allReports size="<size()<begin(), allReports->end(), compareCRI); if (__sync_bool_compare_and_swap(&gAllReports, NULL, allReports)) { // published allReports } else { // failed to publish allReports -- clean it up __deallocateAllReports(tid, allReports, recmgr); } } NodeType * GetNext(int tid) { NodeWrapper * currLoc = (NodeWrapper *) currLocations[tid*PREFETCH_SIZE_WORDS]; int currRepLoc = currRepLocations[tid*PREFETCH_SIZE_WORDS]; std::vector * allReports = gAllReports; while (true) { CompactReportItem * rep = NULL; K repKey = KEY_MAX; if (allReports->size() > currRepLoc) { rep = (*allReports)[currRepLoc]; repKey = rep->key; } K nodeKey = KEY_MAX; NodeWrapper * next = currLoc->next; if (next != NULL) { nodeKey = next->key; } // Option 1: node key < rep key. Return node. if (nodeKey < repKey) { currLocations[tid*PREFETCH_SIZE_WORDS] = next; currRepLocations[tid*PREFETCH_SIZE_WORDS] = currRepLoc; return next->node; } // Option 2: node key == rep key if (nodeKey == repKey) { // 2.a - both are infinity - iteration done. if (nodeKey == KEY_MAX) { currLocations[tid*PREFETCH_SIZE_WORDS] = currLoc; currRepLocations[tid*PREFETCH_SIZE_WORDS] = currRepLoc; return NULL; } // node and report with the same key :: // skip not-needed reports while (currRepLoc + 1 < allReports->size()) { CompactReportItem * nextRep = (*allReports)[currRepLoc + 1]; // dismiss a duplicate, or an insert followed by a matching delete: if (rep->key == nextRep->key && rep->node == nextRep->node) { currRepLoc++; rep = nextRep; } else { break; } } // standing on an insert report to a node I am holding: // 1. Return the current node. // 2. Skip over rest of reports for that key. if (rep->t == ReportType::Add && (NodeType *) rep->node == next->node) { while (currRepLoc < allReports->size() && (*allReports)[currRepLoc]->key == rep->key) { currRepLoc++; } currRepLocations[tid*PREFETCH_SIZE_WORDS] = currRepLoc; currLocations[tid*PREFETCH_SIZE_WORDS] = next; return next->node; } // standing on an insert report to a different node than I hold: // 1. Return the reported node. // 2. Skip over rest of reports for that key. 
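// Illustrative example (added; keys are hypothetical): suppose the collected
// node list holds keys {3, 7} and the sorted reports are
// {<Add,5>, <Remove,7>} where the Remove refers to the collected node 7.
// Successive GetNext calls return the node with key 3 (Option 1), then the
// node carried by <Add,5> (Option 3, insert report), then skip key 7 (delete
// report for the held node) and return NULL once both sides reach KEY_MAX.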
if (rep->t == ReportType::Add && (NodeType *) rep->node != next->node) { NodeType * returnValue = (NodeType *) rep->node; while (currRepLoc < allReports->size() && (*allReports)[currRepLoc]->key == rep->key) { currRepLoc++; } currRepLocations[tid*PREFETCH_SIZE_WORDS] = currRepLoc; currLocations[tid*PREFETCH_SIZE_WORDS] = next; return returnValue; } // standing on a delete report to a different node than I hold: // skip over it and continue the big loop. if (rep->t == ReportType::Remove && (NodeType *) rep->node != next->node) { currRepLoc++; continue; } // standing on a delete report to the node that I hold: // 1. advance over the node that I hold. // 2. advance with the report. // 3. continue the bigloop currLoc = next; currRepLoc++; continue; } // Option 3: node key > rep key if (nodeKey > repKey) { // skip not-needed reports while (currRepLoc + 1 < allReports->size()) { CompactReportItem * nextRep = (*allReports)[currRepLoc + 1]; // dismiss a duplicate, or an insert followed by a matching delete: if (rep->key == nextRep->key && rep->node == nextRep->node) { currRepLoc++; rep = nextRep; } else { break; } } // a delete report - skip over it. if (rep->t == ReportType::Remove) { currRepLoc++; continue; } // an insert report: // 1. skip over rest of the reports for the same key. // 2. return the node. if (rep->t == ReportType::Add) { NodeType * returnValue = (NodeType *) rep->node; while (currRepLoc < allReports->size() && (*allReports)[currRepLoc]->key == rep->key) { currRepLoc++; } currRepLocations[tid*PREFETCH_SIZE_WORDS] = currRepLoc; currLocations[tid*PREFETCH_SIZE_WORDS] = currLoc; return returnValue; } } } } }; #endif /* SNAPCOLLECTOR_H */ ================================================ FILE: datastructures/trevor_brown_abtree/common/rq/snapcollector/snapcollector_test.cpp ================================================ /* * File: test.cpp * Author: trbot * * Created on June 21, 2017, 5:25 PM */ #include #include #include #include "snapcollector.h" #include "rq_snapcollector.h" using namespace std; class Node { public: int key; volatile bool marked; volatile long long itime; volatile long long dtime; Node(int key) : key(key) {} }; class DataStructure { public: inline bool isLogicallyDeleted(const int tid, Node * node) { return node->marked; } inline int getKeys(const int tid, Node * node, int * const outputKeys) { outputKeys[0] = node->key; return 1; } bool isInRange(const int& key, const int& lo, const int& hi) { return lo <= key && key <= hi; } }; /* * */ int main(int argc, char** argv) { DataStructure ds; Node node (17); const int numProcessors = 1; SnapCollector sc (numProcessors); sc.AddNode(&node, node.key); RQProvider prov (numProcessors, &ds); Node * inserted[] = {NULL}; Node * deleted[] = {NULL}; prov.linearize_update_at_cas(1, &node.key, 17, 18, inserted, deleted, (void *) NULL); return 0; } ================================================ FILE: datastructures/trevor_brown_abtree/common/rwlock.h ================================================ /* * File: rwlock.h * Author: trbot * * Created on June 29, 2017, 8:25 PM */ #ifndef RWLOCK_H #define RWLOCK_H #ifdef RWLOCK_PTHREADS #elif defined RWLOCK_FAVOR_WRITERS #elif defined RWLOCK_FAVOR_READERS #else // #warning "No RWLOCK implementation specified... using default: favour READERS. See rwlock.h for options. Note that this setting only affects algorithms that use the lock-based range query provider in common/rq/rq_rwlock.h." 
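// For reference, a reader-preference lock can be sketched with a single
// atomic counter (illustrative only -- NOT the RWLOCK_FAVOR_READERS
// implementation selected below; SimpleRWLock is a hypothetical name):
//
//   #include <atomic>
//   class SimpleRWLock {
//       std::atomic<int> state {0};  // -1: writer holds lock, >=0: reader count
//   public:
//       void readLock() {            // readers barge ahead of waiting writers
//           while (true) {
//               int s = state.load();
//               if (s >= 0 && state.compare_exchange_weak(s, s + 1)) return;
//           }
//       }
//       void readUnlock()  { state.fetch_sub(1); }
//       void writeLock()   { int e = 0; while (!state.compare_exchange_weak(e, -1)) e = 0; }
//       void writeUnlock() { state.store(0); }
//   };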
#define RWLOCK_FAVOR_READERS // #error Must specify RWLOCK implementation; see rwlock.h #endif #ifdef RWLOCK_PTHREADS class RWLock { private: pthread_rwlock_t lock; public: RWLock() { if (pthread_rwlock_init(&lock, NULL)) error("could not init rwlock"); } ~RWLock() { if (pthread_rwlock_destroy(&lock)) error("could not destroy rwlock"); } inline void readLock() { if (pthread_rwlock_rdlock(&lock)) error("could not read-lock rwlock"); } inline void readUnlock() { if (pthread_rwlock_unlock(&lock)) error("could not read-unlock rwlock"); } inline void writeLock() { if (pthread_rwlock_wrlock(&lock)) error("could not write-lock rwlock"); } inline void writeUnlock() { if (pthread_rwlock_unlock(&lock)) error("could not write-unlock rwlock"); } inline bool isWriteLocked() { cout<<"ERROR: isWriteLocked() is not implemented"<. */ #ifndef ABTREE_H #define ABTREE_H #include #include #include #include #include #include #include #include #include "record_manager.h" #include "descriptors.h" #include "rq_provider.h" namespace abtree_ns { #ifndef TRACE #define TRACE if(0) #endif #ifndef DEBUG #define DEBUG if(0) #endif #ifndef DEBUG1 #define DEBUG1 if(0) #endif #ifndef DEBUG2 #define DEBUG2 if(0) #endif #define ABTREE_ENABLE_DESTRUCTOR using namespace std; template struct Node; template struct SCXRecord; template class wrapper_info { public: const static int MAX_NODES = DEGREE+2; Node * nodes[MAX_NODES]; SCXRecord * scxPtrs[MAX_NODES]; Node * newNode; Node * volatile * field; int state; char numberOfNodes; char numberOfNodesToFreeze; char numberOfNodesAllocated; // for rqProvider Node * insertedNodes[MAX_NODES+1]; Node * deletedNodes[MAX_NODES+1]; }; template struct SCXRecord { const static int STATE_INPROGRESS = 0; const static int STATE_COMMITTED = 1; const static int STATE_ABORTED = 2; union { struct { volatile mutables_t mutables; int numberOfNodes; int numberOfNodesToFreeze; Node * newNode; Node * volatile * field; Node * nodes[wrapper_info::MAX_NODES]; // array of pointers to nodes SCXRecord * scxPtrsSeen[wrapper_info::MAX_NODES]; // array of pointers to scx records // for rqProvider Node * insertedNodes[wrapper_info::MAX_NODES+1]; Node * deletedNodes[wrapper_info::MAX_NODES+1]; } __attribute__((packed)) c; // WARNING: be careful with atomicity because of packed attribute!!! (this means no atomic vars smaller than word size, and all atomic vars must start on a word boundary when fields are packed tightly) char bytes[2*PREFETCH_SIZE_BYTES]; }; const static int size = sizeof(c); }; template struct Node { SCXRecord * volatile scxPtr; int leaf; // 0 or 1 volatile int marked; // 0 or 1 int weight; // 0 or 1 int size; // degree of node K searchKey; #if defined(RQ_LOCKFREE) || defined(RQ_RWLOCK) || defined(HTM_RQ_RWLOCK) volatile long long itime; // for use by range query algorithm volatile long long dtime; // for use by range query algorithm #endif K keys[DEGREE]; Node * volatile ptrs[DEGREE]; inline bool isLeaf() { return leaf; } inline int getKeyCount() { return isLeaf() ? 
size : size-1; } inline int getABDegree() { return size; } template inline int getChildIndex(const K& key, Compare cmp) { int nkeys = getKeyCount(); int retval = 0; while (retval < nkeys && !cmp(key, (const K&) keys[retval])) { ++retval; } return retval; } template inline int getKeyIndex(const K& key, Compare cmp) { int nkeys = getKeyCount(); int retval = 0; while (retval < nkeys && cmp((const K&) keys[retval], key)) { ++retval; } return retval; } }; template class abtree { // the following bool determines whether the optimization to guarantee // amortized constant rebalancing (at the cost of decreasing average degree // by at most one) is used. // if it is false, then an amortized logarithmic number of rebalancing steps // may be performed per operation, but average degree increases slightly. char padding0[PREFETCH_SIZE_BYTES]; const bool ALLOW_ONE_EXTRA_SLACK_PER_NODE; const int b; const int a; RecManager * const recordmgr; RQProvider, abtree, RecManager, false, false> * const rqProvider; char padding1[PREFETCH_SIZE_BYTES]; Compare cmp; // descriptor reduction algorithm #ifndef comma #define comma , #endif #define DESC1_ARRAY records #define DESC1_T SCXRecord #define MUTABLES1_OFFSET_ALLFROZEN 0 #define MUTABLES1_OFFSET_STATE 1 #define MUTABLES1_MASK_ALLFROZEN 0x1 #define MUTABLES1_MASK_STATE 0x6 #define MUTABLES1_NEW(mutables) \ ((((mutables)&MASK1_SEQ)+(1<::STATE_INPROGRESS<::STATE_COMMITTED< * entry; char padding3[PREFETCH_SIZE_BYTES]; #define DUMMY ((SCXRecord*) (void*) TAGPTR1_STATIC_DESC(0)) #define FINALIZED ((SCXRecord*) (void*) TAGPTR1_DUMMY_DESC(1)) #define FAILED ((SCXRecord*) (void*) TAGPTR1_DUMMY_DESC(2)) #define arraycopy(src, srcStart, dest, destStart, len) \ for (int ___i=0;___i<(len);++___i) { \ (dest)[(destStart)+___i] = (src)[(srcStart)+___i]; \ } #define arraycopy_ptrs(src, srcStart, dest, destStart, len) \ for (int ___i=0;___i<(len);++___i) { \ rqProvider->write_addr(tid, &(dest)[(destStart)+___i], \ rqProvider->read_addr(tid, &(src)[(srcStart)+___i])); \ } private: void* doInsert(const int tid, const K& key, void * const value, const bool replace); // returns true if the invocation of this method // (and not another invocation of a method performed by this method) // performed an scx, and false otherwise bool fixWeightViolation(const int tid, Node* viol); // returns true if the invocation of this method // (and not another invocation of a method performed by this method) // performed an scx, and false otherwise bool fixDegreeViolation(const int tid, Node* viol); bool llx(const int tid, Node* r, Node ** snapshot, const int i, SCXRecord ** ops, Node ** nodes); SCXRecord* llx(const int tid, Node* r, Node ** snapshot); bool scx(const int tid, wrapper_info * info); void helpOther(const int tid, tagptr_t tagptr); int help(const int tid, const tagptr_t tagptr, SCXRecord const * const snap, const bool helpingOther); SCXRecord* createSCXRecord(const int tid, wrapper_info * info); Node* allocateNode(const int tid); void freeSubtree(Node* node, int* nodes) { const int tid = 0; if (node == NULL) return; if (!node->isLeaf()) { for (int i=0;igetABDegree();++i) { freeSubtree(node->ptrs[i], nodes); } } ++(*nodes); recordmgr->retire(tid, node); } int init[MAX_TID_POW2] = {0,}; public: void * const NO_VALUE; const int NUM_PROCESSES; #ifdef USE_DEBUGCOUNTERS debugCounters * const counters; // debug info #endif /** * This function must be called once by each thread that will * invoke any functions on this class. 
* * It must be okay that we do this with the main thread and later with another thread! */ void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; recordmgr->initThread(tid); rqProvider->initThread(tid); } void deinitThread(const int tid) { if (!init[tid]) return; else init[tid] = !init[tid]; rqProvider->deinitThread(tid); recordmgr->deinitThread(tid); } /** * Creates a new relaxed (a,b)-tree wherein:
* each internal node has up to DEGREE child pointers, and
* each leaf has up to DEGREE key/value pairs, and
* keys are ordered according to the provided comparator. */ abtree(const int numProcesses, const K anyKey, int suspectedCrashSignal = SIGQUIT) : ALLOW_ONE_EXTRA_SLACK_PER_NODE(true) , b(DEGREE) , a(DEGREE/2 - 2) , recordmgr(new RecManager(numProcesses, suspectedCrashSignal)) , rqProvider(new RQProvider, abtree, RecManager, false, false>(numProcesses, this, recordmgr)) , NO_VALUE((void *) -1LL) , NUM_PROCESSES(numProcesses) { cmp = Compare(); const int tid = 0; initThread(tid); recordmgr->enterQuiescentState(tid); DESC1_INIT_ALL(numProcesses); SCXRecord *dummy = TAGPTR1_UNPACK_PTR(DUMMY); dummy->c.mutables = MUTABLES1_INIT_DUMMY; TRACE COUTATOMICTID("DUMMY mutables="<c.mutables<* _entryLeft = allocateNode(tid); _entryLeft->scxPtr = DUMMY; _entryLeft->leaf = true; _entryLeft->marked = false; _entryLeft->weight = true; _entryLeft->size = 0; _entryLeft->searchKey = anyKey; Node* _entry = allocateNode(tid); _entry = allocateNode(tid); _entry->scxPtr = DUMMY; _entry->leaf = false; _entry->marked = false; _entry->weight = true; _entry->size = 1; _entry->searchKey = anyKey; _entry->ptrs[0] = _entryLeft; // need to simulate real insertion of root and the root's child, // since range queries will actually try to add these nodes, // and we don't want blocking rq providers to spin forever // waiting for their itimes to be set to a positive number. Node* insertedNodes[] = {_entry, _entryLeft, NULL}; Node* deletedNodes[] = {NULL}; rqProvider->linearize_update_at_write(tid, &entry, _entry, insertedNodes, deletedNodes); } #ifdef ABTREE_ENABLE_DESTRUCTOR ~abtree() { int nodes = 0; freeSubtree(entry, &nodes); // COUTATOMIC("main thread: deleted tree containing "<printStatus(); delete recordmgr; } #endif Node * debug_getEntryPoint() { return entry; } private: /******************************************************************* * Utility functions for integration with the test harness *******************************************************************/ int sequentialSize(Node* node) { if (node->isLeaf()) { return node->getKeyCount(); } int retval = 0; for (int i=0;igetABDegree();++i) { Node* child = node->ptrs[i]; retval += sequentialSize(child); } return retval; } int sequentialSize() { return sequentialSize(entry->ptrs[0]); } int getNumberOfLeaves(Node* node) { if (node == NULL) return 0; if (node->isLeaf()) return 1; int result = 0; for (int i=0;igetABDegree();++i) { result += getNumberOfLeaves(node->ptrs[i]); } return result; } const int getNumberOfLeaves() { return getNumberOfLeaves(entry->ptrs[0]); } int getNumberOfInternals(Node* node) { if (node == NULL) return 0; if (node->isLeaf()) return 0; int result = 1; for (int i=0;igetABDegree();++i) { result += getNumberOfInternals(node->ptrs[i]); } return result; } const int getNumberOfInternals() { return getNumberOfInternals(entry->ptrs[0]); } const int getNumberOfNodes() { return getNumberOfLeaves() + getNumberOfInternals(); } int getSumOfKeyDepths(Node* node, int depth) { if (node == NULL) return 0; if (node->isLeaf()) return depth * node->getKeyCount(); int result = 0; for (int i=0;igetABDegree();i++) { result += getSumOfKeyDepths(node->ptrs[i], 1+depth); } return result; } const int getSumOfKeyDepths() { return getSumOfKeyDepths(entry->ptrs[0], 0); } const double getAverageKeyDepth() { long sz = sequentialSize(); return (sz == 0) ? 
0 : getSumOfKeyDepths() / sz; } int getHeight(Node* node, int depth) { if (node == NULL) return 0; if (node->isLeaf()) return 0; int result = 0; for (int i=0;igetABDegree();i++) { int retval = getHeight(node->ptrs[i], 1+depth); if (retval > result) result = retval; } return result+1; } const int getHeight() { return getHeight(entry->ptrs[0], 0); } int getKeyCount(Node* entry) { if (entry == NULL) return 0; if (entry->isLeaf()) return entry->getKeyCount(); int sum = 0; for (int i=0;igetABDegree();++i) { sum += getKeyCount(entry->ptrs[i]); } return sum; } int getTotalDegree(Node* entry) { if (entry == NULL) return 0; int sum = entry->getKeyCount(); if (entry->isLeaf()) return sum; for (int i=0;igetABDegree();++i) { sum += getTotalDegree(entry->ptrs[i]); } return 1+sum; // one more children than keys } int getNodeCount(Node* entry) { if (entry == NULL) return 0; if (entry->isLeaf()) return 1; int sum = 1; for (int i=0;igetABDegree();++i) { sum += getNodeCount(entry->ptrs[i]); } return sum; } double getAverageDegree() { return getTotalDegree(entry) / (double) getNodeCount(entry); } double getSpacePerKey() { return getNodeCount(entry)*2*b / (double) getKeyCount(entry); } long long getSumOfKeys(Node* node) { TRACE COUTATOMIC(" getSumOfKeys("< * node) { return false; } inline int getKeys(const int tid, Node * node, K * const outputKeys, void ** const outputValues) { if (node->isLeaf()) { // leaf ==> its keys are in the set. const int sz = node->getKeyCount(); for (int i=0;ikeys[i]; outputValues[i] = (void *) node->ptrs[i]; } return sz; } // note: internal ==> its keys are NOT in the set return 0; } bool isInRange(const K& key, const K& lo, const K& hi) { return (!cmp(key, lo) && !cmp(hi, key)); } /** * END FUNCTIONS FOR RANGE QUERY SUPPORT */ long long getSizeInNodes() { return getNumberOfNodes(); } string getSizeString() { stringstream ss; int preallocated = wrapper_info::MAX_NODES * recordmgr->NUM_PROCESSES; ss< * node) { return sequentialSize(node); } long long getSize() { return sequentialSize(); } RecManager * const debugGetRecMgr() { return recordmgr; } long long debugKeySum() { return getSumOfKeys(); } }; } // namespace #endif /* ABTREE_H */ ================================================ FILE: datastructures/trevor_brown_abtree/ds/brown_ext_abtree_lf/brown_ext_abtree_lf_adapter.h ================================================ /* * File: bst_adapter.h * Author: trbot * * Created on August 31, 2017, 6:53 PM */ #ifndef BST_ADAPTER_H #define BST_ADAPTER_H #include #include "brown_ext_abtree_lf_impl.h" #include "errors.h" using namespace abtree_ns; #define RECORD_MANAGER_T record_manager> #define DATA_STRUCTURE_T abtree, RECORD_MANAGER_T> template , class Alloc = allocator_new, class Pool = pool_none> class ds_adapter { private: const void * NO_VALUE; DATA_STRUCTURE_T * const ds; public: ds_adapter(const int numThreads, const K ANY_KEY) : ds(new DATA_STRUCTURE_T(numThreads, ANY_KEY)) {} ~ds_adapter() { delete ds; } void * getNoValue() { return ds->NO_VALUE; } void initThread(const int tid) { ds->initThread(tid); } void deinitThread(const int tid) { ds->deinitThread(tid); } bool contains(const int tid, const K& key) { return ds->contains(tid, key); } void * const insert(const int tid, const K& key, void * const val) { return ds->insert(tid, key, val); } void * const insertIfAbsent(const int tid, const K& key, void * const val) { return ds->insertIfAbsent(tid, key, val); } void * const erase(const int tid, const K& key) { return ds->erase(tid, key).first; } void * find(const int tid, const K& 
key) { return ds->find(tid, key).first; } int rangeQuery(const int tid, const K& lo, const K& hi, K * const resultKeys, void ** const resultValues) { return ds->rangeQuery(tid, lo, hi, resultKeys, resultValues); } /** * Sequential operation to get the number of keys in the set */ int getSize() { return ds->getSize(); } void printSummary() { stringstream ss; ss<getSizeInNodes()<<" nodes in tree"; cout<debugGetRecMgr(); recmgr->printStatus(); } long long getKeyChecksum() { return ds->debugKeySum(); } bool validateStructure() { return true; } void printObjectSizes() { std::cout<<"sizes: node=" <<(sizeof(Node)) <<" descriptor="<<(sizeof(SCXRecord))<<" (statically allocated)" <. */ /** * Implementation note: * The ptrs arrays of internal nodes may be modified by calls to * rqProvider->linearize_update_at_cas or ->linearize_update_at_write. * Consequently, we must access access entries in the ptrs arrays of INTERNAL * nodes by performing calls to read_addr and write_addr (and linearize_...). * * However, the ptrs arrays of leaves represent fundamentally different data: * specifically values, or pointers to values, and NOT pointers to nodes. * Thus, the ptrs arrays of leaves CANNOT be modified by such calls. * So, we do NOT use these functions to access entries in leaves' ptrs arrays. */ #ifndef ABTREE_IMPL_H #define ABTREE_IMPL_H #include "brown_ext_abtree_lf.h" #define eassert(x, y) if ((x) != (y)) { cout<<"ERROR: "<<#x<<" != "<<#y<<" :: "<<#x<<"="<init_node(tid, newnode); #ifdef __HANDLE_STATS GSTATS_APPEND(tid, node_allocated_addresses, ((long long) newnode)%(1<<12)); #endif return newnode; } /** * Returns the value associated with key, or NULL if key is not present. */ template const pair abtree_ns::abtree::find(const int tid, const K& key) { pair result; this->recordmgr->leaveQuiescentState(tid); Node * l = rqProvider->read_addr(tid, &entry->ptrs[0]); while (!l->isLeaf()) { int ix = l->getChildIndex(key, cmp); l = rqProvider->read_addr(tid, &l->ptrs[ix]); } int index = l->getKeyIndex(key, cmp); if (index < l->getKeyCount() && l->keys[index] == key) { result.first = l->ptrs[index]; // this is a value, not a pointer, so it cannot be modified by rqProvider->linearize_update_at_..., so we do not use read_addr result.second = true; } else { result.first = NO_VALUE; result.second = false; } this->recordmgr->enterQuiescentState(tid); return result; } template bool abtree_ns::abtree::contains(const int tid, const K& key) { return find(tid, key).second; } template int abtree_ns::abtree::rangeQuery(const int tid, const K& lo, const K& hi, K * const resultKeys, void ** const resultValues) { block> stack (NULL); recordmgr->leaveQuiescentState(tid); rqProvider->traversal_start(tid); // depth first traversal (of interesting subtrees) int size = 0; TRACE COUTATOMICTID("rangeQuery(lo="< * node = stack.pop(); assert(node); // if leaf node, check if we should add its keys to the traversal if (node->isLeaf()) { rqProvider->traversal_try_add(tid, node, resultKeys, resultValues, &size, lo, hi); // else if internal node, explore its children } else { // find right-most sub-tree that could contain a key in [lo, hi] int nkeys = node->getKeyCount(); int r = nkeys; while (r > 0 && cmp(hi, (const K&) node->keys[r-1])) --r; // subtree rooted at node->ptrs[r] contains only keys > hi // find left-most sub-tree that could contain a key in [lo, hi] int l = 0; while (l < nkeys && !cmp(lo, (const K&) node->keys[l])) ++l; // subtree rooted at node->ptrs[l] contains only keys < lo // perform DFS from left to right (so push 
onto stack from right to left) for (int i=r;i>=l; --i) stack.push(rqProvider->read_addr(tid, &node->ptrs[i])); // // simply explore EVERYTHING // for (int i=0;igetABDegree();++i) { // stack.push(rqProvider->read_addr(tid, &node->ptrs[i])); // } } } // success rqProvider->traversal_end(tid, resultKeys, resultValues, &size, lo, hi); recordmgr->enterQuiescentState(tid); return size; } template void* abtree_ns::abtree::doInsert(const int tid, const K& key, void * const value, const bool replace) { wrapper_info _info; wrapper_info* info = &_info; while (true) { /** * search */ this->recordmgr->leaveQuiescentState(tid); Node* gp = NULL; Node* p = entry; Node* l = rqProvider->read_addr(tid, &p->ptrs[0]); int ixToP = -1; int ixToL = 0; while (!l->isLeaf()) { ixToP = ixToL; ixToL = l->getChildIndex(key, cmp); gp = p; p = l; l = rqProvider->read_addr(tid, &l->ptrs[ixToL]); } /** * do the update */ int keyIndex = l->getKeyIndex(key, cmp); if (keyIndex < l->getKeyCount() && l->keys[keyIndex] == key) { /** * if l already contains key, replace the existing value */ void* const oldValue = l->ptrs[keyIndex]; // this is a value, not a pointer, so it cannot be modified by rqProvider->linearize_update_at_..., so we do not use read_addr if (!replace) { this->recordmgr->enterQuiescentState(tid); return oldValue; } // perform LLXs if (!llx(tid, p, NULL, 0, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &p->ptrs[ixToL]) != l) { this->recordmgr->enterQuiescentState(tid); continue; // retry the search } info->nodes[1] = l; // create new node(s) Node* n = allocateNode(tid); arraycopy(l->keys, 0, n->keys, 0, l->getKeyCount()); arraycopy(l->ptrs, 0, n->ptrs, 0, l->getABDegree()); // although we are copying l->ptrs, since l is a leaf, l->ptrs CANNOT contain modified by rqProvider->linearize_update_at_..., so we do not use arraycopy_ptrs. n->ptrs[keyIndex] = (Node*) value; // similarly, we don't use write_addr here n->leaf = true; n->marked = false; n->scxPtr = DUMMY; n->searchKey = l->searchKey; n->size = l->size; n->weight = true; // construct info record to pass to SCX info->numberOfNodes = 2; info->numberOfNodesAllocated = 1; info->numberOfNodesToFreeze = 1; info->field = &p->ptrs[ixToL]; info->newNode = n; info->insertedNodes[0] = n; info->insertedNodes[1] = NULL; info->deletedNodes[0] = l; info->deletedNodes[1] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("replace pair ("<recordmgr->enterQuiescentState(tid); return oldValue; } TRACE COUTATOMICTID("replace pair ("<recordmgr->enterQuiescentState(tid); this->recordmgr->deallocate(tid, n); } else { /** * if l does not contain key, we have to insert it */ // perform LLXs if (!llx(tid, p, NULL, 0, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &p->ptrs[ixToL]) != l) { this->recordmgr->enterQuiescentState(tid); continue; // retry the search } info->nodes[1] = l; if (l->getKeyCount() < b) { /** * Insert pair */ // create new node(s) Node* n = allocateNode(tid); arraycopy(l->keys, 0, n->keys, 0, keyIndex); arraycopy(l->keys, keyIndex, n->keys, keyIndex+1, l->getKeyCount()-keyIndex); n->keys[keyIndex] = key; arraycopy(l->ptrs, 0, n->ptrs, 0, keyIndex); // although we are copying the ptrs array, since the source node is a leaf, ptrs CANNOT contain modified by rqProvider->linearize_update_at_..., so we do not use arraycopy_ptrs. 
arraycopy(l->ptrs, keyIndex, n->ptrs, keyIndex+1, l->getABDegree()-keyIndex); n->ptrs[keyIndex] = (Node*) value; // similarly, we don't use write_addr here n->leaf = l->leaf; n->marked = false; n->scxPtr = DUMMY; n->searchKey = l->searchKey; n->size = l->size+1; n->weight = l->weight; // construct info record to pass to SCX info->numberOfNodes = 2; info->numberOfNodesAllocated = 1; info->numberOfNodesToFreeze = 1; info->field = &p->ptrs[ixToL]; info->newNode = n; info->insertedNodes[0] = n; info->insertedNodes[1] = NULL; info->deletedNodes[0] = l; info->deletedNodes[1] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("insert pair ("<recordmgr->enterQuiescentState(tid); return NO_VALUE; } TRACE COUTATOMICTID("insert pair ("<recordmgr->enterQuiescentState(tid); this->recordmgr->deallocate(tid, n); } else { // assert: l->getKeyCount() == DEGREE == b) /** * Overflow */ // first, we create a pair of large arrays // containing too many keys and pointers to fit in a single node K keys[DEGREE+1]; Node* ptrs[DEGREE+1]; arraycopy(l->keys, 0, keys, 0, keyIndex); arraycopy(l->keys, keyIndex, keys, keyIndex+1, l->getKeyCount()-keyIndex); keys[keyIndex] = key; arraycopy(l->ptrs, 0, ptrs, 0, keyIndex); // although we are copying the ptrs array, since the source node is a leaf, ptrs CANNOT contain modified by rqProvider->linearize_update_at_..., so we do not use arraycopy_ptrs. arraycopy(l->ptrs, keyIndex, ptrs, keyIndex+1, l->getABDegree()-keyIndex); ptrs[keyIndex] = (Node*) value; // create new node(s): // since the new arrays are too big to fit in a single node, // we replace l by a new subtree containing three new nodes: // a parent, and two leaves; // the array contents are then split between the two new leaves const int size1 = (DEGREE+1)/2; Node* left = allocateNode(tid); arraycopy(keys, 0, left->keys, 0, size1); arraycopy(ptrs, 0, left->ptrs, 0, size1); // although we are copying the ptrs array, since the node is a leaf, ptrs CANNOT contain modified by rqProvider->linearize_update_at_..., so we do not use arraycopy_ptrs. left->leaf = true; left->marked = false; left->scxPtr = DUMMY; left->searchKey = keys[0]; left->size = size1; left->weight = true; const int size2 = (DEGREE+1) - size1; Node* right = allocateNode(tid); arraycopy(keys, size1, right->keys, 0, size2); arraycopy(ptrs, size1, right->ptrs, 0, size2); // although we are copying the ptrs array, since the node is a leaf, ptrs CANNOT contain modified by rqProvider->linearize_update_at_..., so we do not use arraycopy_ptrs. 
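// NOTE (added commentary): the DEGREE+1 key/value pairs are divided so that
// 'left' receives size1 = (DEGREE+1)/2 pairs and 'right' receives the
// remaining size2 = (DEGREE+1) - size1; e.g., DEGREE = 16 gives 8 and 9.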
right->leaf = true; right->marked = false; right->scxPtr = DUMMY; right->searchKey = keys[size1]; right->size = size2; right->weight = true; Node* n = allocateNode(tid); n->keys[0] = keys[size1]; rqProvider->write_addr(tid, &n->ptrs[0], left); rqProvider->write_addr(tid, &n->ptrs[1], right); n->leaf = false; n->marked = false; n->scxPtr = DUMMY; n->searchKey = keys[size1]; n->size = 2; n->weight = p == entry; // note: weight of new internal node n will be zero, // unless it is the root; this is because we test // p == entry, above; in doing this, we are actually // performing Root-Zero at the same time as this Overflow // if n will become the root (of the B-slack tree) // construct info record to pass to SCX info->numberOfNodes = 2; info->numberOfNodesAllocated = 3; info->numberOfNodesToFreeze = 1; info->field = &p->ptrs[ixToL]; info->newNode = n; info->insertedNodes[0] = n; info->insertedNodes[1] = left; info->insertedNodes[2] = right; info->insertedNodes[3] = NULL; info->deletedNodes[0] = l; info->deletedNodes[1] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("insert overflow ("<recordmgr->enterQuiescentState(tid); return NO_VALUE; } TRACE COUTATOMICTID("insert overflow ("<recordmgr->enterQuiescentState(tid); this->recordmgr->deallocate(tid, n); this->recordmgr->deallocate(tid, left); this->recordmgr->deallocate(tid, right); } } } } template const pair abtree_ns::abtree::erase(const int tid, const K& key) { wrapper_info _info; wrapper_info* info = &_info; while (true) { /** * search */ this->recordmgr->leaveQuiescentState(tid); Node* gp = NULL; Node* p = entry; Node* l = rqProvider->read_addr(tid, &p->ptrs[0]); int ixToP = -1; int ixToL = 0; while (!l->isLeaf()) { ixToP = ixToL; ixToL = l->getChildIndex(key, cmp); gp = p; p = l; l = rqProvider->read_addr(tid, &l->ptrs[ixToL]); } /** * do the update */ const int keyIndex = l->getKeyIndex(key, cmp); if (keyIndex == l->getKeyCount() || l->keys[keyIndex] != key) { /** * if l does not contain key, we are done. */ this->recordmgr->enterQuiescentState(tid); return pair(NO_VALUE,false); } else { /** * if l contains key, replace l by a new copy that does not contain key. */ // perform LLXs if (!llx(tid, p, NULL, 0, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &p->ptrs[ixToL]) != l) { this->recordmgr->enterQuiescentState(tid); continue; // retry the search } info->nodes[1] = l; // create new node(s) Node* n = allocateNode(tid); //printf("keyIndex=%d getABDegree-keyIndex=%d\n", keyIndex, l->getABDegree()-keyIndex); arraycopy(l->keys, 0, n->keys, 0, keyIndex); arraycopy(l->keys, keyIndex+1, n->keys, keyIndex, l->getKeyCount()-(keyIndex+1)); arraycopy(l->ptrs, 0, n->ptrs, 0, keyIndex); // although we are copying the ptrs array, since the node is a leaf, ptrs CANNOT contain modified by rqProvider->linearize_update_at_..., so we do not use arraycopy_ptrs. arraycopy(l->ptrs, keyIndex+1, n->ptrs, keyIndex, l->getABDegree()-(keyIndex+1)); n->leaf = true; n->marked = false; n->scxPtr = DUMMY; n->searchKey = l->keys[0]; // NOTE: WE MIGHT BE DELETING l->keys[0], IN WHICH CASE newL IS EMPTY. HOWEVER, newL CAN STILL BE LOCATED BY SEARCHING FOR l->keys[0], SO WE USE THAT AS THE searchKey FOR newL. 
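// NOTE (added, illustrative example): if l holds keys {5, 8} and key 5 is
// erased, the new leaf n holds only {8} yet keeps searchKey = 5; as the NOTE
// above says, n can still be located by searching for the deleted key.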
n->size = l->size-1; n->weight = true; // construct info record to pass to SCX info->numberOfNodes = 2; info->numberOfNodesAllocated = 1; info->numberOfNodesToFreeze = 1; info->field = &p->ptrs[ixToL]; info->newNode = n; info->insertedNodes[0] = n; info->insertedNodes[1] = NULL; info->deletedNodes[0] = l; info->deletedNodes[1] = NULL; void* oldValue = l->ptrs[keyIndex]; // since the node is a leaf, ptrs is not modified by any call to rqProvider->linearize_update_at_..., so we do not need to use read_addr to access it if (scx(tid, info)) { TRACE COUTATOMICTID("delete pair ("<recordmgr->enterQuiescentState(tid); return pair(oldValue, true); } TRACE COUTATOMICTID("delete pair ("<recordmgr->enterQuiescentState(tid); this->recordmgr->deallocate(tid, n); } } } /** * * * IMPLEMENTATION OF REBALANCING * * */ template bool abtree_ns::abtree::fixWeightViolation(const int tid, Node* viol) { if (viol->weight) return false; // assert: viol is internal (because leaves always have weight = 1) // assert: viol is not entry or root (because both always have weight = 1) // do an optimistic check to see if viol was already removed from the tree if (llx(tid, viol, NULL) == FINALIZED) { // recall that nodes are finalized precisely when // they are removed from the tree // we hand off responsibility for any violations at viol to the // process that removed it. return false; } wrapper_info _info; wrapper_info* info = &_info; // try to locate viol, and fix any weight violation at viol while (true) { const K k = viol->searchKey; Node* gp = NULL; Node* p = entry; Node* l = rqProvider->read_addr(tid, &p->ptrs[0]); int ixToP = -1; int ixToL = 0; while (!l->isLeaf() && l != viol) { ixToP = ixToL; ixToL = l->getChildIndex(k, cmp); gp = p; p = l; l = rqProvider->read_addr(tid, &l->ptrs[ixToL]); } if (l != viol) { // l was replaced by another update. // we hand over responsibility for viol to that update. return false; } // we cannot apply this update if p has a weight violation // so, we check if this is the case, and, if so, try to fix it if (!p->weight) { fixWeightViolation(tid, p); continue; } // perform LLXs if (!llx(tid, gp, NULL, 0, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &gp->ptrs[ixToP]) != p) continue; // retry the search if (!llx(tid, p, NULL, 1, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &p->ptrs[ixToL]) != l) continue; // retry the search if (!llx(tid, l, NULL, 2, info->scxPtrs, info->nodes)) continue; // retry the search const int c = p->getABDegree() + l->getABDegree(); const int size = c-1; if (size <= b) { /** * Absorb */ // create new node(s) // the new arrays are small enough to fit in a single node, // so we replace p by a new internal node. 
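// Illustrative picture (added commentary): Absorb splices l's children into
// p's child array in place of the pointer to l, yielding a single new node n
// that is installed at gp->ptrs[ixToP]:
//
//        gp                      gp
//        |                       |
//        p          ==>          n      (n->size = p->size + l->size - 1)
//      / | \                  / /|\ \
//     A  l  B                A c1 c2 B
//       / \
//      c1  c2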
Node* n = allocateNode(tid); arraycopy_ptrs(p->ptrs, 0, n->ptrs, 0, ixToL); // p and l are both internal, so we use arraycopy_ptrs arraycopy_ptrs(l->ptrs, 0, n->ptrs, ixToL, l->getABDegree()); arraycopy_ptrs(p->ptrs, ixToL+1, n->ptrs, ixToL+l->getABDegree(), p->getABDegree()-(ixToL+1)); arraycopy(p->keys, 0, n->keys, 0, ixToL); arraycopy(l->keys, 0, n->keys, ixToL, l->getKeyCount()); arraycopy(p->keys, ixToL, n->keys, ixToL+l->getKeyCount(), p->getKeyCount()-ixToL); n->leaf = false; assert(!l->isLeaf()); n->marked = false; n->scxPtr = DUMMY; n->searchKey = n->keys[0]; n->size = size; n->weight = true; // construct info record to pass to SCX info->numberOfNodes = 3; info->numberOfNodesAllocated = 1; info->numberOfNodesToFreeze = 3; info->field = &gp->ptrs[ixToP]; info->newNode = n; // info->insertedNodes[0] = info->deletedNodes[0] = NULL; info->insertedNodes[0] = n; info->insertedNodes[1] = NULL; info->deletedNodes[0] = p; info->deletedNodes[1] = l; info->deletedNodes[2] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("absorb: SCX succeeded"< eliminated // slack at pi(u) -> eliminated or slack at n // weight at u -> eliminated // no degree at u // slack at u -> slack at n /** * Compress may be needed at the new internal node we created * (since we move grandchildren from two parents together). */ fixDegreeViolation(tid, n); return true; } TRACE COUTATOMICTID("absorb: SCX FAILED"<recordmgr->deallocate(tid, n); } else { /** * Split */ // merge keys of p and l into one big array (and similarly for children) // (we essentially replace the pointer to l with the contents of l) K keys[2*DEGREE]; Node* ptrs[2*DEGREE]; arraycopy_ptrs(p->ptrs, 0, ptrs, 0, ixToL); // p and l are both internal, so we use arraycopy_ptrs arraycopy_ptrs(l->ptrs, 0, ptrs, ixToL, l->getABDegree()); arraycopy_ptrs(p->ptrs, ixToL+1, ptrs, ixToL+l->getABDegree(), p->getABDegree()-(ixToL+1)); arraycopy(p->keys, 0, keys, 0, ixToL); arraycopy(l->keys, 0, keys, ixToL, l->getKeyCount()); arraycopy(p->keys, ixToL, keys, ixToL+l->getKeyCount(), p->getKeyCount()-ixToL); // the new arrays are too big to fit in a single node, // so we replace p by a new internal node and two new children. // // we take the big merged array and split it into two arrays, // which are used to create two new children u and v. // we then create a new internal node (whose weight will be zero // if it is not the root), with u and v as its children. 
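// NOTE (added commentary): this Split divides the merged p+l contents --
// size = c-1 child pointers in total -- between two new internal nodes:
// 'left' takes size/2 pointers (and size/2 - 1 keys), 'right' takes the
// rest, and a new two-child parent n is installed at gp->ptrs[ixToP].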
// create new node(s) const int size1 = size / 2; Node* left = allocateNode(tid); arraycopy(keys, 0, left->keys, 0, size1-1); arraycopy_ptrs(ptrs, 0, left->ptrs, 0, size1); left->leaf = false; assert(!l->isLeaf()); left->marked = false; left->scxPtr = DUMMY; left->searchKey = keys[0]; left->size = size1; left->weight = true; const int size2 = size - size1; Node* right = allocateNode(tid); arraycopy(keys, size1, right->keys, 0, size2-1); arraycopy_ptrs(ptrs, size1, right->ptrs, 0, size2); right->leaf = false; right->marked = false; right->scxPtr = DUMMY; right->searchKey = keys[size1]; right->size = size2; right->weight = true; Node* n = allocateNode(tid); n->keys[0] = keys[size1-1]; rqProvider->write_addr(tid, &n->ptrs[0], left); rqProvider->write_addr(tid, &n->ptrs[1], right); n->leaf = false; n->marked = false; n->scxPtr = DUMMY; n->searchKey = keys[size1-1]; // note: should be the same as n->keys[0] n->size = 2; n->weight = (gp == entry); // note: weight of new internal node n will be zero, // unless it is the root; this is because we test // gp == entry, above; in doing this, we are actually // performing Root-Zero at the same time as this Overflow // if n will become the root (of the B-slack tree) // construct info record to pass to SCX info->numberOfNodes = 3; info->numberOfNodesAllocated = 3; info->numberOfNodesToFreeze = 3; info->field = &gp->ptrs[ixToP]; info->newNode = n; // info->insertedNodes[0] = info->deletedNodes[0] = NULL; info->insertedNodes[0] = n; info->insertedNodes[1] = left; info->insertedNodes[2] = right; info->insertedNodes[3] = NULL; info->deletedNodes[0] = p; info->deletedNodes[1] = l; info->deletedNodes[2] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("split: SCX succeeded"<recordmgr->deallocate(tid, n); this->recordmgr->deallocate(tid, left); this->recordmgr->deallocate(tid, right); } } } template bool abtree_ns::abtree::fixDegreeViolation(const int tid, Node* viol) { if (viol->getABDegree() >= a || viol == entry || viol == rqProvider->read_addr(tid, &entry->ptrs[0])) { return false; // no degree violation at viol } // do an optimistic check to see if viol was already removed from the tree if (llx(tid, viol, NULL) == FINALIZED) { // recall that nodes are finalized precisely when // they are removed from the tree. // we hand off responsibility for any violations at viol to the // process that removed it. return false; } wrapper_info _info; wrapper_info* info = &_info; // we search for viol and try to fix any violation we find there // this entails performing AbsorbSibling or Distribute. while (true) { /** * search for viol */ const K k = viol->searchKey; Node* gp = NULL; Node* p = entry; Node* l = rqProvider->read_addr(tid, &p->ptrs[0]); int ixToP = -1; int ixToL = 0; while (!l->isLeaf() && l != viol) { ixToP = ixToL; ixToL = l->getChildIndex(k, cmp); gp = p; p = l; l = rqProvider->read_addr(tid, &l->ptrs[ixToL]); } if (l != viol) { // l was replaced by another update. // we hand over responsibility for viol to that update. return false; } // assert: gp != NULL (because if AbsorbSibling or Distribute can be applied, then p is not the root) // perform LLXs if (!llx(tid, gp, NULL, 0, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &gp->ptrs[ixToP]) != p) continue; // retry the search if (!llx(tid, p, NULL, 1, info->scxPtrs, info->nodes) || rqProvider->read_addr(tid, &p->ptrs[ixToL]) != l) continue; // retry the search int ixToS = (ixToL > 0 ? 
ixToL-1 : 1); Node* s = rqProvider->read_addr(tid, &p->ptrs[ixToS]); // we can only apply AbsorbSibling or Distribute if there are no // weight violations at p, l or s. // so, we first check for any weight violations, // and fix any that we see. bool foundWeightViolation = false; if (!p->weight) { foundWeightViolation = true; fixWeightViolation(tid, p); } if (!l->weight) { foundWeightViolation = true; fixWeightViolation(tid, l); } if (!s->weight) { foundWeightViolation = true; fixWeightViolation(tid, s); } // if we see any weight violations, then either we fixed one, // removing one of these nodes from the tree, // or one of the nodes has been removed from the tree by another // rebalancing step, so we retry the search for viol if (foundWeightViolation) continue; // assert: there are no weight violations at p, l or s // assert: l and s are either both leaves or both internal nodes // (because there are no weight violations at these nodes) // also note that p->size >= a >= 2 Node* left; Node* right; int leftindex; int rightindex; if (ixToL < ixToS) { if (!llx(tid, l, NULL, 2, info->scxPtrs, info->nodes)) continue; // retry the search if (!llx(tid, s, NULL, 3, info->scxPtrs, info->nodes)) continue; // retry the search left = l; right = s; leftindex = ixToL; rightindex = ixToS; } else { if (!llx(tid, s, NULL, 2, info->scxPtrs, info->nodes)) continue; // retry the search if (!llx(tid, l, NULL, 3, info->scxPtrs, info->nodes)) continue; // retry the search left = s; right = l; leftindex = ixToS; rightindex = ixToL; } int sz = left->getABDegree() + right->getABDegree(); assert(left->weight && right->weight); if (sz < 2*a) { /** * AbsorbSibling */ // create new node(s)) Node* newl = allocateNode(tid); int k1=0, k2=0; for (int i=0;igetKeyCount();++i) { newl->keys[k1++] = left->keys[i]; } for (int i=0;igetABDegree();++i) { if (left->isLeaf()) { newl->ptrs[k2++] = left->ptrs[i]; } else { //assert(left->getKeyCount() != left->getABDegree()); rqProvider->write_addr(tid, &newl->ptrs[k2++], rqProvider->read_addr(tid, &left->ptrs[i])); } } if (!left->isLeaf()) newl->keys[k1++] = p->keys[leftindex]; for (int i=0;igetKeyCount();++i) { newl->keys[k1++] = right->keys[i]; } for (int i=0;igetABDegree();++i) { if (right->isLeaf()) { newl->ptrs[k2++] = right->ptrs[i]; } else { rqProvider->write_addr(tid, &newl->ptrs[k2++], rqProvider->read_addr(tid, &right->ptrs[i])); } } newl->leaf = left->isLeaf(); newl->marked = false; newl->scxPtr = DUMMY; newl->searchKey = l->searchKey; newl->size = l->getABDegree() + s->getABDegree(); newl->weight = true; assert(left->weight && right->weight && p->weight); // now, we atomically replace p and its children with the new nodes. // if appropriate, we perform RootAbsorb at the same time. if (gp == entry && p->getABDegree() == 2) { // construct info record to pass to SCX info->numberOfNodes = 4; // gp + p + l + s info->numberOfNodesAllocated = 1; // newl info->numberOfNodesToFreeze = 4; // gp + p + l + s info->field = &gp->ptrs[ixToP]; info->newNode = newl; info->insertedNodes[0] = newl; info->insertedNodes[1] = NULL; info->deletedNodes[0] = p; info->deletedNodes[1] = l; info->deletedNodes[2] = s; info->deletedNodes[3] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("absorbsibling AND rootabsorb: SCX succeeded"<recordmgr->deallocate(tid, newl); } else { assert(gp != entry || p->getABDegree() > 2); // create n from p by: // 1. skipping the key for leftindex and child pointer for ixToS // 2. 
replacing l with newl Node* n = allocateNode(tid); for (int i=0;ikeys[i] = p->keys[i]; } for (int i=0;iwrite_addr(tid, &n->ptrs[i], rqProvider->read_addr(tid, &p->ptrs[i])); // n and p are internal, so their ptrs arrays might have entries that are being modified by rqProvider->linearize_update_at_..., so we use read_addr and write_addr } for (int i=leftindex+1;igetKeyCount();++i) { n->keys[i-1] = p->keys[i]; } for (int i=ixToL+1;igetABDegree();++i) { rqProvider->write_addr(tid, &n->ptrs[i-1], rqProvider->read_addr(tid, &p->ptrs[i])); // n and p are internal, so their ptrs arrays might have entries that are being modified by rqProvider->linearize_update_at_..., so we use read_addr and write_addr } // replace l with newl rqProvider->write_addr(tid, &n->ptrs[ixToL - (ixToL > ixToS)], newl); n->leaf = false; n->marked = false; n->scxPtr = DUMMY; n->searchKey = p->searchKey; n->size = p->getABDegree()-1; n->weight = true; // construct info record to pass to SCX info->numberOfNodes = 4; // gp + p + l + s info->numberOfNodesAllocated = 2; // n + newl info->numberOfNodesToFreeze = 4; // gp + p + l + s info->field = &gp->ptrs[ixToP]; info->newNode = n; info->insertedNodes[0] = n; info->insertedNodes[1] = newl; info->insertedNodes[2] = NULL; info->deletedNodes[0] = p; info->deletedNodes[1] = l; info->deletedNodes[2] = s; info->deletedNodes[3] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("absorbsibling: SCX succeeded"<recordmgr->deallocate(tid, newl); this->recordmgr->deallocate(tid, n); } } else { /** * Distribute */ int leftsz = sz/2; int rightsz = sz-leftsz; // create new node(s)) Node* n = allocateNode(tid); Node* newleft = allocateNode(tid); Node* newright = allocateNode(tid); // combine the contents of l and s (and one key from p if l and s are internal) K keys[2*DEGREE]; Node* ptrs[2*DEGREE]; int k1=0, k2=0; for (int i=0;igetKeyCount();++i) { keys[k1++] = left->keys[i]; } for (int i=0;igetABDegree();++i) { if (left->isLeaf()) { ptrs[k2++] = left->ptrs[i]; } else { ptrs[k2++] = rqProvider->read_addr(tid, &left->ptrs[i]); } } if (!left->isLeaf()) keys[k1++] = p->keys[leftindex]; for (int i=0;igetKeyCount();++i) { keys[k1++] = right->keys[i]; } for (int i=0;igetABDegree();++i) { if (right->isLeaf()) { ptrs[k2++] = right->ptrs[i]; } else { ptrs[k2++] = rqProvider->read_addr(tid, &right->ptrs[i]); } } // distribute contents between newleft and newright k1=0; k2=0; for (int i=0;iisLeaf();++i) { newleft->keys[i] = keys[k1++]; } for (int i=0;iisLeaf()) { newleft->ptrs[i] = ptrs[k2++]; } else { rqProvider->write_addr(tid, &newleft->ptrs[i], ptrs[k2++]); } } newleft->leaf = left->isLeaf(); newleft->marked = false; newleft->scxPtr = DUMMY; newleft->searchKey = newleft->keys[0]; newleft->size = leftsz; newleft->weight = true; // reserve one key for the parent (to go between newleft and newright) K keyp = keys[k1]; if (!left->isLeaf()) ++k1; for (int i=0;iisLeaf();++i) { newright->keys[i] = keys[k1++]; } for (int i=0;iisLeaf()) { newright->ptrs[i] = ptrs[k2++]; } else { rqProvider->write_addr(tid, &newright->ptrs[i], ptrs[k2++]); } } newright->leaf = right->isLeaf(); newright->marked = false; newright->scxPtr = DUMMY; newright->searchKey = newright->keys[0]; newright->size = rightsz; newright->weight = true; // create n from p by replacing left with newleft and right with newright, // and replacing one key (between these two pointers) for (int i=0;igetKeyCount();++i) { n->keys[i] = p->keys[i]; } for (int i=0;igetABDegree();++i) { rqProvider->write_addr(tid, &n->ptrs[i], rqProvider->read_addr(tid, 
&p->ptrs[i])); // n and p are internal, so their ptrs arrays might have entries that are being modified by rqProvider->linearize_update_at_..., so we use read_addr and write_addr } n->keys[leftindex] = keyp; rqProvider->write_addr(tid, &n->ptrs[leftindex], newleft); rqProvider->write_addr(tid, &n->ptrs[rightindex], newright); n->leaf = false; n->marked = false; n->scxPtr = DUMMY; n->searchKey = p->searchKey; n->size = p->size; n->weight = true; // construct info record to pass to SCX info->numberOfNodes = 4; // gp + p + l + s info->numberOfNodesAllocated = 3; // n + newleft + newright info->numberOfNodesToFreeze = 4; // gp + p + l + s info->field = &gp->ptrs[ixToP]; info->newNode = n; info->insertedNodes[0] = n; info->insertedNodes[1] = newleft; info->insertedNodes[2] = newright; info->insertedNodes[3] = NULL; info->deletedNodes[0] = p; info->deletedNodes[1] = l; info->deletedNodes[2] = s; info->deletedNodes[3] = NULL; if (scx(tid, info)) { TRACE COUTATOMICTID("distribute: SCX succeeded"<recordmgr->deallocate(tid, n); this->recordmgr->deallocate(tid, newleft); this->recordmgr->deallocate(tid, newright); } } } /** * * IMPLEMENTATION OF LLX AND SCX * * */ template bool abtree_ns::abtree::llx(const int tid, Node* r, Node ** snapshot, const int i, SCXRecord ** ops, Node ** nodes) { SCXRecord* result = llx(tid, r, snapshot); if (result == FAILED || result == FINALIZED) return false; ops[i] = result; nodes[i] = r; return true; } template abtree_ns::SCXRecord* abtree_ns::abtree::llx(const int tid, Node* r, Node ** snapshot) { const bool marked = r->marked; SOFTWARE_BARRIER; tagptr_t tagptr = (tagptr_t) r->scxPtr; // read mutable state field of descriptor bool succ; TRACE COUTATOMICTID("tagged ptr seq="<c.nodes[i]))<<" is a leaf\n"); assert(i > 0); // nodes[0] cannot be a leaf... continue; // do not freeze leaves } bool successfulCAS = __sync_bool_compare_and_swap(&snap->c.nodes[i]->scxPtr, snap->c.scxPtrsSeen[i], tagptr); SCXRecord *exp = snap->c.nodes[i]->scxPtr; // TRACE if (successfulCAS) COUTATOMICTID((helpingOther?" ":"")<<"help froze nodes["<c.nodes[i])<<" with tagptr="<c.nodes[i]->scxPtr)<c.mutables, tagptr, MUTABLES1_MASK_ALLFROZEN, MUTABLES1_OFFSET_ALLFROZEN); if (!succ) return SCXRecord::STATE_ABORTED; if (allFrozen) { TRACE COUTATOMICTID((helpingOther?" ":"")<<"help return state "<::STATE_COMMITTED<<" after failed freezing cas on nodes["<::STATE_COMMITTED; } else { const int newState = SCXRecord::STATE_ABORTED; TRACE COUTATOMICTID((helpingOther?" ":"")<<"help return state "<c.mutables, snap->c.mutables, newState, MUTABLES1_MASK_STATE, MUTABLES1_OFFSET_STATE); return newState; } } MUTABLES1_WRITE_BIT(ptr->c.mutables, snap->c.mutables, MUTABLES1_MASK_ALLFROZEN); SOFTWARE_BARRIER; for (int i=1; ic.numberOfNodesToFreeze; ++i) { if (snap->c.nodes[i]->isLeaf()) continue; // do not mark leaves snap->c.nodes[i]->marked = true; // finalize all but first node } // CAS in the new sub-tree (update CAS) rqProvider->linearize_update_at_cas(tid, snap->c.field, snap->c.nodes[1], snap->c.newNode, snap->c.insertedNodes, snap->c.deletedNodes); // __sync_bool_compare_and_swap(snap->c.field, snap->c.nodes[1], snap->c.newNode); TRACE COUTATOMICTID((helpingOther?" ":"")<<"help CAS'ed to newNode@0x"<<((uintptr_t)snap->c.newNode)<c.mutables, snap->c.mutables, SCXRecord::STATE_COMMITTED, MUTABLES1_MASK_STATE, MUTABLES1_OFFSET_STATE); TRACE COUTATOMICTID((helpingOther?" 
":"")<<"help return COMMITTED after performing update cas"<::STATE_COMMITTED; // success } #endif /* ABTREE_IMPL_H */ ================================================ FILE: datastructures/trevor_brown_abtree/minimal_example.cpp ================================================ /** * Author: Trevor Brown (me [at] tbrown [dot] pro). * Copyright 2018. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, version 2 * of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include #include #include #include "brown_ext_abtree_lf_adapter.h" int main(int argc, char** argv) { const int NODE_DEGREE = 16; const int ANY_KEY = 0; const int NUM_THREADS = 1; auto tree = new ds_adapter(NUM_THREADS, ANY_KEY); const int threadID = 0; tree->initThread(threadID); void * oldVal = tree->insertIfAbsent(threadID, 7, (void *) 1020); assert(oldVal == tree->getNoValue()); bool result = tree->contains(threadID, 7); assert(result); result = tree->contains(threadID, 8); assert(!result); void * val = tree->find(threadID, 7); assert(val == (void *) 1020); val = tree->erase(threadID, 7); assert(val == (void *) 1020); result = tree->contains(threadID, 7); assert(!result); tree->deinitThread(threadID); delete tree; std::cout<<"Passed quick tests."< #include #include #include #include "common/ThreadRegistry.hpp" #include "ds/natarajan_ext_bst_lf/natarajan_ext_bst_lf_adapter.h" /* * This is a wrapper to Trevor Brown's implementation of Naratajan's lock=free Tree so we can use it in our benchmarks */ template class TrevorBrownNatarajanTree { const int NUM_THREADS = 128; //ds_adapter>* tree; ds_adapter* tree; public: TrevorBrownNatarajanTree(int numThreads) { const int minValue = 0; const int maxValue = std::numeric_limits::max(); const int noValue = -1; //tree = new ds_adapter>(minValue, maxValue, noValue, NUM_THREADS); tree = new ds_adapter(minValue, maxValue, noValue, NUM_THREADS); } ~TrevorBrownNatarajanTree() { // TODO: deinit threads? delete tree; } // Inserts a key only if it's not already present bool add(K key, const int tid=0) { int threadID = tl_tcico.tid; if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) { threadID = ThreadRegistry::getTID(); tree->initThread(threadID); } return tree->insertIfAbsent(threadID, key, 1) != tree->getNoValue(); } // Returns true only if the key was present bool remove(K key, const int tid=0) { int threadID = tl_tcico.tid; if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) { threadID = ThreadRegistry::getTID(); tree->initThread(threadID); } return tree->erase(threadID, key) != tree->getNoValue(); } bool contains(K key, const int tid=0) { int threadID = tl_tcico.tid; if (threadID == ThreadCheckInCheckOut::NOT_ASSIGNED) { threadID = ThreadRegistry::getTID(); tree->initThread(threadID); } return tree->contains(threadID, key); } // This is not fully transactionally but it's ok because we use it only on initialization. // We could make it fully transactionally, but we would have to increase the size of allocation/store logs. 
    // This is not fully transactional, but that's ok because we use it only on initialization.
    // We could make it fully transactional, but we would have to increase the size of the allocation/store logs.
    void addAll(K** keys, int size, const int tid=0) {
        for (int i = 0; i < size; i++) add(*keys[i], tid);
    }

    static std::string className() { return "TrevorBrown-Natarajan-Tree"; }
};

#endif // _TREVOR_BROWN_NATARAJAN_TREE_HP_H_

================================================
FILE: datastructures/trevor_brown_natarajan/ds/natarajan_ext_bst_lf/natarajan_ext_bst_lf_adapter.h
================================================

/*
 * Implementation of the lock-free tree of Natarajan and Mittal.
 *
 * Heavily edited by Trevor Brown (me [at] tbrown [dot] pro).
 * (Late 2017, early 2018.)
 *
 * Notable changes:
 * - Converted original implementation to a class.
 * - Fixed a bug: atomic_ops types don't contain "volatile," so the original
 *   implementation behaved erroneously under high contention.
 * - Fixed the original implementation's erroneous memory reclamation,
 *   which would leak many nodes.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2
 * of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * Created on August 31, 2017, 6:22 PM
 */

#ifndef NATARAJAN_EXT_BST_LF_ADAPTER_H
#define NATARAJAN_EXT_BST_LF_ADAPTER_H

#include <iostream>
#include "errors.h"
#include "natarajan_ext_bst_lf_stage2_impl.h"

#define RECORD_MANAGER_T record_manager<Reclaim, Alloc, Pool, node_t<K,V>>
#define DATA_STRUCTURE_T natarajan_ext_bst_lf<K, V, RECORD_MANAGER_T>

template <typename K, typename V, class Reclaim = reclaimer_debra<K>, class Alloc = allocator_new<K>, class Pool = pool_none<K>>
class ds_adapter {
private:
    const V NO_VALUE;
    DATA_STRUCTURE_T * const tree;

public:
    ds_adapter(const K& MIN_KEY, const K& MAX_KEY, const V& _NO_VALUE, const int numThreads)
        : NO_VALUE(_NO_VALUE)
        , tree(new DATA_STRUCTURE_T(MAX_KEY, NO_VALUE, numThreads))
    {}

    ~ds_adapter() { delete tree; }

    V getNoValue() { return NO_VALUE; }

    void initThread(const int tid) { tree->initThread(tid); }
    void deinitThread(const int tid) { tree->deinitThread(tid); }

    bool contains(const int tid, const K& key) { return tree->find(tid, key) != getNoValue(); }
    V insert(const int tid, const K& key, const V& val) { error("insert-replace not implemented for this data structure"); }
    V insertIfAbsent(const int tid, const K& key, const V& val) { return tree->insertIfAbsent(tid, key, val); }
    V erase(const int tid, const K& key) { return tree->erase(tid, key); }
    V find(const int tid, const K& key) { return tree->find(tid, key); }
    int rangeQuery(const int tid, const K& lo, const K& hi, K * const resultKeys, V * const resultValues) {
        error("rangeQuery not implemented for this data structure");
    }

    /**
     * Sequential operation to get the number of keys in the set
     */
    int getSize() { return tree->getSize(); }

    void printSummary() { tree->printSummary(); }
    long long getKeyChecksum() { return tree->getKeyChecksum(); }
    bool validateStructure() { return tree->validateStructure(); }
    void printObjectSizes() {
        std::cout<<"sizes: node="<<(sizeof(node_t<K,V>))<<std::endl;
    }
};

#endif /* NATARAJAN_EXT_BST_LF_ADAPTER_H */

================================================
FILE: datastructures/trevor_brown_natarajan/ds/natarajan_ext_bst_lf/natarajan_ext_bst_lf_stage1.h
================================================

/*
 * A Lock Free Binary Search Tree
 * File:
 *   wfrbt.h
 * Author(s):
 *   Aravind Natarajan
 * Description:
 * A Lock Free Binary Search Tree
 *
 * Copyright (c) 2013-2014.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2
 * of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * Please cite our PPoPP 2014 paper - Fast Concurrent Lock-Free Binary Search Trees
 * by Aravind Natarajan and Neeraj Mittal if you use our code in your experiments
 *
 * Features:
 * 1. Insert operations directly install their window without injecting the
 *    operation into the tree. They help any conflicting operation at the
 *    injection point, before executing their window txn.
 * 2. Delete operations are the same as that of the original algorithm.
 */

/*
 * File:   wfrbt.h
 * Author: Maya Arbel-Raviv
 *
 * Created on June 8, 2017, 10:45 AM
 */

/*
 * Heavily edited by Trevor Brown (me [at] tbrown [dot] pro).
 * (Late 2017, early 2018.)
 *
 * - Converted to a class and added proper memory reclamation.
 * - Fixed a bug: atomic_ops types don't contain "volatile," so the original
 *   implementation behaved erroneously under high contention.
 * - Fixed the original implementation's erroneous memory reclamation,
 *   which would leak many nodes.
 */

#ifndef NATARAJAN_EXT_BST_LF_H
#define NATARAJAN_EXT_BST_LF_H

#include "errors.h"
#include "record_manager.h"
#include "atomic_ops.h"

#if (INDEX_STRUCT == IDX_NATARAJAN_EXT_BST_LF)
#elif (INDEX_STRUCT == IDX_NATARAJAN_EXT_BST_LF_BASELINE)
#error cannot support baseline with int keys and no value.
#else
#error
#endif

// Most of these macros are not used in this algorithm
#define MARK_BIT 1
#define FLAG_BIT 0

#define atomic_cas_full(addr, old_val, new_val) __sync_bool_compare_and_swap(addr, old_val, new_val);

#define create_child_word(addr, mark, flag) (((uintptr_t) addr << 2) + (mark << 1) + (flag))
#define is_marked(x) ( ((x >> 1) & 1) == 1 ? true:false)
#define is_flagged(x) ( (x & 1 ) == 1 ? true:false)
#define get_addr(x) (x >> 2)
#define add_mark_bit(x) (x + 4UL)
#define is_free(x) (((x) & 3) == 0? true:false)

enum { INSERT, DELETE };
enum { UNMARK, MARK };
enum { UNFLAG, FLAG };

typedef uintptr_t Word;

template <typename skey_t, typename sval_t>
struct node_t {
    union {
        struct {
            skey_t key;
            sval_t value;
            volatile AO_double_t child;
        };
#ifdef MIN_NODE_SIZE
        char bytes[MIN_NODE_SIZE];
#endif
    };
};

template <typename skey_t, typename sval_t>
struct seekRecord_t {
    skey_t leafKey;
    sval_t leafValue;
    struct node_t<skey_t, sval_t>* leaf;
    struct node_t<skey_t, sval_t>* parent;
    AO_t pL;
    bool isLeftL; // is L the left child of P?
    struct node_t<skey_t, sval_t>* lum;
    AO_t lumC;
    bool isLeftUM; // is last unmarked node's child on access path the left child of the last unmarked node?
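    // Worked example (editor's note, not in the original file) of the child-word
    // encoding used by pL and lumC: create_child_word(addr, mark, flag) stores the
    // node address shifted left by 2, with the mark bit at position 1 and the flag
    // bit at position 0. E.g., for addr = 0x7f00 with MARK and UNFLAG:
    //   word = (0x7f00 << 2) + (1 << 1) + 0 = 0x1fc02
    //   get_addr(word) = 0x1fc02 >> 2 = 0x7f00, is_marked(word) = true,
    //   is_flagged(word) = false, and is_free(word) = false because the low
    //   two bits are not both zero.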
}; template struct thread_data_t { int id; struct node_t* rootOfTree; seekRecord_t* sr; // seek record seekRecord_t * ssr; // secondary seek record }; //static __thread thread_data_t * data = NULL; template > class natarajan_ext_bst_lf { private: RecMgr * const recmgr; Compare cmp; node_t * root; seekRecord_t* insseek(thread_data_t* data, skey_t key, int op); seekRecord_t* delseek(thread_data_t* data, skey_t key, int op); seekRecord_t* secondary_seek(thread_data_t* data, skey_t key, seekRecord_t* sr); sval_t delete_node(thread_data_t* data, skey_t key); sval_t insertIfAbsent(thread_data_t* data, skey_t key, sval_t value); sval_t search(thread_data_t* data, skey_t key); int help_conflicting_operation (thread_data_t* data, seekRecord_t* R); int inject(thread_data_t* data, seekRecord_t* R, int op); int perform_one_delete_window_operation(thread_data_t* data, seekRecord_t* R, skey_t key); int perform_one_insert_window_operation(thread_data_t* data, seekRecord_t* R, skey_t newKey, sval_t value); void retireDeletedNodes(thread_data_t* data, node_t * node, node_t * targetNode, bool pointerFlagged = false); int init[MAX_TID_POW2] = {0,}; public: const skey_t MAX_KEY; const sval_t NO_VALUE; const int NUM_PROCESSES; natarajan_ext_bst_lf(const skey_t& _MAX_KEY, const sval_t& _NO_VALUE, const int numProcesses) : MAX_KEY(_MAX_KEY) , NO_VALUE(_NO_VALUE) , NUM_PROCESSES(numProcesses) , recmgr(new RecMgr(numProcesses, SIGQUIT)) { const int tid = 0; initThread(tid); cmp = Compare(); recmgr->enterQuiescentState(tid); // block crash recovery signal for this thread, and enter an initial quiescent state. root = recmgr->template allocate>(tid); node_t * newLC = recmgr->template allocate>(tid); node_t * newRC = recmgr->template allocate>(tid); memset(newLC, 0, sizeof (struct node_t)); memset(newRC, 0, sizeof (struct node_t)); root->key = _MAX_KEY; newLC->key = _MAX_KEY - 1; newRC->key = _MAX_KEY; root->value = NO_VALUE; newLC->value = NO_VALUE; newRC->value = NO_VALUE; root->child.AO_val1 = create_child_word(newLC, UNMARK, UNFLAG); root->child.AO_val2 = create_child_word(newRC, UNMARK, UNFLAG); } void freeSubtree(node_t * curr) { const int tid = 0; if (curr == NULL) return; node_t * left = get_left(curr); node_t * right = get_right(curr); recmgr->deallocate(tid, curr); freeSubtree(left); freeSubtree(right); } ~natarajan_ext_bst_lf() { freeSubtree(root); delete recmgr; } void initThread(const int tid) { if (init[tid]) return; else init[tid] = !init[tid]; recmgr->initThread(tid); } void deinitThread(const int tid) { if (!init[tid]) return; else init[tid] = !init[tid]; recmgr->deinitThread(tid); } sval_t insertIfAbsent(const int tid, skey_t key, sval_t item) { assert(cmp(key, MAX_KEY-1)); thread_data_t data; seekRecord_t sr; seekRecord_t ssr; data.id = tid; data.sr = &sr; data.ssr = &ssr; data.rootOfTree = root; return insertIfAbsent(&data,key,item); } sval_t erase(const int tid, skey_t key) { assert(cmp(key, MAX_KEY-1)); thread_data_t data; seekRecord_t sr; seekRecord_t ssr; data.id = tid; data.sr = &sr; data.ssr = &ssr; data.rootOfTree = root; return delete_node(&data,key); } sval_t find(const int tid, skey_t key) { thread_data_t data; seekRecord_t sr; seekRecord_t ssr; data.id = tid; data.sr = &sr; data.ssr = &ssr; data.rootOfTree = root; return search(&data,key); } node_t * get_root() { return root; } node_t * get_left(node_t * curr) { return (node_t *)get_addr(curr->child.AO_val1); } node_t * get_right(node_t * curr) { return (node_t *)get_addr(curr->child.AO_val2); } long long getKeyChecksum(node_t * curr) { 
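        // Editor's note (not in the original file): this is an external BST, so real
        // key/value pairs live only in the leaves and internal nodes are routing nodes.
        // The checksum below therefore counts leaf keys only, and the public
        // getKeyChecksum()/getSize() wrappers start at get_left(get_left(root)) to skip
        // the sentinel root and its left sentinel child created in the constructor.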
if (curr == NULL) return 0; node_t * left = get_left(curr); node_t * right = get_right(curr); if (!left && !right) return (long long) curr->key; // leaf return getKeyChecksum(left) + getKeyChecksum(right); } long long getKeyChecksum() { return getKeyChecksum(get_left(get_left(root))); } long long getSize(node_t * curr) { if (curr == NULL) return 0; node_t * left = get_left(curr); node_t * right = get_right(curr); if (!left && !right) return 1; // leaf return getSize(left) + getSize(right); } bool validateStructure() { return true; } long long getSize() { return getSize(get_left(get_left(root))); } long long getSizeInNodes(node_t * const curr) { if (curr == NULL) return 0; return 1 + getSizeInNodes(get_left(curr)) + getSizeInNodes(get_right(curr)); } long long getSizeInNodes() { return getSizeInNodes(root); } void printSummary() { stringstream ss; ss<printStatus(); } }; #endif /* NATARAJAN_EXT_BST_LF_H */ ================================================ FILE: datastructures/trevor_brown_natarajan/ds/natarajan_ext_bst_lf/natarajan_ext_bst_lf_stage2_impl.h ================================================ /*A Lock Free Binary Search Tree * File: * wfrbt.cpp * Author(s): * Aravind Natarajan * Description: * A Lock Free Binary Search Tree * * Copyright (c) 2013-2014. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, version 2 * of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. Please cite our PPoPP 2014 paper - Fast Concurrent Lock-Free Binary Search Trees by Aravind Natarajan and Neeraj Mittal if you use our code in your experiments Features: 1. Insert operations directly install their window without injecting the operation into the tree. They help any conflicting operation at the injection point, before executing their window txn. 2. Delete operations are the same as that of the original algorithm. */ /* * File: wfrbt_impl.h * Author: Maya Arbel-Raviv * * Created on June 8, 2017, 10:45 AM */ /* * Heavily edited by Trevor Brown (me [at] tbrown [dot] pro). * (Late 2017, early 2018.) * * - Converted to a class and added proper memory reclamation. * - Fixed a bug: atomic_ops types don't contain "volatile," so the original * implementation behaved erroneously under high contention. * - Fixed the original implementation's erroneous memory reclamation, * which would leak many nodes. 
*/ #ifndef NATARAJAN_EXT_BST_LF_IMPL_H #define NATARAJAN_EXT_BST_LF_IMPL_H #include "natarajan_ext_bst_lf_stage1.h" static inline bool SetBit(volatile size_t *array, int bit) { bool flag; __asm__ __volatile__("lock bts %2,%1; setb %0" : "=q" (flag) : "m" (*array), "r" (bit)); return flag; } static bool mark_Node(volatile AO_t * word) { return (SetBit(word, MARK_BIT)); } static volatile AO_t stop = 0; static volatile AO_t stop2 = 0; //long total_insert = 0; /* STRUCTURES */ enum { Front, Back }; //long blackCount = -1; //long leafNodes = 0; template seekRecord_t* natarajan_ext_bst_lf::insseek(thread_data_t* data, skey_t key, int op) { node_t * gpar = NULL; // last node (ancestor of parent on access path) whose child pointer field is unmarked node_t * par = data->rootOfTree; node_t * leaf; node_t * leafchild; AO_t parentPointerWord = (size_t) NULL; // contents in gpar AO_t leafPointerWord = par->child.AO_val1; // contents in par. Tree has two imaginary keys \inf_{1} and \inf_{2} which are larger than all other keys. AO_t leafchildPointerWord; // contents in leaf bool isparLC = false; // is par the left child of gpar bool isleafLC = true; // is leaf the left child of par bool isleafchildLC; // is leafchild the left child of leaf leaf = (node_t *)get_addr(leafPointerWord); if (cmp(key, leaf->key)) { leafchildPointerWord = leaf->child.AO_val1; isleafchildLC = true; } else { leafchildPointerWord = leaf->child.AO_val2; isleafchildLC = false; } leafchild = (node_t *)get_addr(leafchildPointerWord); while (leafchild != NULL) { if (!is_marked(leafPointerWord)) { gpar = par; parentPointerWord = leafPointerWord; isparLC = isleafLC; } par = leaf; leafPointerWord = leafchildPointerWord; isleafLC = isleafchildLC; leaf = leafchild; if (cmp(key, leaf->key)) { leafchildPointerWord = leaf->child.AO_val1; isleafchildLC = true; } else { leafchildPointerWord = leaf->child.AO_val2; isleafchildLC = false; } leafchild = (node_t *)get_addr(leafchildPointerWord); } // if (key == leaf->key) { // // key matches that being inserted // return NULL; // } seekRecord_t* R = data->sr; R->leafKey = leaf->key; R->leafValue = leaf->value; R->parent = par; R->pL = leafPointerWord; R->isLeftL = isleafLC; R->lum = gpar; R->lumC = parentPointerWord; R->isLeftUM = isparLC; return R; } template seekRecord_t* natarajan_ext_bst_lf::delseek(thread_data_t* data, skey_t key, int op) { node_t * gpar = NULL; // last node (ancestor of parent on access path) whose child pointer field is unmarked node_t * par = data->rootOfTree; node_t * leaf; node_t * leafchild; AO_t parentPointerWord = (AO_t) NULL; // contents in gpar AO_t leafPointerWord = par->child.AO_val1; // contents in par. Tree has two imaginary keys \inf_{1} and \inf_{2} which are larger than all other keys. 
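    // Editor's note (not in the original file): insseek, delseek and secondary_seek all
    // maintain the same invariant on the way down: (par, leaf, leafchild) is the current
    // search window, while (lum, lumC) remember the last edge on the access path whose
    // child word was unmarked. Deletions are later completed by a single CAS on exactly
    // that unmarked edge (see help_conflicting_operation), which splices out the marked
    // parent together with the flagged leaf in one step.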
AO_t leafchildPointerWord; // contents in leaf bool isparLC = false; // is par the left child of gpar bool isleafLC = true; // is leaf the left child of par bool isleafchildLC; // is leafchild the left child of leaf leaf = (node_t *)get_addr(leafPointerWord); if (cmp(key, leaf->key)) { leafchildPointerWord = leaf->child.AO_val1; isleafchildLC = true; } else { leafchildPointerWord = leaf->child.AO_val2; isleafchildLC = false; } leafchild = (node_t *)get_addr(leafchildPointerWord); while (leafchild != NULL) { if (!is_marked(leafPointerWord)) { gpar = par; parentPointerWord = leafPointerWord; isparLC = isleafLC; } par = leaf; leafPointerWord = leafchildPointerWord; isleafLC = isleafchildLC; leaf = leafchild; if (cmp(key, leaf->key)) { leafchildPointerWord = leaf->child.AO_val1; isleafchildLC = true; } else { leafchildPointerWord = leaf->child.AO_val2; isleafchildLC = false; } leafchild = (node_t *)get_addr(leafchildPointerWord); } // op = DELETE if (key != leaf->key) { // key is not found in the tree. return NULL; } seekRecord_t* R = data->sr; R->leafKey = leaf->key; R->leafValue = leaf->value; R->parent = par; R->leaf = leaf; R->pL = leafPointerWord; R->isLeftL = isleafLC; R->lum = gpar; R->lumC = parentPointerWord; R->isLeftUM = isparLC; return R; } template seekRecord_t* natarajan_ext_bst_lf::secondary_seek(thread_data_t* data, skey_t key, seekRecord_t* sr) { //std::cout << "sseek" << std::endl; node_t * flaggedLeaf = (node_t *)get_addr(sr->pL); node_t * gpar = NULL; // last node (ancestor of parent on access path) whose child pointer field is unmarked node_t * par = data->rootOfTree; node_t * leaf; node_t * leafchild; AO_t parentPointerWord = (AO_t) NULL; // contents in gpar AO_t leafPointerWord = par->child.AO_val1; // contents in par. Tree has two imaginary keys \inf_{1} and \inf_{2} which are larger than all other keys. AO_t leafchildPointerWord; // contents in leaf bool isparLC = false; // is par the left child of gpar bool isleafLC = true; // is leaf the left child of par bool isleafchildLC; // is leafchild the left child of leaf leaf = (node_t *)get_addr(leafPointerWord); if (cmp(key, leaf->key)) { leafchildPointerWord = leaf->child.AO_val1; isleafchildLC = true; } else { leafchildPointerWord = leaf->child.AO_val2; isleafchildLC = false; } leafchild = (node_t *)get_addr(leafchildPointerWord); while (leafchild != NULL) { if (!is_marked(leafPointerWord)) { gpar = par; parentPointerWord = leafPointerWord; isparLC = isleafLC; } par = leaf; leafPointerWord = leafchildPointerWord; isleafLC = isleafchildLC; leaf = leafchild; if (cmp(key, leaf->key)) { leafchildPointerWord = leaf->child.AO_val1; isleafchildLC = true; } else { leafchildPointerWord = leaf->child.AO_val2; isleafchildLC = false; } leafchild = (node_t *)get_addr(leafchildPointerWord); } if (!is_flagged(leafPointerWord) || (leaf != flaggedLeaf)) { // operation has been completed by another process. return NULL; } seekRecord_t* R = data->ssr; R->leafKey = leaf->key; R->parent = par; R->pL = leafPointerWord; R->isLeftL = isleafLC; R->lum = gpar; R->lumC = parentPointerWord; R->isLeftUM = isparLC; return R; } template sval_t natarajan_ext_bst_lf::search(thread_data_t* data, skey_t key) { recmgr->leaveQuiescentState(data->id); node_t * cur = (node_t *)get_addr(data->rootOfTree->child.AO_val1); skey_t lastKey = 0; node_t * lastNode = NULL; while (cur != NULL) { lastKey = cur->key; lastNode = cur; cur = (cmp(key, lastKey) ? 
(node_t *)get_addr(cur->child.AO_val1) : (node_t *)get_addr(cur->child.AO_val2)); } if (key == lastKey) { recmgr->enterQuiescentState(data->id); return lastNode->value; } recmgr->enterQuiescentState(data->id); return NO_VALUE; } //------------------------------------------------------------------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------------------------------------------------------------------- template void natarajan_ext_bst_lf::retireDeletedNodes(thread_data_t* data, node_t * node, node_t * targetNode, bool pointerFlagged) { // traverse from node, retiring everything we deleted // (that is: every leaf pointed to by a flagged pointer, // and every internal node with a flagged pointer.) if (node == NULL) return; if (node == targetNode) return; // we reached the end of the nodes we deleted if ((node_t *) node->child.AO_val1 == NULL) { // node is a leaf if (pointerFlagged) { recmgr->retire(data->id, node); } return; } // node is internal if (is_flagged(node->child.AO_val1) || is_flagged(node->child.AO_val2)) { recmgr->retire(data->id, node); if (!is_free(node->child.AO_val1)) retireDeletedNodes(data, (node_t *) get_addr(node->child.AO_val1), targetNode, is_flagged(node->child.AO_val1)); if (!is_free(node->child.AO_val2)) retireDeletedNodes(data, (node_t *) get_addr(node->child.AO_val2), targetNode, is_flagged(node->child.AO_val2)); } } template int natarajan_ext_bst_lf::help_conflicting_operation(thread_data_t* data, seekRecord_t* R) { int result; node_t * target = NULL; if (is_flagged(R->pL)) { // leaf node is flagged for deletion by another process. //1. mark sibling of leaf node for deletion and then read its contents. AO_t pS; if (R->isLeftL) { // L is the left child of P mark_Node(&R->parent->child.AO_val2); pS = R->parent->child.AO_val2; } else { mark_Node(&R->parent->child.AO_val1); pS = R->parent->child.AO_val1; } // 2. Execute cas on the last unmarked node to remove the // if pS is flagged, propagate it. AO_t newWord; if (is_flagged(pS)) { newWord = create_child_word((node_t *)get_addr(pS), UNMARK, FLAG); } else { newWord = create_child_word((node_t *)get_addr(pS), UNMARK, UNFLAG); } target = (node_t *) get_addr(pS); if (R->isLeftUM) { result = atomic_cas_full(&R->lum->child.AO_val1, R->lumC, newWord); } else { result = atomic_cas_full(&R->lum->child.AO_val2, R->lumC, newWord); } } else { // leaf node is marked for deletion by another process. // Note that leaf is not flagged, as it will be taken care of in the above case. AO_t newWord; if (is_flagged(R->pL)) { newWord = create_child_word((node_t *)get_addr(R->pL), UNMARK, FLAG); } else { newWord = create_child_word((node_t *)get_addr(R->pL), UNMARK, UNFLAG); } target = (node_t *) get_addr(R->pL); if (R->isLeftUM) { result = atomic_cas_full(&R->lum->child.AO_val1, R->lumC, newWord); } else { result = atomic_cas_full(&R->lum->child.AO_val2, R->lumC, newWord); } } if (result) { retireDeletedNodes(data, (node_t *) get_addr(R->lumC), target); } return result; } //------------------------------------------------------------------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------------------------------------------------------------------- template int natarajan_ext_bst_lf::inject(thread_data_t* data, seekRecord_t* R, int op) { // pL is free //1. 
Flag L AO_t newWord = create_child_word((node_t *)get_addr(R->pL), UNMARK, FLAG); int result; if (R->isLeftL) { result = atomic_cas_full(&R->parent->child.AO_val1, R->pL, newWord); } else { result = atomic_cas_full(&R->parent->child.AO_val2, R->pL, newWord); } return result; } template sval_t natarajan_ext_bst_lf::insertIfAbsent(thread_data_t* data, skey_t key, sval_t value) { int injectResult; // int fasttry = 0; while (true) { recmgr->leaveQuiescentState(data->id); seekRecord_t* R = insseek(data, key, INSERT); // fasttry++; if (R->leafKey == key) { // if (fasttry == 1) { return R->leafValue; // } else { // return NO_VALUE; // } } if (!is_free(R->pL)) { help_conflicting_operation(data, R); recmgr->enterQuiescentState(data->id); continue; } // key not present in the tree. Insert injectResult = perform_one_insert_window_operation(data, R, key, value); if (injectResult == 1) { // Operation injected and executed recmgr->enterQuiescentState(data->id); return NO_VALUE; } recmgr->enterQuiescentState(data->id); } // execute insert window operation. } template sval_t natarajan_ext_bst_lf::delete_node(thread_data_t* data, skey_t key) { int injectResult; sval_t retval = NO_VALUE; while (true) { recmgr->leaveQuiescentState(data->id); seekRecord_t* R = delseek(data, key, DELETE); if (R == NULL) { recmgr->enterQuiescentState(data->id); return retval; } // key is present in the tree. Inject operation into the tree if (!is_free(R->pL)) { help_conflicting_operation(data, R); recmgr->enterQuiescentState(data->id); continue; } injectResult = inject(data, R, DELETE); if (injectResult == 1) { retval = R->leafValue; // recmgr->retire(data->id, R->leaf); // if we won consensus and injected the operation, we retire the replaced leaf. (the replaced parent is retired by the guy who marks the sibling pointer in the parent.) // Operation injected //data->numActualDelete++; int res = perform_one_delete_window_operation(data, R, key); if (res == 1) { // operation successfully executed. recmgr->enterQuiescentState(data->id); return retval; } else { // window transaction could not be executed. // perform secondary seek. while (true) { R = secondary_seek(data, key, R); if (R == NULL) { // flagged leaf not found. Operation has been executed by some other process. recmgr->enterQuiescentState(data->id); return retval; } res = perform_one_delete_window_operation(data, R, key); if (res == 1) { recmgr->enterQuiescentState(data->id); return retval; } } } } recmgr->enterQuiescentState(data->id); // otherwise, operation was not injected. Restart. } } template int natarajan_ext_bst_lf::perform_one_insert_window_operation(thread_data_t* data, seekRecord_t* R, skey_t newKey, sval_t value) { node_t * newInt; node_t * newLeaf; // if(data->recycledNodes.empty()){ // node_t * allocedNodeArr = (node_t *)malloc(2 * sizeof (struct node_t)); // new pointerNode_t[2]; // newInt = &allocedNodeArr[0]; // newLeaf = &allocedNodeArr[1]; newInt = recmgr->template allocate>(data->id); if (newInt == NULL) { error("out of memory"); } #ifdef __HANDLE_STATS GSTATS_APPEND(data->id, node_allocated_addresses, (long long) newInt); #endif newLeaf = recmgr->template allocate>(data->id); if (newLeaf == NULL) { error("out of memory"); } #ifdef __HANDLE_STATS GSTATS_APPEND(data->id, node_allocated_addresses, (long long) newLeaf); #endif /* } else{ // reuse memory of previously allocated nodes. 
newInt = data->recycledNodes.back(); data->recycledNodes.pop_back(); newLeaf = data->recycledNodes.back(); data->recycledNodes.pop_back(); } */ newLeaf->child.AO_val1 = (size_t) NULL; newLeaf->child.AO_val2 = (size_t) NULL; newLeaf->key = newKey; newLeaf->value = value; node_t * existLeaf = (node_t *)get_addr(R->pL); skey_t existKey = R->leafKey; if (cmp(newKey, existKey)) { // key is to be inserted on lchild newInt->key = existKey; newInt->child.AO_val1 = create_child_word(newLeaf, 0, 0); newInt->child.AO_val2 = create_child_word(existLeaf, 0, 0); } else { // key is to be inserted on rchild newInt->key = newKey; newInt->child.AO_val2 = create_child_word(newLeaf, 0, 0); newInt->child.AO_val1 = create_child_word(existLeaf, 0, 0); } // cas to replace window AO_t newCasField; newCasField = create_child_word(newInt, UNMARK, UNFLAG); int result; if (R->isLeftL) { result = atomic_cas_full(&R->parent->child.AO_val1, R->pL, newCasField); } else { result = atomic_cas_full(&R->parent->child.AO_val2, R->pL, newCasField); } if (result == 1) { // successfully inserted. //data->numInsert++; return 1; } else { // reuse data and pointer nodes recmgr->deallocate(data->id, newInt); recmgr->deallocate(data->id, newLeaf); //data->recycledNodes.push_back(newInt); //data->recycledNodes.push_back(newLeaf); return 0; } } /*************************************************************************************************/ template int natarajan_ext_bst_lf::perform_one_delete_window_operation(thread_data_t* data, seekRecord_t* R, skey_t key) { // mark sibling. AO_t pS; bool markResult = 0; if (R->isLeftL) { // L is the left child of P markResult = mark_Node(&R->parent->child.AO_val2); pS = R->parent->child.AO_val2; } else { markResult = mark_Node(&R->parent->child.AO_val1); pS = R->parent->child.AO_val1; } //cout<<"key="<leafKey<<" markResult="<parent // recmgr->retire(data->id, R->parent); // } AO_t newWord; if (is_flagged(pS)) { newWord = create_child_word((node_t *)get_addr(pS), UNMARK, FLAG); } else { newWord = create_child_word((node_t *)get_addr(pS), UNMARK, UNFLAG); } int result; if (R->isLeftUM) { result = atomic_cas_full(&R->lum->child.AO_val1, R->lumC, newWord); } else { result = atomic_cas_full(&R->lum->child.AO_val2, R->lumC, newWord); } if (result) { retireDeletedNodes(data, (node_t *) get_addr(R->lumC), (node_t *) get_addr(pS)); } return result; } #endif /* NATARAJAN_EXT_BST_LF_IMPL_H */ ================================================ FILE: graphs/BenchmarkLatencyCounter.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. 
See LICENSE.txt */

#ifndef _BENCHMARK_LATENCY_COUNTER_H_
#define _BENCHMARK_LATENCY_COUNTER_H_

#include <atomic>
#include <chrono>
#include <thread>
#include <string>
#include <vector>
#include <algorithm>
#include <iostream>

using namespace std;
using namespace chrono;

/**
 * This is a micro-benchmark for measuring latency on an array of counters
 */
class BenchmarkLatencyCounter {

private:
    // Latency constants
    static const long long kLatencyMeasures = 1000000LL;  // We measure 1M iterations
    static const long long kLatencyWarmups  =  100000LL;  // Plus this many warmup iterations
    static const long long NSEC_IN_SEC = 1000000000LL;
    static const uint64_t NUM_COUNTERS = 64;

    int numThreads;

public:
    struct Result {
        uint64_t delay50000;
        uint64_t delay90000;
        uint64_t delay99000;
        uint64_t delay99900;
        uint64_t delay99990;
        uint64_t delay99999;
    };

    BenchmarkLatencyCounter(int numThreads) {
        this->numThreads = numThreads;
    }

    /*
     * Execute latency benchmarks
     * We only do one run for this benchmark
     */
    template<typename TM, template<typename> class TMTYPE>
    Result latencyBenchmark(std::string& className) {
        atomic<bool> start = { false };
        TMTYPE<uint64_t>* counters;
        TM::template updateTx([&] () { // It's ok to pass by reference because we're single-threaded
            counters = (TMTYPE<uint64_t>*)TM::tmMalloc(sizeof(TMTYPE<uint64_t>)*NUM_COUNTERS);
            for (int i = 0; i < NUM_COUNTERS; i++) counters[i] = 0;
        });
        auto latency_lambda = [this,&start,&counters](nanoseconds* delays, const int tid) {
            long long delayIndex = 0;
            while (!start.load()) this_thread::yield();
            // Warmup + Measurements
            for (int iter=0; iter < (kLatencyWarmups+kLatencyMeasures)/numThreads; iter++) {
                // Alternate transactions between left-right and right-left
                auto startBeats = steady_clock::now();
                TM::updateTx([=] () {
                    for (int i = 0; i < NUM_COUNTERS; i++) counters[i] = counters[i]+1;
                });
                auto stopBeats = steady_clock::now();
                if (iter >= kLatencyWarmups/numThreads) delays[delayIndex++] = (stopBeats-startBeats);
                TM::updateTx([=] () {
                    for (int i = NUM_COUNTERS-1; i > 0; i--) counters[i] = counters[i]+1;
                });
            }
        };
        nanoseconds* delays[numThreads];
        for (int it = 0; it < numThreads; it++) {
            delays[it] = new nanoseconds[kLatencyMeasures/numThreads];
            for (int imeas=0; imeas < kLatencyMeasures/numThreads; imeas++) delays[it][imeas] = 0ns;
        }
        cout << "##### " << TM::className() << " ##### \n";
        className = TM::className();
        thread latencyThreads[numThreads];
        for (int tid = 0; tid < numThreads; tid++) latencyThreads[tid] = thread(latency_lambda, delays[tid], tid);
        start.store(true);
        this_thread::sleep_for(50ms);
        for (int tid = 0; tid < numThreads; tid++) latencyThreads[tid].join();
        // Aggregate all the measured delays and compute the percentiles
        cout << "Aggregating delays for " << kLatencyMeasures/1000000 << " million measurements...\n";
        vector<nanoseconds> aggDelay(kLatencyMeasures);
        long long idx = 0;
        for (int it = 0; it < numThreads; it++) {
            for (int i = 0; i < kLatencyMeasures/numThreads; i++) {
                aggDelay[idx] = delays[it][i];
                idx++;
            }
        }
        // Sort the aggregated delays
        cout << "Sorting delays...\n";
        sort(aggDelay.begin(), aggDelay.end());
        // Show the 50% (median), 70%, 80%, 90%, 99%, 99.9%, 99.99%, 99.999% and maximum, in microseconds
        long per50000 = (long)(kLatencyMeasures*50000LL/100000LL);
        long per70000 = (long)(kLatencyMeasures*70000LL/100000LL);
        long per80000 = (long)(kLatencyMeasures*80000LL/100000LL);
        long per90000 = (long)(kLatencyMeasures*90000LL/100000LL);
        long per99000 = (long)(kLatencyMeasures*99000LL/100000LL);
        long per99900 = (long)(kLatencyMeasures*99900LL/100000LL);
        long per99990 = (long)(kLatencyMeasures*99990LL/100000LL);
        long per99999 = (long)(kLatencyMeasures*99999LL/100000LL);
        long imax = kLatencyMeasures-1;
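        // Worked example (editor's note, not in the original file): with the
        // kLatencyMeasures = 1,000,000 sorted samples above, the p-th percentile is read
        // at index kLatencyMeasures*p/100000, where p is expressed in thousandths of a
        // percent. E.g. per99900 = 1000000*99900/100000 = 999000, so aggDelay[999000]
        // holds the 99.9% latency and aggDelay[kLatencyMeasures-1] the observed maximum.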
cout << "Enqueue delay (us): 50%=" << aggDelay[per50000].count()/1000 << " 70%=" << aggDelay[per70000].count()/1000 << " 80%=" << aggDelay[per80000].count()/1000 << " 90%=" << aggDelay[per90000].count()/1000 << " 99%=" << aggDelay[per99000].count()/1000 << " 99.9%=" << aggDelay[per99900].count()/1000 << " 99.99%=" << aggDelay[per99990].count()/1000 << " 99.999%=" << aggDelay[per99999].count()/1000 << " max=" << aggDelay[imax].count()/1000 << "\n"; Result res = { (uint64_t)aggDelay[per50000].count()/1000, (uint64_t)aggDelay[per90000].count()/1000, (uint64_t)aggDelay[per99000].count()/1000, (uint64_t)aggDelay[per99900].count()/1000, (uint64_t)aggDelay[per99990].count()/1000, (uint64_t)aggDelay[per99999].count()/1000 }; /* // Show in csv format cout << "delay (us):\n"; cout << "50, " << aggDelay[per50000].count()/1000 << "\n"; cout << "90, " << aggDelay[per90000].count()/1000 << "\n"; cout << "99, " << aggDelay[per99000].count()/1000 << "\n"; cout << "99.9, " << aggDelay[per99900].count()/1000 << "\n"; cout << "99.99, " << aggDelay[per99990].count()/1000 << "\n"; cout << "99.999, " << aggDelay[per99999].count()/1000 << "\n"; */ TM::template updateTx([&] () { // It's ok to pass by reference because we're single-threaded TM::tmFree(counters); }); // Cleanup for (int it = 0; it < numThreads; it++) delete[] delays[it]; return res; } #ifdef NEVER public: static void allLatencyTests() { // Burst Latency benchmarks //vector threadList = { 30, 30, 30, 30, 30, 30, 30 }; // For the latency table in the paper //vector threadList = { 4 }; vector threadList = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 30, 32 }; for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } /* for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } */ } #endif }; #endif ================================================ FILE: graphs/BenchmarkLatencyQueues.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. 
See LICENSE.txt */ #ifndef _BENCHMARK_LATENCY_Q_H_ #define _BENCHMARK_LATENCY_Q_H_ #include #include #include #include #include #include using namespace std; using namespace chrono; /** * This is a micro-benchmark for measuring latency */ class BenchmarkLatencyQueues { private: struct UserData { long long seq; int tid; UserData(long long lseq, int ltid) { this->seq = lseq; this->tid = ltid; } UserData() { this->seq = -2; this->tid = -2; } UserData(const UserData &other) : seq(other.seq), tid(other.tid) { } bool operator < (const UserData& other) const { return seq < other.seq; } }; struct Result { nanoseconds nsEnq = 0ns; nanoseconds nsDeq = 0ns; long long numEnq = 0; long long numDeq = 0; long long totOpsSec = 0; Result() { } Result(const Result &other) { nsEnq = other.nsEnq; nsDeq = other.nsDeq; numEnq = other.numEnq; numDeq = other.numDeq; totOpsSec = other.totOpsSec; } bool operator < (const Result& other) const { return totOpsSec < other.totOpsSec; } }; // Latency constants static const long long kLatencyMeasures = 200000000LL; // We measure 100M iterations divided among the different threads static const long long kLatencyWarmupIterations = 10; // At start of latency tests we do 10M warmup enqueues and dequeues in bursts of 100K static const long long kLatencyIterations = 200; // We do this many iterations of 100K enqueues and dequeues until we get kLatencyMeasures static const long long kLatencyBurst = kLatencyMeasures/kLatencyIterations; static const long long NSEC_IN_SEC = 1000000000LL; int numThreads; int numRuns; seconds testLengthSeconds; public: BenchmarkLatencyQueues(int numThreads, int numRuns, seconds testLength) { this->numThreads = numThreads; this->numRuns = numRuns; this->testLengthSeconds = testLength; } /* * Execute latency benchmarks * Make sure to enable high priority for the Windows process * * We can use this Mathematica function to compute the Inverse CDF of a Poisson and model the latency at 99.99% for lock-free algorithms: * https://reference.wolfram.com/language/ref/InverseCDF.html * * We only do one run for this benchmark */ template void latencyBurstBenchmark() { atomic startEnq = { false }; atomic startDeq = { false }; atomic barrier = { 0 }; Q* queue = new Q(numThreads); auto latency_lambda = [this,&startEnq,&startDeq,&barrier,&queue](nanoseconds* enqDelays, nanoseconds* deqDelays, const int tid) { UserData ud(0,0); long long enqDelayIndex = 0; long long deqDelayIndex = 0; // Warmup + Measurements for (int iter=0; iter < kLatencyIterations+kLatencyWarmupIterations; iter++) { // Start with enqueues while (!startEnq.load()) this_thread::yield(); for (long long i = 0; i < kLatencyBurst/numThreads; i++) { auto startBeats = steady_clock::now(); queue->enqueue(&ud, tid); auto stopBeats = steady_clock::now(); if (iter >= kLatencyWarmupIterations) enqDelays[enqDelayIndex++] = (stopBeats-startBeats); } if (barrier.fetch_add(1) == numThreads) cout << "ERROR: in barrier\n"; // dequeues while (!startDeq.load()) this_thread::yield(); for (long long i = 0; i < kLatencyBurst/numThreads; i++) { auto startBeats = steady_clock::now(); if (queue->dequeue(tid) == nullptr) cout << "ERROR: dequeued nullptr in i=" << i << "\n"; auto stopBeats = steady_clock::now(); if (iter >= kLatencyWarmupIterations) deqDelays[deqDelayIndex++] = (stopBeats-startBeats); } if (barrier.fetch_add(1) == numThreads) cout << "ERROR: in barrier\n"; } }; nanoseconds* enqDelays[numThreads]; // Half enqueues and half dequeues nanoseconds* deqDelays[numThreads]; for (int it = 0; it < numThreads; it++) { 
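            // Editor's note (not in the original file): the measurement loop further down
            // is a two-phase handshake. Workers spin on startEnq, run one enqueue burst of
            // kLatencyBurst/numThreads timed operations, then fetch_add(1) on 'barrier';
            // the main thread waits until barrier == numThreads, CASes it back to 0, and
            // repeats the same dance with startDeq for the dequeue burst. A burst therefore
            // never mixes enqueue and dequeue latencies across threads.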
enqDelays[it] = new nanoseconds[kLatencyMeasures/numThreads]; deqDelays[it] = new nanoseconds[kLatencyMeasures/numThreads]; for (int imeas=0; imeas < kLatencyMeasures/numThreads; imeas++) { enqDelays[it][imeas] = 0ns; deqDelays[it][imeas] = 0ns; } } cout << "##### " << queue->className() << " ##### \n"; thread latencyThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) latencyThreads[tid] = thread(latency_lambda, enqDelays[tid], deqDelays[tid], tid); this_thread::sleep_for(50ms); for (int iter=0; iter < kLatencyIterations+kLatencyWarmupIterations; iter++) { // enqueue round startEnq.store(true); while (barrier.load() != numThreads) this_thread::yield(); startEnq.store(false); long tmp = numThreads; if (!barrier.compare_exchange_strong(tmp, 0)) cout << "ERROR: CAS\n"; // dequeue round startDeq.store(true); while (barrier.load() != numThreads) this_thread::yield(); startDeq.store(false); tmp = numThreads; if (!barrier.compare_exchange_strong(tmp, 0)) cout << "ERROR: CAS\n"; } for (int tid = 0; tid < numThreads; tid++) latencyThreads[tid].join(); delete queue; // Aggregate all the delays for enqueues and dequeues and compute the maxs cout << "Aggregating delays for " << kLatencyMeasures/1000000 << " million measurements...\n"; vector aggEnqDelay(kLatencyMeasures); long long idx = 0; for (int it = 0; it < numThreads; it++) { for (int i = 0; i < kLatencyMeasures/numThreads; i++) { aggEnqDelay[idx] = enqDelays[it][i]; idx++; } } vector aggDeqDelay(kLatencyMeasures); idx = 0; for (int it = 0; it < numThreads; it++) { for (int i = 0; i < kLatencyMeasures/numThreads; i++) { aggDeqDelay[idx] = deqDelays[it][i]; idx++; } } // Sort the aggregated delays cout << "Sorting delays...\n"; sort(aggEnqDelay.begin(), aggEnqDelay.end()); sort(aggDeqDelay.begin(), aggDeqDelay.end()); // Show the 50% (median), 90%, 99%, 99.9%, 99.99%, 99.999% and maximum in microsecond/nanoseconds units long per50000 = (long)(kLatencyMeasures*50000LL/100000LL); long per70000 = (long)(kLatencyMeasures*70000LL/100000LL); long per80000 = (long)(kLatencyMeasures*80000LL/100000LL); long per90000 = (long)(kLatencyMeasures*90000LL/100000LL); long per99000 = (long)(kLatencyMeasures*99000LL/100000LL); long per99900 = (long)(kLatencyMeasures*99900LL/100000LL); long per99990 = (long)(kLatencyMeasures*99990LL/100000LL); long per99999 = (long)(kLatencyMeasures*99999LL/100000LL); long imax = kLatencyMeasures-1; cout << "Enqueue delay (us): 50%=" << aggEnqDelay[per50000].count()/1000 << " 70%=" << aggEnqDelay[per70000].count()/1000 << " 80%=" << aggEnqDelay[per80000].count()/1000 << " 90%=" << aggEnqDelay[per90000].count()/1000 << " 99%=" << aggEnqDelay[per99000].count()/1000 << " 99.9%=" << aggEnqDelay[per99900].count()/1000 << " 99.99%=" << aggEnqDelay[per99990].count()/1000 << " 99.999%=" << aggEnqDelay[per99999].count()/1000 << " max=" << aggEnqDelay[imax].count()/1000 << "\n"; cout << "Dequeue delay (us): 50%=" << aggDeqDelay[per50000].count()/1000 << " 70%=" << aggDeqDelay[per70000].count()/1000 << " 80%=" << aggDeqDelay[per80000].count()/1000 << " 90%=" << aggDeqDelay[per90000].count()/1000 << " 99%=" << aggDeqDelay[per99000].count()/1000 << " 99.9%=" << aggDeqDelay[per99900].count()/1000 << " 99.99%=" << aggDeqDelay[per99990].count()/1000 << " 99.999%=" << aggDeqDelay[per99999].count()/1000 << " max=" << aggDeqDelay[imax].count()/1000 << "\n"; // Show in csv format cout << "Enqueue delay (us):\n"; cout << "50, " << aggEnqDelay[per50000].count()/1000 << "\n"; cout << "90, " << aggEnqDelay[per90000].count()/1000 << "\n"; cout 
<< "99, " << aggEnqDelay[per99000].count()/1000 << "\n"; cout << "99.9, " << aggEnqDelay[per99900].count()/1000 << "\n"; cout << "99.99, " << aggEnqDelay[per99990].count()/1000 << "\n"; cout << "99.999, " << aggEnqDelay[per99999].count()/1000 << "\n"; cout << "Dequeue delay (us):\n"; cout << "50, " << aggDeqDelay[per50000].count()/1000 << "\n"; cout << "90, " << aggDeqDelay[per90000].count()/1000 << "\n"; cout << "99, " << aggDeqDelay[per99000].count()/1000 << "\n"; cout << "99.9, " << aggDeqDelay[per99900].count()/1000 << "\n"; cout << "99.99, " << aggDeqDelay[per99990].count()/1000 << "\n"; cout << "99.999, " << aggDeqDelay[per99999].count()/1000 << "\n"; // Cleanup for (int it = 0; it < numThreads; it++) { delete[] enqDelays[it]; delete[] deqDelays[it]; } } #ifdef NEVER public: static void allLatencyTests() { // Burst Latency benchmarks //vector threadList = { 30, 30, 30, 30, 30, 30, 30 }; // For the latency table in the paper //vector threadList = { 4 }; vector threadList = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 30, 32 }; for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } /* for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } for (int nThreads : threadList) { BenchmarkLatencyQ bench(nThreads, 0, 0s); // Only the numThreads is used in this test std::cout << "\n----- Burst Latency numThreads=" << bench.numThreads << " kLatencyMeasures=" << kLatencyMeasures/1000000LL << "M -----\n"; bench.latencyBurstBenchmark>(); } */ } #endif }; #endif ================================================ FILE: graphs/BenchmarkMaps.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. 
See LICENSE.txt */

#ifndef _BENCHMARK_MAPS_H_
#define _BENCHMARK_MAPS_H_

#include <atomic>
#include <chrono>
#include <thread>
#include <iostream>
#include <vector>
#include <algorithm>
#include <functional>

using namespace std;
using namespace chrono;

// Regular UserData
struct UserData {
    long long seq;
    int tid;
    UserData(long long lseq, int ltid=0) { this->seq = lseq; this->tid = ltid; }
    UserData() { this->seq = -2; this->tid = -2; }
    UserData(const UserData &other) : seq(other.seq), tid(other.tid) { }
    bool operator < (const UserData& other) const { return seq < other.seq; }
    bool operator == (const UserData& other) const { return seq == other.seq && tid == other.tid; }
    bool operator != (const UserData& other) const { return seq != other.seq || tid != other.tid; }
};

namespace std {
    template <>
    struct hash<UserData> {
        std::size_t operator()(const UserData& k) const {
            using std::size_t;
            using std::hash;
            return (hash<long long>()(k.seq)); // This hash has no collisions, which is unrealistic
        }
    };
}

/**
 * This is a micro-benchmark of maps, used in the CX paper
 */
class BenchmarkMaps {

private:
    struct Result {
        nanoseconds nsEnq = 0ns;
        nanoseconds nsDeq = 0ns;
        long long numEnq = 0;
        long long numDeq = 0;
        long long totOpsSec = 0;
        Result() { }
        Result(const Result &other) {
            nsEnq = other.nsEnq;
            nsDeq = other.nsDeq;
            numEnq = other.numEnq;
            numDeq = other.numDeq;
            totOpsSec = other.totOpsSec;
        }
        bool operator < (const Result& other) const { return totOpsSec < other.totOpsSec; }
    };

    static const long long NSEC_IN_SEC = 1000000000LL;
    int numThreads;

public:
    BenchmarkMaps(int numThreads) { this->numThreads = numThreads; }

    /**
     * When doing "updates" we execute a random removal and if the removal is successful we do a put() of the
     * same item immediately after. This keeps the size of the data structure equal to the original size (minus
     * MAX_THREADS items at most) which gives more deterministic results.
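     *
     * A minimal sketch of that update step (mirroring rw_lambda below; keyarray,
     * valarray and ix are the names used there):
     *
     *   if (set->remove(*keyarray[ix])) {             // random removal
     *       numOps++;
     *       set->put(*keyarray[ix], *valarray[ix]);   // re-insert the same key/value
     *   }
     *   numOps++;
     *
     * A successful remove-and-put thus counts as two operations and a failed remove as one.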
*/ template class S, typename K, typename V> long long benchmark(const int updateRatio, const seconds testLengthSeconds, const int numRuns, const int numElements, const bool dedicated=false) { long long ops[numThreads][numRuns]; long long lengthSec[numRuns]; atomic quit = { false }; atomic startFlag = { false }; S* set = nullptr; #ifdef TINY_STM stm_init_thread(); //const int tid = 0; //WRITE_TX_BEGIN //set = TM_ALLOC(); //WRITE_TX_END #endif // Create all the keys and values in the concurrent set K** keyarray = new K*[numElements]; for (int i = 0; i < numElements; i++) keyarray[i] = new K(i); V** valarray = new V*[numElements]; for (int i = 0; i < numElements; i++) valarray[i] = new V(i); // Can either be a Reader or a Writer auto rw_lambda = [&](const int updateRatio, long long *ops, const int tid) { uint64_t accum = 0; long long numOps = 0; #ifdef TINY_STM stm_init_thread(); #endif while (!startFlag.load()) ; // spin uint64_t seed = tid+1234567890123456781ULL; while (!quit.load()) { seed = randomLong(seed); int update = seed%1000; seed = randomLong(seed); auto ix = (unsigned int)(seed%numElements); if (update < updateRatio) { // I'm a Writer if (set->remove(*keyarray[ix])) { numOps++; set->put(*keyarray[ix], *valarray[ix]); } numOps++; } else { // I'm a Reader set->get(*keyarray[ix]); seed = randomLong(seed); ix = (unsigned int)(seed%numElements); set->get(*keyarray[ix]); numOps+=2; } } *ops = numOps; #ifdef TINY_STM stm_exit_thread(); #endif }; for (int irun = 0; irun < numRuns; irun++) { set = new S(); // Add all the items to the list set->addAll(keyarray, valarray, numElements); if (irun == 0) std::cout << "##### " << set->className() << " ##### \n"; thread rwThreads[numThreads]; if (dedicated) { rwThreads[0] = thread(rw_lambda, 1000, &ops[0][irun], 0); rwThreads[1] = thread(rw_lambda, 1000, &ops[1][irun], 1); for (int tid = 2; tid < numThreads; tid++) rwThreads[tid] = thread(rw_lambda, updateRatio, &ops[tid][irun], tid); } else { for (int tid = 0; tid < numThreads; tid++) rwThreads[tid] = thread(rw_lambda, updateRatio, &ops[tid][irun], tid); } this_thread::sleep_for(100ms); auto startBeats = steady_clock::now(); startFlag.store(true); // Sleep for testLengthSeconds seconds this_thread::sleep_for(testLengthSeconds); quit.store(true); auto stopBeats = steady_clock::now(); for (int tid = 0; tid < numThreads; tid++) rwThreads[tid].join(); lengthSec[irun] = (stopBeats-startBeats).count(); if (dedicated) { // We don't account for the write-only operations but we aggregate the values from the two threads and display them std::cout << "Mutative transactions per second = " << (ops[0][irun] + ops[1][irun])*1000000000LL/lengthSec[irun] << "\n"; ops[0][irun] = 0; ops[1][irun] = 0; } quit.store(false); startFlag.store(false); // Measure the time the destructor takes to complete and if it's more than 1 second, print it out auto startDel = steady_clock::now(); #ifdef TINY_STM WRITE_TX_BEGIN TM_FREE(set); WRITE_TX_END #endif delete set; auto stopDel = steady_clock::now(); if ((startDel-stopDel).count() > NSEC_IN_SEC) { std::cout << "Destructor took " << (startDel-stopDel).count()/NSEC_IN_SEC << " seconds\n"; } // Compute ops at the end of each run long long agg = 0; for (int tid = 0; tid < numThreads; tid++) { agg += ops[tid][irun]*1000000000LL/lengthSec[irun]; } } for (int i = 0; i < numElements; i++) delete keyarray[i]; delete[] keyarray; // Accounting vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { for (int tid = 0; tid < numThreads; tid++) { agg[irun] += 
ops[tid][irun]*1000000000LL/lengthSec[irun]; } } // Compute the median. numRuns must be an odd number sort(agg.begin(),agg.end()); auto maxops = agg[numRuns-1]; auto minops = agg[0]; auto medianops = agg[numRuns/2]; auto delta = (long)(100.*(maxops-minops) / ((double)medianops)); // Printed value is the median of the number of ops per second that all threads were able to accomplish (on average) std::cout << "Ops/sec = " << medianops << " delta = " << delta << "% min = " << minops << " max = " << maxops << "\n"; #ifdef TINY_STM stm_exit_thread(); #endif return medianops; } /** * An imprecise but fast random number generator */ uint64_t randomLong(uint64_t x) { x ^= x >> 12; // a x ^= x << 25; // b x ^= x >> 27; // c return x * 2685821657736338717LL; } }; #endif ================================================ FILE: graphs/BenchmarkQueues.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _BENCHMARK_Q_H_ #define _BENCHMARK_Q_H_ #include #include #include #include #include #include #include using namespace std; using namespace chrono; struct UserData { long long seq; int tid; UserData(long long lseq, int ltid) { this->seq = lseq; this->tid = ltid; } UserData() { this->seq = -2; this->tid = -2; } UserData(const UserData &other) : seq(other.seq), tid(other.tid) { } bool operator < (const UserData& other) const { return seq < other.seq; } }; /** * This is a micro-benchmark to run the tests shown in CRTurnQueue paper * *
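 *
 * Both benchmarks below report the median of numRuns runs; e.g. enqDeq()
 * computes its result exactly as in its final lines:
 *
 *   median    = agg[numRuns/2].count()/numThreads;  // per-thread ns of the median run
 *   opsPerSec = numPairs*2*NSEC_IN_SEC/median;      // x2 because each pair is one enq + one deq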

 * Performance Benchmarks
 * TODO
 *
 * Latency Distribution
* * - We fire up 28 threads of type LatencyThread; * - Each thread does alternatively 1000 enqueue() and 1000 dequeue(). All dequeues are non-null; * - After start, each thread does 1M iterations as warmup. * - Measurements are done for 4M iterations, that are saved in a local array, 2M enqueue and 2M dequeue; * - * */ class BenchmarkQueues { private: struct Result { nanoseconds nsEnq = 0ns; nanoseconds nsDeq = 0ns; long long numEnq = 0; long long numDeq = 0; long long totOpsSec = 0; Result() { } Result(const Result &other) { nsEnq = other.nsEnq; nsDeq = other.nsDeq; numEnq = other.numEnq; numDeq = other.numDeq; totOpsSec = other.totOpsSec; } bool operator < (const Result& other) const { return totOpsSec < other.totOpsSec; } }; // Performance benchmark constants static const long long kNumPairsWarmup = 1000000LL; // Each threads does 1M iterations as warmup // Contants for Ping-Pong performance benchmark static const int kPingPongBatch = 1000; // Each thread starts by injecting 1k items in the queue static const long long NSEC_IN_SEC = 1000000000LL; int numThreads; public: BenchmarkQueues(int numThreads) { this->numThreads = numThreads; } /** * enqueue-dequeue pairs: in each iteration a thread executes an enqueue followed by a dequeue; * the benchmark executes 10^8 pairs partitioned evenly among all threads; */ template uint64_t enqDeq(std::string& className, const long numPairs, const int numRuns) { nanoseconds deltas[numThreads][numRuns]; atomic startFlag = { false }; Q* queue = nullptr; className = Q::className(); cout << "##### " << className << " ##### \n"; auto enqdeq_lambda = [this,&startFlag,&numPairs,&queue](nanoseconds *delta, const int tid) { UserData ud(0,0); while (!startFlag.load()) {} // Spin until the startFlag is set // Warmup phase for (long long iter = 0; iter < kNumPairsWarmup/numThreads; iter++) { queue->enqueue(&ud, tid); if (queue->dequeue(tid) == nullptr) cout << "Error at warmup dequeueing iter=" << iter << "\n"; } // Measurement phase auto startBeats = steady_clock::now(); for (long long iter = 0; iter < numPairs/numThreads; iter++) { queue->enqueue(&ud, tid); if (queue->dequeue(tid) == nullptr) cout << "Error at measurement dequeueing iter=" << iter << "\n"; } auto stopBeats = steady_clock::now(); *delta = stopBeats - startBeats; }; for (int irun = 0; irun < numRuns; irun++) { queue = new Q(numThreads); thread enqdeqThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid] = thread(enqdeq_lambda, &deltas[tid][irun], tid); startFlag.store(true); // Sleep for 2 seconds just to let the threads see the startFlag this_thread::sleep_for(2s); for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid].join(); startFlag.store(false); delete (Q*)queue; } // Sum up all the time deltas of all threads so we can find the median run vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { agg[irun] = 0ns; for (int tid = 0; tid < numThreads; tid++) { agg[irun] += deltas[tid][irun]; } } // Compute the median. 
numRuns should be an odd number sort(agg.begin(),agg.end()); auto median = agg[numRuns/2].count()/numThreads; // Normalize back to per-thread time (mean of time for this run) cout << "Total Ops/sec = " << numPairs*2*NSEC_IN_SEC/median << "\n"; return (numPairs*2*NSEC_IN_SEC/median); } /** * Start with only enqueues 100K/numThreads, wait for them to finish, then do only dequeues but only 100K/numThreads */ template void burst(std::string& className, uint64_t& resultsEnq, uint64_t& resultsDeq, const long long burstSize, const int numIters, const int numRuns, const bool isSC=false) { Result results[numThreads][numRuns]; atomic startEnq = { false }; atomic startDeq = { false }; atomic barrier = { 0 }; Q* queue = nullptr; auto burst_lambda = [this,&startEnq,&startDeq,&burstSize,&barrier,&numIters,&isSC,&queue](Result *res, const int tid) { UserData ud(0,0); // Warmup only if it is not Single-Consumer if (!isSC) { const long long warmupIters = 100000LL; // Do 100K for each thread as a warmup for (long long iter = 0; iter < warmupIters; iter++) queue->enqueue(&ud, tid); for (long long iter = 0; iter < warmupIters; iter++) { if (queue->dequeue(tid) == nullptr) cout << "ERROR: warmup dequeued nullptr in iter=" << iter << "\n"; } } // Measurements for (int iter = 0; iter < numIters; iter++) { // Start with enqueues while (!startEnq.load()) {} // spin is better than yield here auto startBeats = steady_clock::now(); for (long long i = 0; i < burstSize/numThreads; i++) { queue->enqueue(&ud, tid); } auto stopBeats = steady_clock::now(); res->nsEnq += (stopBeats-startBeats); res->numEnq += burstSize/numThreads; if (barrier.fetch_add(1) == numThreads) cout << "ERROR: in barrier\n"; // dequeues while (!startDeq.load()) { } // spin is better than yield here if (isSC) { // Handle the single-consumer case if (tid == 0) { startBeats = steady_clock::now(); // We need to deal with rounding errors in the single-consumer case for (long long i = 0; i < ((long long)(burstSize/numThreads))*numThreads; i++) { if (queue->dequeue(tid) == nullptr) { cout << "ERROR: dequeued nullptr in iter=" << i << "\n"; assert(false); } } stopBeats = steady_clock::now(); if (queue->dequeue(tid) != nullptr) cout << "ERROR: dequeued non-null, there must be duplicate items!\n"; res->nsDeq += (stopBeats-startBeats); res->numDeq += burstSize/numThreads; } } else { startBeats = steady_clock::now(); for (long long i = 0; i < burstSize/numThreads; i++) { if (queue->dequeue(tid) == nullptr) { cout << "ERROR: dequeued nullptr in iter=" << i << "\n"; assert(false); } } stopBeats = steady_clock::now(); res->nsDeq += (stopBeats-startBeats); res->numDeq += burstSize/numThreads; } if (barrier.fetch_add(1) == numThreads) cout << "ERROR: in barrier\n"; } }; for (int irun = 0; irun < numRuns; irun++) { queue = new Q(numThreads); if (irun == 0) { className = queue->className(); cout << "##### " << queue->className() << " ##### \n"; } thread burstThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) burstThreads[tid] = thread(burst_lambda, &results[tid][irun], tid); this_thread::sleep_for(100ms); for (int iter=0; iter < numIters; iter++) { // enqueue round startEnq.store(true); while (barrier.load() != numThreads) this_thread::yield(); startEnq.store(false); long tmp = numThreads; if (!barrier.compare_exchange_strong(tmp, 0)) cout << "ERROR: CAS\n"; // dequeue round startDeq.store(true); while (barrier.load() != numThreads) this_thread::yield(); startDeq.store(false); tmp = numThreads; if (!barrier.compare_exchange_strong(tmp, 0)) cout << 
"ERROR: CAS\n"; } for (int tid = 0; tid < numThreads; tid++) burstThreads[tid].join(); delete queue; } // Accounting vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { nanoseconds maxNsEnq = 0ns; nanoseconds maxNsDeq = 0ns; for (int tid = 0; tid < numThreads; tid++) { if (results[tid][irun].nsEnq > maxNsEnq) maxNsEnq = results[tid][irun].nsEnq; if (results[tid][irun].nsDeq > maxNsDeq) maxNsDeq = results[tid][irun].nsDeq; agg[irun].numEnq += results[tid][irun].numEnq; agg[irun].numDeq += results[tid][irun].numDeq; } agg[irun].nsEnq = maxNsEnq; agg[irun].nsDeq = maxNsDeq; agg[irun].totOpsSec = agg[irun].nsEnq.count()+agg[irun].nsDeq.count(); } // Compute the median. numRuns should be an odd number sort(agg.begin(),agg.end()); Result median = agg[numRuns/2]; const long long allThreadsEnqPerSec = median.numEnq*NSEC_IN_SEC/median.nsEnq.count(); const long long allThreadsDeqPerSec = median.numDeq*NSEC_IN_SEC/median.nsDeq.count(); // Printed value is the median of the number of ops per second that all threads were able to accomplish (on average) cout << "Enq/sec = " << allThreadsEnqPerSec << " Deq/sec = " << allThreadsDeqPerSec << "\n"; resultsEnq = allThreadsEnqPerSec; resultsDeq = allThreadsDeqPerSec; } }; #endif ================================================ FILE: graphs/BenchmarkSPS.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _BENCHMARK_SPS_H_ #define _BENCHMARK_SPS_H_ #include #include #include #include #include #include #include #include #include static const long arraySize = 1000; // 1k or 1M entries in the SPS array using namespace std; using namespace chrono; /** * This is a micro-benchmark */ class BenchmarkSPS { private: int numThreads; public: struct UserData { long long seq; int tid; UserData(long long lseq, int ltid) { this->seq = lseq; this->tid = ltid; } UserData() { this->seq = -2; this->tid = -2; } UserData(const UserData &other) : seq(other.seq), tid(other.tid) { } bool operator < (const UserData& other) const { return seq < other.seq; } }; BenchmarkSPS(int numThreads) { this->numThreads = numThreads; } /* * An array of integers that gets randomly permutated. 
*/ template class TMTYPE> uint64_t benchmarkSPSInteger(std::string& className, const seconds testLengthSeconds, const long numSwapsPerTx, const int numRuns) { long long ops[numThreads][numRuns]; long long lengthSec[numRuns]; atomic startFlag = { false }; atomic quit = { false }; className = TM::className(); cout << "##### " << TM::className() << " ##### \n"; // Create the array of integers and initialize it TMTYPE* parray; // It's ok to capture by reference, we're running single-threaded now TM::updateTx([&] () { parray = new TMTYPE[arraySize]; //parray = (TMTYPE*)TM::tmMalloc(sizeof(TMTYPE)*arraySize); } ); // Break up the initialization into transactions of 1k stores, so it fits in the log for (long j = 0; j < arraySize; j+=1000) { TM::updateTx([&] () { for (int i = 0; i < 1000 && i+j < arraySize; i++) parray[i+j] = i+j; } ); } auto func = [this,&startFlag,&quit,&numSwapsPerTx,&parray](long long *ops, const int tid) { uint64_t seed = tid+1234567890123456781ULL; // Spin until the startFlag is set while (!startFlag.load()) {} // Do transactions until the quit flag is set long long tcount = 0; while (!quit.load()) { TM::updateTx([&] () { for (int i = 0; i < numSwapsPerTx; i++) { seed = randomLong(seed); auto ia = seed%arraySize; uint64_t tmp = parray[ia]; seed = randomLong(seed); auto ib = seed%arraySize; parray[ia] = parray[ib]; parray[ib] = tmp; } } ); ++tcount; /* PE::read_transaction([this,&seed,&parray,&numWordsPerTransaction] () { PersistentArrayInt* read_array = PE::template get_object>(PIDX_INT_ARRAY); // Check that the array is consistent int sum = 0; for (int i = 0; i < arraySize; i++) { sum += read_array->counters[i]; } assert(sum == 0); } ); */ } *ops = tcount; }; for (int irun = 0; irun < numRuns; irun++) { if (irun == 0) className = TM::className(); thread enqdeqThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid] = thread(func, &ops[tid][irun], tid); auto startBeats = steady_clock::now(); startFlag.store(true); // Sleep for 20 seconds this_thread::sleep_for(testLengthSeconds); quit.store(true); auto stopBeats = steady_clock::now(); for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid].join(); lengthSec[irun] = (stopBeats-startBeats).count(); startFlag.store(false); quit.store(false); } // It's ok to capture by reference, we're running single-threaded now TM::updateTx([&] () { delete[] parray; //TM::tmFree(parray); }); // Accounting vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { for(int i=0;i class TMTYPE, typename TMBASE> uint64_t benchmarkSPSObject(std::string& className, const seconds testLengthSeconds, const long numSwapsPerTx, const int numRuns) { long long ops[numThreads][numRuns]; long long lengthSec[numRuns]; atomic startFlag = { false }; atomic quit = { false }; struct MyObject : public TMBASE { uint64_t a {0}; // For the OneFile STMs these don't need to be tmtypes because they're immutable after visible in this benchmark uint64_t b {0}; MyObject(uint64_t a0, uint64_t b0) { a = a0; b = b0; } MyObject(const MyObject &other) { a = other.a; b = other.b; } }; // Create the array of integers and initialize it TMTYPE* parray; parray = new TMTYPE[arraySize]; // Break up the initialization into transactions of 1k stores, so it fits in the log for (long j = 0; j < arraySize; j+=1000) { TM::updateTx([&] () { for (int i = 0; i < 1000 && i+j < arraySize; i++) parray[i+j] = TM::template tmNew((uint64_t)i+j,(uint64_t)i); } ); } /* TM::updateTx([&] () { for (int i = 0; i < arraySize; i++) parray[i] = TM::template 
tmNew((uint64_t)i,(uint64_t)i); } ); */ auto func = [this,&startFlag,&quit,&numSwapsPerTx,&parray](long long *ops, const int tid) { uint64_t seed = tid+1234567890123456781ULL; // Spin until the startFlag is set while (!startFlag.load()) {} // Do transactions until the quit flag is set long long tcount = 0; while (!quit.load()) { TM::updateTx([&] () { for (int i = 0; i < numSwapsPerTx; i++) { seed = randomLong(seed); auto ia = seed%arraySize; // Create a new object with the same contents to replace the old object, at a random location MyObject* tmp = TM::template tmNew(*parray[ia]); TM::template tmDelete(parray[ia]); parray[ia] = tmp; } } ); ++tcount; } *ops = tcount; }; for (int irun = 0; irun < numRuns; irun++) { if (irun == 0) { className = TM::className(); cout << "##### " << TM::className() << " ##### \n"; } thread enqdeqThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid] = thread(func, &ops[tid][irun], tid); auto startBeats = steady_clock::now(); startFlag.store(true); // Sleep for 20 seconds this_thread::sleep_for(testLengthSeconds); quit.store(true); auto stopBeats = steady_clock::now(); for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid].join(); lengthSec[irun] = (stopBeats-startBeats).count(); startFlag.store(false); quit.store(false); } TM::updateTx([&] () { for (int i = 0; i < arraySize; i++) TM::template tmDelete(parray[i]); }); delete[] parray; // Accounting vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { for(int i=0;i> 12; // a x ^= x << 25; // b x ^= x >> 27; // c return x * 2685821657736338717LL; } }; #endif ================================================ FILE: graphs/BenchmarkSets.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _BENCHMARK_SETS_H_ #define _BENCHMARK_SETS_H_ #include #include #include #include #include #include #include using namespace std; using namespace chrono; // Regular UserData struct UserData { long long seq; int tid; UserData(long long lseq, int ltid=0) { this->seq = lseq; this->tid = ltid; } UserData() { this->seq = -2; this->tid = -2; } UserData(const UserData &other) : seq(other.seq), tid(other.tid) { } bool operator < (const UserData& other) const { return seq < other.seq; } bool operator == (const UserData& other) const { return seq == other.seq && tid == other.tid; } bool operator != (const UserData& other) const { return seq != other.seq || tid != other.tid; } }; namespace std { template <> struct hash { std::size_t operator()(const UserData& k) const { using std::size_t; using std::hash; return (hash()(k.seq)); // This hash has no collisions, which is irealistic } }; } /** * This is a micro-benchmark of sets */ class BenchmarkSets { private: struct Result { nanoseconds nsEnq = 0ns; nanoseconds nsDeq = 0ns; long long numEnq = 0; long long numDeq = 0; long long totOpsSec = 0; Result() { } Result(const Result &other) { nsEnq = other.nsEnq; nsDeq = other.nsDeq; numEnq = other.numEnq; numDeq = other.numDeq; totOpsSec = other.totOpsSec; } bool operator < (const Result& other) const { return totOpsSec < other.totOpsSec; } }; static const long long NSEC_IN_SEC = 1000000000LL; int numThreads; public: BenchmarkSets(int numThreads) { this->numThreads = numThreads; } /** * When doing "updates" we execute a random removal and if the removal is successful we do an add() of the * same item immediately after. 
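 * (In rw_lambda below this step is:
 *    if (set->remove(*udarray[ix], tid)) { numOps++; set->add(*udarray[ix], tid); }
 *    numOps++;
 * so a successful remove-and-add counts as two operations, a failed remove as one.)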
This keeps the size of the data structure equal to the original size (minus * MAX_THREADS items at most) which gives more deterministic results. */ template long long benchmark(std::string& className, const int updateRatio, const seconds testLengthSeconds, const int numRuns, const int numElements, const bool dedicated=false) { long long ops[numThreads][numRuns]; long long lengthSec[numRuns]; atomic quit = { false }; atomic startFlag = { false }; className = S::className(); std::cout << "##### " << S::className() << " ##### \n"; S* set = new S(numThreads); // Create all the keys in the concurrent set K** udarray = new K*[numElements]; for (int i = 0; i < numElements; i++) udarray[i] = new K(i); // Add all the items to the list set->addAll(udarray, numElements, 0); // Can either be a Reader or a Writer auto rw_lambda = [this,&quit,&startFlag,&set,&udarray,&numElements](const int updateRatio, long long *ops, const int tid) { long long numOps = 0; while (!startFlag.load()) ; // spin uint64_t seed = tid+1234567890123456781ULL; while (!quit.load()) { seed = randomLong(seed); int update = seed%1000; seed = randomLong(seed); auto ix = (unsigned int)(seed%numElements); if (update < updateRatio) { // I'm a Writer if (set->remove(*udarray[ix], tid)) { numOps++; set->add(*udarray[ix], tid); } numOps++; } else { // I'm a Reader set->contains(*udarray[ix], tid); seed = randomLong(seed); ix = (unsigned int)(seed%numElements); set->contains(*udarray[ix], tid); numOps += 2; } } *ops = numOps; }; for (int irun = 0; irun < numRuns; irun++) { thread rwThreads[numThreads]; if (dedicated) { rwThreads[0] = thread(rw_lambda, 1000, &ops[0][irun], 0); rwThreads[1] = thread(rw_lambda, 1000, &ops[1][irun], 1); for (int tid = 2; tid < numThreads; tid++) rwThreads[tid] = thread(rw_lambda, updateRatio, &ops[tid][irun], tid); } else { for (int tid = 0; tid < numThreads; tid++) rwThreads[tid] = thread(rw_lambda, updateRatio, &ops[tid][irun], tid); } this_thread::sleep_for(100ms); auto startBeats = steady_clock::now(); startFlag.store(true); // Sleep for testLengthSeconds seconds this_thread::sleep_for(testLengthSeconds); quit.store(true); auto stopBeats = steady_clock::now(); for (int tid = 0; tid < numThreads; tid++) rwThreads[tid].join(); lengthSec[irun] = (stopBeats-startBeats).count(); if (dedicated) { // We don't account for the write-only operations but we aggregate the values from the two threads and display them std::cout << "Mutative transactions per second = " << (ops[0][irun] + ops[1][irun])*1000000000LL/lengthSec[irun] << "\n"; ops[0][irun] = 0; ops[1][irun] = 0; } quit.store(false); startFlag.store(false); // Compute ops at the end of each run long long agg = 0; for (int tid = 0; tid < numThreads; tid++) { agg += ops[tid][irun]*1000000000LL/lengthSec[irun]; } } // Clear the set, one key at a time and then delete the instance for (int i = 0; i < numElements; i++) set->remove(*udarray[i], 0); delete set; for (int i = 0; i < numElements; i++) delete udarray[i]; delete[] udarray; // Accounting vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { for (int tid = 0; tid < numThreads; tid++) { agg[irun] += ops[tid][irun]*1000000000LL/lengthSec[irun]; } } // Compute the median. 
numRuns must be an odd number sort(agg.begin(),agg.end()); auto maxops = agg[numRuns-1]; auto minops = agg[0]; auto medianops = agg[numRuns/2]; auto delta = (long)(100.*(maxops-minops) / ((double)medianops)); // Printed value is the median of the number of ops per second that all threads were able to accomplish (on average) std::cout << "Ops/sec = " << medianops << " delta = " << delta << "% min = " << minops << " max = " << maxops << "\n"; return medianops; } /* * Inspired by Trevor Brown's benchmarks (does everyone else do it like this?) */ template long long benchmarkRandomFill(std::string& className, const int updateRatio, const seconds testLengthSeconds, const int numRuns, const int numElements, const bool dedicated=false) { long long ops[numThreads][numRuns]; long long lengthSec[numRuns]; atomic quit = { false }; atomic startFlag = { false }; className = S::className(); std::cout << "##### " << S::className() << " ##### \n"; S* set = new S(numThreads); // Create all the keys in the concurrent set K** udarray = new K*[2*numElements]; for (int i = 0; i < 2*numElements; i++) udarray[i] = new K(i); // Add half the keys to the list long ielem = 0; uint64_t seed = 1234567890123456781ULL; while (ielem < numElements/2) { seed = randomLong(seed); // Insert new random keys until we have 'numElements/2' keys in the tree if (set->add(*udarray[seed%(numElements)], 0)) ielem++; } // Add all keys, repeating if needed set->addAll(udarray, numElements, 0); // Can either be a Reader or a Writer auto rw_lambda = [this,&quit,&startFlag,&set,&udarray,&numElements](const int updateRatio, long long *ops, const int tid) { long long numOps = 0; while (!startFlag.load()) ; // spin uint64_t seed = tid+1234567890123456781ULL; while (!quit.load()) { seed = randomLong(seed); int update = seed%1000; seed = randomLong(seed); auto ix = (unsigned int)(seed%numElements); if (update < updateRatio) { // I'm a Writer if (set->remove(*udarray[ix], tid)) { numOps++; set->add(*udarray[ix], tid); } numOps++; } else { // I'm a Reader set->contains(*udarray[ix], tid); seed = randomLong(seed); ix = (unsigned int)(seed%numElements); set->contains(*udarray[ix], tid); numOps += 2; } } *ops = numOps; }; for (int irun = 0; irun < numRuns; irun++) { thread rwThreads[numThreads]; if (dedicated) { rwThreads[0] = thread(rw_lambda, 1000, &ops[0][irun], 0); rwThreads[1] = thread(rw_lambda, 1000, &ops[1][irun], 1); for (int tid = 2; tid < numThreads; tid++) rwThreads[tid] = thread(rw_lambda, updateRatio, &ops[tid][irun], tid); } else { for (int tid = 0; tid < numThreads; tid++) rwThreads[tid] = thread(rw_lambda, updateRatio, &ops[tid][irun], tid); } this_thread::sleep_for(100ms); auto startBeats = steady_clock::now(); startFlag.store(true); // Sleep for testLengthSeconds seconds this_thread::sleep_for(testLengthSeconds); quit.store(true); auto stopBeats = steady_clock::now(); for (int tid = 0; tid < numThreads; tid++) rwThreads[tid].join(); lengthSec[irun] = (stopBeats-startBeats).count(); if (dedicated) { // We don't account for the write-only operations but we aggregate the values from the two threads and display them std::cout << "Mutative transactions per second = " << (ops[0][irun] + ops[1][irun])*1000000000LL/lengthSec[irun] << "\n"; ops[0][irun] = 0; ops[1][irun] = 0; } quit.store(false); startFlag.store(false); // Compute ops at the end of each run long long agg = 0; for (int tid = 0; tid < numThreads; tid++) { agg += ops[tid][irun]*1000000000LL/lengthSec[irun]; } } /* Clear the tree, one key at a time and then delete the instance 
*/ for (int i = 0; i < numElements; i++) set->remove(*udarray[i], 0); delete set; for (int i = 0; i < numElements; i++) delete udarray[i]; delete[] udarray; // Accounting vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { for (int tid = 0; tid < numThreads; tid++) { agg[irun] += ops[tid][irun]*1000000000LL/lengthSec[irun]; } } // Compute the median. numRuns must be an odd number sort(agg.begin(),agg.end()); auto maxops = agg[numRuns-1]; auto minops = agg[0]; auto medianops = agg[numRuns/2]; auto delta = (long)(100.*(maxops-minops) / ((double)medianops)); // Printed value is the median of the number of ops per second that all threads were able to accomplish (on average) std::cout << "Ops/sec = " << medianops << " delta = " << delta << "% min = " << minops << " max = " << maxops << "\n"; return medianops; } /** * An imprecise but fast random number generator */ uint64_t randomLong(uint64_t x) { x ^= x >> 12; // a x ^= x << 25; // b x ^= x >> 27; // c return x * 2685821657736338717LL; } }; #endif ================================================ FILE: graphs/Makefile ================================================ CXX = g++-8 CXXFLAGS = -std=c++17 -g -O2 -DPWB_IS_CLFLUSHOPT # -fuse-ld=gold -fsanitize=address # For castor-1 #CXXFLAGS = -std=c++17 -g -O2 -DPWB_IS_CLWB -DPM_REGION_SIZE=64*1024*1024*1024ULL -DPM_USE_DAX -DPM_FILE_NAME="\"/mnt/pmem0/durable\"" # Possible options for PWB are: # -DPWB_IS_CLFLUSH pwb is a CLFLUSH and pfence/psync are nops (Broadwell) # -DPWB_IS_CLFLUSHOPT pwb is a CLFLUSHOPT and pfence/psync are SFENCE (Kaby Lake) # -DPWB_IS_CLWB pwb is a CLWB and pfence/psync are SFENCE (Sky Lake SP, or Canon Lake SP and beyond) # -DPWB_IS_NOP pwb/pfence/psync are nops. Used for shared memory persistence INCLUDES = -I../ -I../common/ #LIBS = -l/home/vagrant/tinystm/lib/libstm.a # This library is needed for ESTM ESTM_LIB = -L../stms/estm-0.3.0/lib/ -lstm -lpthread # This library is needed for TinySTM TINYSTM_LIB = -L../stms/tinystm/lib/ -lstm -lpthread TINYSTM_INC = -I../stms/tinystm/ # This library is needed for PMDK PMDKLIBS = -L/usr/local/lib -lpmemobj BINARIES = \ bin/sps-integer \ bin/sps-integer-tiny \ bin/sps-object \ bin/sps-object-tiny \ bin/set-ll-1k \ bin/set-ll-1k-tiny \ bin/set-ll-10k \ bin/set-ll-10k-tiny \ bin/set-tree-1k \ bin/set-tree-1k-tiny \ bin/set-tree-10k \ bin/set-tree-10k-tiny \ bin/set-tree-1m \ bin/set-tree-1m-tiny \ bin/set-hash-1k \ bin/set-hash-1k-tiny \ bin/q-ll-enq-deq \ bin/q-ll-enq-deq-tiny \ bin/q-array-enq-deq \ bin/q-array-enq-deq-tiny \ bin/psps-integer \ bin/pset-ll-1k \ bin/pset-ll-10k \ bin/pset-hash-1k \ bin/pset-tree-1k \ bin/pq-ll-enq-deq \ bin/latency-counter \ bin/latency-counter-tiny \ bin/pset-tree-1m-oflf \ bin/pset-tree-1m-ofwf \ bin/pset-tree-1m-pmdk \ bin/pset-tree-1m-romlog \ bin/pset-tree-1m-romlr \ # bin/pset-tree-1m-pmdk \ # bin/pread-while-writing-romlog \ bin/pread-while-writing-romlr \ bin/pread-while-writing-oflf \ bin/pread-while-writing-ofwf \ bin/pread-while-writing-pmdk \ STMS = \ ../stms/CRWWPSTM.hpp \ ../stms/OneFileLF.hpp \ ../stms/OneFileWF.hpp \ ../stms/TinySTM.hpp \ ../stms/tinystm/lib/libstm.a \ PTMS = \ ../ptms/OneFilePTMLF.hpp \ ../ptms/OneFilePTMWF.hpp \ ../ptms/PMDKTM.hpp \ lib/libromulus.a \ ../ptms/romuluslog/RomulusLog.hpp \ ../ptms/romuluslr/RomulusLR.hpp \ SRC_LISTS = \ ../datastructures/linkedlists/CRWWPLinkedListSet.hpp \ ../datastructures/linkedlists/ESTMLinkedListSet.hpp \ ../datastructures/linkedlists/OFLFLinkedListSet.hpp \ ../datastructures/linkedlists/OFWFLinkedListSet.hpp \ 
../datastructures/linkedlists/STMLinkedListSet.hpp \ ../datastructures/linkedlists/TinySTMLinkedListSet.hpp \ SRC_TREES = \ ../datastructures/treemaps/ESTMRedBlackTree.hpp \ ../datastructures/treemaps/NatarajanTreeHE.hpp \ ../datastructures/treemaps/OFLFRedBlackTree.hpp \ ../datastructures/treemaps/OFWFRedBlackTree.hpp \ QUEUES_DEP = \ ../datastructures/queues/ESTMArrayLinkedListQueue.hpp \ ../datastructures/queues/ESTMLinkedListQueue.hpp \ ../datastructures/queues/FAAArrayQueue.hpp \ ../datastructures/queues/LCRQueue.hpp \ ../datastructures/queues/MichaelScottQueue.hpp \ ../datastructures/queues/OFLFArrayLinkedListQueue.hpp \ ../datastructures/queues/OFLFLinkedListQueue.hpp \ ../datastructures/queues/OFWFArrayLinkedListQueue.hpp \ ../datastructures/queues/OFWFLinkedListQueue.hpp \ ../datastructures/queues/TurnQueue.hpp \ PQUEUES_DEP = \ ../pdatastructures/pqueues/MichaelScottQueue.hpp \ ../pdatastructures/pqueues/PFriedmanQueue.hpp \ ../pdatastructures/pqueues/PMDKLinkedListQueue.hpp \ ../pdatastructures/pqueues/PMichaelScottQueue.hpp \ ../pdatastructures/TMLinkedListQueue.hpp \ ../pdatastructures/pqueues/POFLFLinkedListQueue.hpp \ ../pdatastructures/pqueues/POFWFLinkedListQueue.hpp \ ../pdatastructures/pqueues/RomLogLinkedListQueue.hpp \ ../pdatastructures/pqueues/RomLRLinkedListQueue.hpp \ ROMULUS_LIB_SRC = \ ../common/ThreadRegistry.cpp \ ../ptms/romuluslog/malloc.cpp \ ../ptms/romuluslog/RomulusLog.cpp \ ../ptms/romuluslr/malloc.cpp \ ../ptms/romuluslr/RomulusLR.cpp \ ROMULUS_LIB_DEP = \ $(ROMULUS_LIB_SRC) \ ../ptms/romuluslog/RomulusLog.hpp \ ../ptms/romuluslr/RomulusLR.hpp \ TREVOR_BROWN_INCLUDES = \ -I../datastructures/trevor_brown_abtree/common/recordmgr \ -I../datastructures/trevor_brown_abtree/common \ -I../datastructures/trevor_brown_abtree/common/descriptors \ -I../datastructures/trevor_brown_abtree/common/rq \ -I../datastructures/trevor_brown_abtree/common/rq \ -I../datastructures/trevor_brown_abtree/common/atomic_ops \ all: $(BINARIES) persistencyclean clean: persistencyclean rm -f bin/* rm -f lib/* persistencyclean: rm -f /dev/shm/*_shared rm -f /dev/shm/psegments/* # # Create a library for RomulusLog and RomulusLR # lib/threadregistry.o: $(ROMULUS_LIB_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) -c ../common/ThreadRegistry.cpp -o lib/threadregistry.o lib/mallocromlog.o: $(ROMULUS_LIB_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) -c ../ptms/romuluslog/malloc.cpp -o lib/mallocromlog.o lib/romlog.o: $(ROMULUS_LIB_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) -c ../ptms/romuluslog/RomulusLog.cpp -o lib/romlog.o lib/mallocromlr.o: $(ROMULUS_LIB_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) -c ../ptms/romuluslr/malloc.cpp -o lib/mallocromlr.o lib/romlr.o: $(ROMULUS_LIB_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) -c ../ptms/romuluslr/RomulusLR.cpp -o lib/romlr.o lib/libromulus.a: lib/threadregistry.o lib/mallocromlog.o lib/romlog.o lib/mallocromlr.o lib/romlr.o ar rcs lib/libromulus.a lib/threadregistry.o lib/mallocromlog.o lib/romlog.o lib/mallocromlr.o lib/romlr.o # # Queues for volatile memory # bin/q-ll-enq-deq: q-ll-enq-deq.cpp $(STMS) $(QUEUES_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) q-ll-enq-deq.cpp -o bin/q-ll-enq-deq -lpthread $(ESTM_LIB) bin/q-array-enq-deq: q-array-enq-deq.cpp $(STMS) $(QUEUES_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) q-array-enq-deq.cpp -o bin/q-array-enq-deq -lpthread $(ESTM_LIB) bin/q-ll-burst: q-ll-burst.cpp $(QUEUES_DEP) $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) q-ll-burst.cpp -o bin/q-ll-burst -lpthread $(ESTM_LIB) # Same as above but for TinySTM bin/q-ll-enq-deq-tiny: q-ll-enq-deq.cpp 
$(STMS) $(QUEUES_DEP) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) q-ll-enq-deq.cpp -o bin/q-ll-enq-deq-tiny -lpthread $(TINYSTM_LIB) bin/q-array-enq-deq-tiny: q-array-enq-deq.cpp $(STMS) $(QUEUES_DEP) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) q-array-enq-deq.cpp -o bin/q-array-enq-deq-tiny -lpthread $(TINYSTM_LIB) # # Queues for persistent memory # bin/pq-ll-enq-deq: pq-ll-enq-deq.cpp $(PTMS) $(PQUEUES_DEP) PBenchmarkQueues.hpp $(CXX) $(CXXFLAGS) $(INCLUDES) pq-ll-enq-deq.cpp -o bin/pq-ll-enq-deq -lpthread $(PMDKLIBS) lib/libromulus.a # # Sets for volatile memory # bin/set-ll-1k: set-ll-1k.cpp $(STMS) $(SRC_LISTS) $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) set-ll-1k.cpp -o bin/set-ll-1k -lpthread $(ESTM_LIB) bin/set-ll-10k: set-ll-10k.cpp $(STMS) $(SRC_LISTS) $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) set-ll-10k.cpp -o bin/set-ll-10k -lpthread $(ESTM_LIB) bin/set-tree-1k: set-tree-1k.cpp $(STMS) $(SRC_TREES) $(CXX) $(CXXFLAGS) -fuse-ld=gold -fsanitize=address $(INCLUDES) $(TREVOR_BROWN_INCLUDES) ../common/ThreadRegistry.cpp $(CSRCS) set-tree-1k.cpp -o bin/set-tree-1k -lpthread $(ESTM_LIB) bin/set-tree-10k: set-tree-10k.cpp $(STMS) $(SRC_TREES) $(CXX) $(CXXFLAGS) $(INCLUDES) $(TREVOR_BROWN_INCLUDES) ../common/ThreadRegistry.cpp $(CSRCS) set-tree-10k.cpp -o bin/set-tree-10k -lpthread $(ESTM_LIB) bin/set-tree-1m: set-tree-1m.cpp $(STMS) $(SRC_TREES) $(CXX) $(CXXFLAGS) $(INCLUDES) $(TREVOR_BROWN_INCLUDES) ../common/ThreadRegistry.cpp $(CSRCS) set-tree-1m.cpp -o bin/set-tree-1m -lpthread $(ESTM_LIB) bin/set-hash-1k: set-hash-1k.cpp $(STMS) $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) set-hash-1k.cpp -o bin/set-hash-1k -lpthread $(ESTM_LIB) # Same as above, but for Tiny STM only bin/set-ll-1k-tiny: set-ll-1k.cpp $(STMS) $(SRC_LISTS) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) set-ll-1k.cpp -o bin/set-ll-1k-tiny -lpthread $(TINYSTM_LIB) bin/set-ll-10k-tiny: set-ll-10k.cpp $(STMS) $(SRC_LISTS) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) set-ll-10k.cpp -o bin/set-ll-10k-tiny -lpthread $(TINYSTM_LIB) bin/set-tree-1k-tiny: set-tree-1k.cpp $(STMS) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) set-tree-1k.cpp -o bin/set-tree-1k-tiny -lpthread $(TINYSTM_LIB) bin/set-tree-10k-tiny: set-tree-10k.cpp $(STMS) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) set-tree-10k.cpp -o bin/set-tree-10k-tiny -lpthread $(TINYSTM_LIB) bin/set-tree-1m-tiny: set-tree-1m.cpp $(STMS) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) set-tree-1m.cpp -o bin/set-tree-1m-tiny -lpthread $(TINYSTM_LIB) bin/set-hash-1k-tiny: set-hash-1k.cpp $(STMS) $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) set-hash-1k.cpp -o bin/set-hash-1k-tiny -lpthread $(TINYSTM_LIB) # # Sets for persistent memory # bin/pset-ll-1k: pset-ll-1k.cpp $(PTMS) PBenchmarkSets.hpp ../pdatastructures/TMLinkedListSet.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) pset-ll-1k.cpp -o bin/pset-ll-1k -lpthread $(PMDKLIBS) lib/libromulus.a bin/pset-ll-10k: pset-ll-10k.cpp $(PTMS) PBenchmarkSets.hpp ../pdatastructures/TMLinkedListSet.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) pset-ll-10k.cpp -o bin/pset-ll-10k -lpthread $(PMDKLIBS) lib/libromulus.a bin/pset-hash-1k: pset-hash-1k.cpp $(PTMS) PBenchmarkSets.hpp ../pdatastructures/TMHashMap.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) pset-hash-1k.cpp -o bin/pset-hash-1k -lpthread $(PMDKLIBS) lib/libromulus.a bin/pset-tree-1k: pset-tree-1k.cpp $(PTMS) 
PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) pset-tree-1k.cpp -o bin/pset-tree-1k -lpthread $(PMDKLIBS) lib/libromulus.a bin/pset-tree-1m: pset-tree-1m.cpp $(PTMS) PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) pset-tree-1m.cpp -o bin/pset-tree-1m -lpthread $(PMDKLIBS) lib/libromulus.a # # SPS for volatile memory # bin/sps-integer: sps-integer.cpp $(STMS) BenchmarkSPS.hpp $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) sps-integer.cpp -o bin/sps-integer -lpthread $(ESTM_LIB) bin/sps-integer-tiny: sps-integer.cpp $(STMS) BenchmarkSPS.hpp $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) sps-integer.cpp -o bin/sps-integer-tiny -lpthread $(TINYSTM_LIB) bin/sps-object: sps-object.cpp $(STMS) BenchmarkSPS.hpp $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) sps-object.cpp -o bin/sps-object -lpthread $(ESTM_LIB) bin/sps-object-tiny: sps-object.cpp $(STMS) BenchmarkSPS.hpp $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) sps-object.cpp -o bin/sps-object-tiny -lpthread $(TINYSTM_LIB) # # SPS for persistent memory # bin/psps-integer: psps-integer.cpp $(PTMS) PBenchmarkSPS.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) psps-integer.cpp -o bin/psps-integer -lpthread $(PMDKLIBS) lib/libromulus.a bin/psps-integer-atlas: psps-integer-atlas.cpp $(PTMS) PBenchmarkSPS.hpp persistencyclean lib/libromulus.a $(CXX) $(CXXFLAGS) $(INCLUDES) psps-integer-atlas.cpp -o bin/psps-integer-atlas -lpthread lib/libromulus.a # TODO: is it worth doing sps-object for PTMs ? # # Latency for STMs # bin/latency-counter: latency-counter.cpp $(STMS) BenchmarkLatencyCounter.hpp $(CXX) $(CXXFLAGS) $(INCLUDES) $(CSRCS) latency-counter.cpp -o bin/latency-counter -lpthread $(ESTM_LIB) bin/latency-counter-tiny: latency-counter.cpp $(STMS) BenchmarkLatencyCounter.hpp $(CXX) $(CXXFLAGS) -DUSE_TINY $(INCLUDES) $(TINYSTM_INC) $(CSRCS) latency-counter.cpp -o bin/latency-counter-tiny -lpthread $(TINYSTM_LIB) # # Persistent balanced tree with 1M keys. Must be compiled one at a time otherwise you get all the NVM heaps allocated, which is too much # bin/pset-tree-1m-romlog: pset-tree-1m.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) -DUSE_ROMLOG $(INCLUDES) pset-tree-1m.cpp -o bin/pset-tree-1m-romlog -lpthread lib/libromulus.a bin/pset-tree-1m-romlr: pset-tree-1m.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) -DUSE_ROMLR $(INCLUDES) pset-tree-1m.cpp -o bin/pset-tree-1m-romlr -lpthread lib/libromulus.a bin/pset-tree-1m-oflf: pset-tree-1m.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp ../ptms/OneFilePTMLF.hpp $(CXX) $(CXXFLAGS) -DUSE_OFLF $(INCLUDES) pset-tree-1m.cpp -o bin/pset-tree-1m-oflf -lpthread bin/pset-tree-1m-ofwf: pset-tree-1m.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp ../ptms/OneFilePTMWF.hpp $(CXX) $(CXXFLAGS) -DUSE_OFWF $(INCLUDES) pset-tree-1m.cpp -o bin/pset-tree-1m-ofwf -lpthread bin/pset-tree-1m-pmdk: pset-tree-1m.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp $(CXX) $(CXXFLAGS) -DUSE_PMDK $(INCLUDES) pset-tree-1m.cpp -o bin/pset-tree-1m-pmdk -lpthread $(PMDKLIBS) # experimental... 
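# Example usage (editorial; assumes you build and run from the graphs/ directory):
#   make bin/pset-tree-1m-oflf
#   ./bin/pset-tree-1m-oflf
# As noted above, build and run one pset-tree-1m-* variant at a time.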
bin/pread-while-writing-romlog: pread-while-writing.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) -DUSE_ROMLOG $(INCLUDES) pread-while-writing.cpp -o bin/pread-while-writing-romlog -lpthread lib/libromulus.a bin/pread-while-writing-romlr: pread-while-writing.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp lib/libromulus.a $(CXX) $(CXXFLAGS) -DUSE_ROMLR $(INCLUDES) pread-while-writing.cpp -o bin/pread-while-writing-romlr -lpthread lib/libromulus.a bin/pread-while-writing-oflf: pread-while-writing.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp ../ptms/OneFilePTMLF.hpp $(CXX) $(CXXFLAGS) -DUSE_OFLF $(INCLUDES) pread-while-writing.cpp -o bin/pread-while-writing-oflf -lpthread bin/pread-while-writing-ofwf: pread-while-writing.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp ../ptms/OneFilePTMWF.hpp $(CXX) $(CXXFLAGS) -DUSE_OFWF $(INCLUDES) pread-while-writing.cpp -o bin/pread-while-writing-ofwf -lpthread bin/pread-while-writing-pmdk: pread-while-writing.cpp PBenchmarkSets.hpp ../pdatastructures/TMRedBlackTree.hpp $(CXX) $(CXXFLAGS) -DUSE_PMDK $(INCLUDES) pread-while-writing.cpp -o bin/pread-while-writing-pmdk -lpthread $(PMDKLIBS) ================================================ FILE: graphs/PBenchmarkQueues.hpp ================================================ /* * Copyright 2017-2018 * Andreia Correia * Pedro Ramalhete * Pascal Felber * Nachshon Cohen * * This work is published under the MIT license. See LICENSE.txt */ #ifndef _PERSISTENT_BENCHMARK_Q_H_ #define _PERSISTENT_BENCHMARK_Q_H_ #include #include #include #include #include #include #include using namespace std; using namespace chrono; struct UserData { long long seq; int tid; UserData(long long lseq, int ltid) { this->seq = lseq; this->tid = ltid; } UserData() { this->seq = -2; this->tid = -2; } UserData(const UserData &other) : seq(other.seq), tid(other.tid) { } bool operator < (const UserData& other) const { return seq < other.seq; } }; /** * This is a micro-benchmark for persistent queues */ class PBenchmarkQueues { private: struct Result { nanoseconds nsEnq = 0ns; nanoseconds nsDeq = 0ns; long long numEnq = 0; long long numDeq = 0; long long totOpsSec = 0; Result() { } Result(const Result &other) { nsEnq = other.nsEnq; nsDeq = other.nsDeq; numEnq = other.numEnq; numDeq = other.numDeq; totOpsSec = other.totOpsSec; } bool operator < (const Result& other) const { return totOpsSec < other.totOpsSec; } }; // Performance benchmark constants static const long long kNumPairsWarmup = 1000000LL; // Each threads does 1M iterations as warmup // Contants for Ping-Pong performance benchmark static const int kPingPongBatch = 1000; // Each thread starts by injecting 1k items in the queue static const long long NSEC_IN_SEC = 1000000000LL; int numThreads; public: PBenchmarkQueues(int numThreads) { this->numThreads = numThreads; } /** * enqueue-dequeue pairs: in each iteration a thread executes an enqueue followed by a dequeue; * the benchmark executes 10^8 pairs partitioned evenly among all threads; * WARNING: If you modify this, please modify enqDeqNoTransaction() also */ template uint64_t enqDeq(std::string& className, const long numPairs, const int numRuns) { nanoseconds deltas[numThreads][numRuns]; atomic startFlag = { false }; Q* queue = nullptr; className = Q::className(); cout << "##### " << className << " ##### \n"; auto enqdeq_lambda = [this,&startFlag,&numPairs,&queue](nanoseconds *delta, const int tid) { //UserData* ud = new UserData{0,0}; uint64_t* 
ud = new uint64_t(42); while (!startFlag.load()) {} // Spin until the startFlag is set // Warmup phase for (long long iter = 0; iter < numPairs/(numThreads*10); iter++) { // Do 1/10 iterations as warmup PTM::updateTx([=] () { queue->enqueue(*ud, tid); if (queue->dequeue(tid) == queue->EMPTY) cout << "Error at warmup dequeueing iter=" << iter << "\n"; }); } // Measurement phase auto startBeats = steady_clock::now(); for (long long iter = 0; iter < numPairs/numThreads; iter++) { PTM::updateTx([=] () { queue->enqueue(*ud, tid); if (queue->dequeue(tid) == queue->EMPTY) cout << "Error at measurement dequeueing iter=" << iter << "\n"; }); } auto stopBeats = steady_clock::now(); *delta = stopBeats - startBeats; }; for (int irun = 0; irun < numRuns; irun++) { PTM::updateTx([&] () { // It's ok to capture by reference, only the main thread is active (but it is not ok for CX-PTM) queue = PTM::template tmNew(); }); thread enqdeqThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid] = thread(enqdeq_lambda, &deltas[tid][irun], tid); startFlag.store(true); // Sleep for 2 seconds just to let the threads see the startFlag this_thread::sleep_for(2s); for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid].join(); startFlag.store(false); PTM::updateTx([=] () { PTM::tmDelete(queue); }); } // Sum up all the time deltas of all threads so we can find the median run vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { agg[irun] = 0ns; for (int tid = 0; tid < numThreads; tid++) { agg[irun] += deltas[tid][irun]; } } // Compute the median. numRuns should be an odd number sort(agg.begin(),agg.end()); auto median = agg[numRuns/2].count()/numThreads; // Normalize back to per-thread time (mean of time for this run) cout << "Total Ops/sec = " << numPairs*2*NSEC_IN_SEC/median << "\n"; return (numPairs*2*NSEC_IN_SEC/median); } /* * WARNING: If you modify this, please modify enqDeq() also */ template uint64_t enqDeqNoTransaction(std::string& className, const long numPairs, const int numRuns) { nanoseconds deltas[numThreads][numRuns]; atomic startFlag = { false }; Q* queue = nullptr; className = Q::className(); cout << "##### " << className << " ##### \n"; auto enqdeq_lambda = [this,&startFlag,&numPairs,&queue](nanoseconds *delta, const int tid) { uint64_t* ud = new uint64_t(42); while (!startFlag.load()) {} // Spin until the startFlag is set // Warmup phase for (long long iter = 0; iter < numPairs/(numThreads*10); iter++) { // Do 1/10 iterations as warmup queue->enqueue(*ud, tid); if (queue->dequeue(tid) == queue->EMPTY) cout << "Error at warmup dequeueing iter=" << iter << "\n"; } // Measurement phase auto startBeats = steady_clock::now(); for (long long iter = 0; iter < numPairs/numThreads; iter++) { queue->enqueue(*ud, tid); if (queue->dequeue(tid) == queue->EMPTY) cout << "Error at measurement dequeueing iter=" << iter << "\n"; } auto stopBeats = steady_clock::now(); *delta = stopBeats - startBeats; }; for (int irun = 0; irun < numRuns; irun++) { queue = new Q(); // TODO: use a PTM allocator, maybe the one in PMDK thread enqdeqThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid] = thread(enqdeq_lambda, &deltas[tid][irun], tid); startFlag.store(true); // Sleep for 2 seconds just to let the threads see the startFlag this_thread::sleep_for(2s); for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid].join(); startFlag.store(false); delete queue; // TODO: use PTM de-allocator } // Sum up all the time deltas of all threads so we can find the 
median run vector agg(numRuns); for (int irun = 0; irun < numRuns; irun++) { agg[irun] = 0ns; for (int tid = 0; tid < numThreads; tid++) { agg[irun] += deltas[tid][irun]; } } // Compute the median. numRuns should be an odd number sort(agg.begin(),agg.end()); auto median = agg[numRuns/2].count()/numThreads; // Normalize back to per-thread time (mean of time for this run) cout << "Total Ops/sec = " << numPairs*2*NSEC_IN_SEC/median << "\n"; return (numPairs*2*NSEC_IN_SEC/median); } /** * Start with only enqueues 100K/numThreads, wait for them to finish, then do only dequeues but only 100K/numThreads * TODO: must fix this for persistency, not yet working */ template void burst(std::string& className, uint64_t& resultsEnq, uint64_t& resultsDeq, const long long burstSize, const int numIters, const int numRuns, const bool isSC=false) { Result results[numThreads][numRuns]; atomic startEnq = { false }; atomic startDeq = { false }; atomic barrier = { 0 }; Q* queue = nullptr; auto burst_lambda = [this,&startEnq,&startDeq,&burstSize,&barrier,&numIters,&isSC,&queue](Result *res, const int tid) { UserData ud(0,0); // Warmup only if it is not Single-Consumer if (!isSC) { const long long warmupIters = 100000LL; // Do 100K for each thread as a warmup for (long long iter = 0; iter < warmupIters; iter++) queue->enqueue(&ud, tid); for (long long iter = 0; iter < warmupIters; iter++) { if (queue->dequeue(tid) == nullptr) cout << "ERROR: warmup dequeued nullptr in iter=" << iter << "\n"; } } // Measurements for (int iter = 0; iter < numIters; iter++) { // Start with enqueues while (!startEnq.load()) {} // spin is better than yield here auto startBeats = steady_clock::now(); for (long long i = 0; i < burstSize/numThreads; i++) { queue->enqueue(&ud, tid); } auto stopBeats = steady_clock::now(); res->nsEnq += (stopBeats-startBeats); res->numEnq += burstSize/numThreads; if (barrier.fetch_add(1) == numThreads) cout << "ERROR: in barrier\n"; // dequeues while (!startDeq.load()) { } // spin is better than yield here if (isSC) { // Handle the single-consumer case if (tid == 0) { startBeats = steady_clock::now(); // We need to deal with rounding errors in the single-consumer case for (long long i = 0; i < ((long long)(burstSize/numThreads))*numThreads; i++) { if (queue->dequeue(tid) == nullptr) { cout << "ERROR: dequeued nullptr in iter=" << i << "\n"; assert(false); } } stopBeats = steady_clock::now(); if (queue->dequeue(tid) != nullptr) cout << "ERROR: dequeued non-null, there must be duplicate items!\n"; res->nsDeq += (stopBeats-startBeats); res->numDeq += burstSize/numThreads; } } else { startBeats = steady_clock::now(); for (long long i = 0; i < burstSize/numThreads; i++) { if (queue->dequeue(tid) == nullptr) { cout << "ERROR: dequeued nullptr in iter=" << i << "\n"; assert(false); } } stopBeats = steady_clock::now(); res->nsDeq += (stopBeats-startBeats); res->numDeq += burstSize/numThreads; } if (barrier.fetch_add(1) == numThreads) cout << "ERROR: in barrier\n"; } }; for (int irun = 0; irun < numRuns; irun++) { queue = new Q(numThreads); if (irun == 0) { className = queue->className(); cout << "##### " << queue->className() << " ##### \n"; } thread burstThreads[numThreads]; for (int tid = 0; tid < numThreads; tid++) burstThreads[tid] = thread(burst_lambda, &results[tid][irun], tid); this_thread::sleep_for(100ms); for (int iter=0; iter < numIters; iter++) { // enqueue round startEnq.store(true); while (barrier.load() != numThreads) this_thread::yield(); startEnq.store(false); long tmp = numThreads; if 
        for (int irun = 0; irun < numRuns; irun++) {
            queue = new Q(numThreads);
            if (irun == 0) {
                className = queue->className();
                cout << "##### " << queue->className() << " ##### \n";
            }
            thread burstThreads[numThreads];
            for (int tid = 0; tid < numThreads; tid++) burstThreads[tid] = thread(burst_lambda, &results[tid][irun], tid);
            this_thread::sleep_for(100ms);
            for (int iter = 0; iter < numIters; iter++) {
                // Enqueue round
                startEnq.store(true);
                while (barrier.load() != numThreads) this_thread::yield();
                startEnq.store(false);
                long tmp = numThreads;
                if (!barrier.compare_exchange_strong(tmp, 0)) cout << "ERROR: CAS\n";
                // Dequeue round
                startDeq.store(true);
                while (barrier.load() != numThreads) this_thread::yield();
                startDeq.store(false);
                tmp = numThreads;
                if (!barrier.compare_exchange_strong(tmp, 0)) cout << "ERROR: CAS\n";
            }
            for (int tid = 0; tid < numThreads; tid++) burstThreads[tid].join();
            delete queue;
        }

        // Accounting
        vector<Result> agg(numRuns);
        for (int irun = 0; irun < numRuns; irun++) {
            nanoseconds maxNsEnq = 0ns;
            nanoseconds maxNsDeq = 0ns;
            for (int tid = 0; tid < numThreads; tid++) {
                if (results[tid][irun].nsEnq > maxNsEnq) maxNsEnq = results[tid][irun].nsEnq;
                if (results[tid][irun].nsDeq > maxNsDeq) maxNsDeq = results[tid][irun].nsDeq;
                agg[irun].numEnq += results[tid][irun].numEnq;
                agg[irun].numDeq += results[tid][irun].numDeq;
            }
            agg[irun].nsEnq = maxNsEnq;
            agg[irun].nsDeq = maxNsDeq;
            agg[irun].totOpsSec = agg[irun].nsEnq.count()+agg[irun].nsDeq.count();
        }

        // Compute the median. numRuns should be an odd number
        sort(agg.begin(), agg.end());
        Result median = agg[numRuns/2];
        const long long allThreadsEnqPerSec = median.numEnq*NSEC_IN_SEC/median.nsEnq.count();
        const long long allThreadsDeqPerSec = median.numDeq*NSEC_IN_SEC/median.nsDeq.count();
        // Printed value is the median of the number of ops per second that all threads were able to accomplish (on average)
        cout << "Enq/sec = " << allThreadsEnqPerSec << "   Deq/sec = " << allThreadsDeqPerSec << "\n";
        resultsEnq = allThreadsEnqPerSec;
        resultsDeq = allThreadsDeqPerSec;
    }
};

#endif


================================================
FILE: graphs/PBenchmarkSPS.hpp
================================================
/*
 * Copyright 2017-2018
 *   Andreia Correia
 *   Pedro Ramalhete
 *   Pascal Felber
 *   Nachshon Cohen
 *
 * This work is published under the MIT license. See LICENSE.txt
 */
#ifndef _PERSISTENT_BENCHMARK_SPS_H_
#define _PERSISTENT_BENCHMARK_SPS_H_

#include <atomic>
#include <chrono>
#include <thread>
#include <string>
#include <vector>
#include <algorithm>
#include <iostream>
#include <cstdint>

static const long arraySize = 1000*1000;   // 1M entries in the SPS array

using namespace std;
using namespace chrono;

/**
 * This is a micro-benchmark with integer swaps (SPS) for PTMs
 */
class PBenchmarkSPS {

private:
    int numThreads;

public:
    struct UserData {
        long long seq;
        int tid;
        UserData(long long lseq, int ltid) {
            this->seq = lseq;
            this->tid = ltid;
        }
        UserData() {
            this->seq = -2;
            this->tid = -2;
        }
        UserData(const UserData &other) : seq(other.seq), tid(other.tid) { }

        bool operator < (const UserData& other) const {
            return seq < other.seq;
        }
    };

    PBenchmarkSPS(int numThreads) {
        this->numThreads = numThreads;
    }
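    /*
     * What one SPS transaction does (sketch with plain types; 'array' stands
     * in for the PERSIST<uint64_t> array allocated in benchmarkSPSInteger()
     * below): pick two random positions and swap their contents. A swap only
     * permutes the array, so the multiset of stored values is invariant across
     * transactions, which is what a read-only consistency check can verify.
     *
     *   uint64_t tmp = array[ia];   // ia, ib are random indexes in [0, arraySize)
     *   array[ia] = array[ib];
     *   array[ib] = tmp;
     */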
    /*
     * An array of integers that gets randomly permutated.
     */
    template<typename PTM, template<typename> class PERSIST>
    uint64_t benchmarkSPSInteger(std::string& className, const seconds testLengthSeconds, const long numSwapsPerTx, const int numRuns) {
        long long ops[numThreads][numRuns];
        long long lengthSec[numRuns];
        atomic<bool> startFlag = { false };
        atomic<bool> quit = { false };

        // Create the array of integers and initialize it, saving it in root pointer 0
        int larraySize = arraySize;
        PTM::template updateTx<bool>([larraySize] () {
            //PTM::pfree( PTM::template get_object<PERSIST<uint64_t>*>(0) ); // TODO: re-enable this after we add the clear of objects as a transaction in CX
            PTM::put_object(0, PTM::pmalloc( larraySize*sizeof(PERSIST<uint64_t>) ));
            return true;
        });
        // Break up the initialization into transactions of 1k stores, so it fits in the log
        for (long j = 0; j < arraySize; j += 1000) {
            PTM::template updateTx<bool>([larraySize,j] () {
                PERSIST<uint64_t>* parray = PTM::template get_object<PERSIST<uint64_t>*>(0);
                for (int i = 0; i < 1000 && i+j < larraySize; i++) parray[i+j] = i+j;
                return true;
            });
        }

        auto func = [this,&startFlag,&quit,&numSwapsPerTx](long long *ops, const int tid) {
            uint64_t seed = (tid*1024)+tid+1234567890123456781ULL;
            int larraySize = arraySize;
            // Spin until the startFlag is set
            while (!startFlag.load()) {}
            // Do transactions until the quit flag is set
            long long tcount = 0;
            while (!quit.load()) {
                // Everything has to be captured by value, or get/put in root pointers
                PTM::template updateTx<bool>([seed,numSwapsPerTx,larraySize] () {
                    PERSIST<uint64_t>* parray = PTM::template get_object<PERSIST<uint64_t>*>(0);
                    uint64_t lseed = seed;
                    for (int i = 0; i < numSwapsPerTx; i++) {
                        lseed = randomLong(lseed);
                        auto ia = lseed % arraySize;
                        uint64_t tmp = parray[ia];
                        lseed = randomLong(lseed);
                        auto ib = lseed % arraySize;
                        parray[ia] = parray[ib];
                        parray[ib] = tmp;
                    }
                    return true;
                });
                // Can't have capture by ref for wait-free, so replicate the seed advance outside the tx
                seed = randomLong(seed);
                seed = randomLong(seed);
                ++tcount;
                /*
                PE::read_transaction([this,&seed,&parray,&numWordsPerTransaction] () {
                    PersistentArrayInt* read_array = PE::template get_object<PersistentArrayInt*>(PIDX_INT_ARRAY);
                    // Check that the array is consistent
                    int sum = 0;
                    for (int i = 0; i < arraySize; i++) {
                        sum += read_array->counters[i];
                    }
                    assert(sum == 0);
                });
                */
            }
            *ops = tcount;
        };

        for (int irun = 0; irun < numRuns; irun++) {
            if (irun == 0) {
                className = PTM::className();
                cout << "##### " << PTM::className() << " ##### \n";
            }
            thread enqdeqThreads[numThreads];
            for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid] = thread(func, &ops[tid][irun], tid);
            auto startBeats = steady_clock::now();
            startFlag.store(true);
            // Sleep for the duration of the test (testLengthSeconds)
            this_thread::sleep_for(testLengthSeconds);
            quit.store(true);
            auto stopBeats = steady_clock::now();
            for (int tid = 0; tid < numThreads; tid++) enqdeqThreads[tid].join();
            lengthSec[irun] = (stopBeats-startBeats).count();
            startFlag.store(false);
            quit.store(false);
        }

        PTM::template updateTx<bool>([] () {
            PTM::pfree( PTM::template get_object<PERSIST<uint64_t>*>(0) );
            PTM::template put_object<PERSIST<uint64_t>*>(0, nullptr);
            return true;
        });

        // Accounting
        vector<long long> agg(numRuns);
        for (int irun = 0; irun < numRuns; irun++) {
            agg[irun] = 0;
            for (int i = 0; i < numThreads; i++) {
                // Normalize each run to swaps/second (lengthSec[] holds nanoseconds)
                agg[irun] += ops[i][irun]*numSwapsPerTx*1000000000LL/lengthSec[irun];
            }
        }

        // Compute the median. numRuns should be an odd number
        sort(agg.begin(), agg.end());
        auto median = agg[numRuns/2];
        cout << "Swaps/sec = " << median << "\n";
        return median;
    }

    static uint64_t randomLong(uint64_t x) {
        x ^= x >> 12; // a
        x ^= x << 25; // b
        x ^= x >> 27; // c
        return x * 2685821657736338717LL;
    }
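    /*
     * The shift constants (12, 25, 27) and the odd 64-bit multiplier above
     * match the xorshift64* generator: three xor-shift steps (the 'a', 'b',
     * 'c' lines) followed by a multiplication that scrambles the low-order
     * bits. It is used here only to pick array indexes cheaply; callers
     * advance the seed by feeding the result back in, as benchmarkSPSInteger()
     * does:
     *
     *   uint64_t seed = (tid*1024)+tid+1234567890123456781ULL;
     *   seed = randomLong(seed);       // advance the generator
     *   auto idx = seed % arraySize;   // derive a random index
     */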
};

#endif


================================================
FILE: graphs/PBenchmarkSets.hpp
================================================
/*
 * Copyright 2017-2018
 *   Andreia Correia
 *   Pedro Ramalhete
 *   Pascal Felber
 *
 * This work is published under the MIT license. See LICENSE.txt
 */
#ifndef _PERSISTENT_BENCHMARK_SETS_H_
#define _PERSISTENT_BENCHMARK_SETS_H_

#include <atomic>
#include <chrono>
#include <thread>
#include <string>
#include <vector>
#include <algorithm>
#include <iostream>

using namespace std;
using namespace chrono;

template