Repository: electronicarts/EASTL Branch: master Commit: e323f67e5d93 Files: 416 Total size: 6.9 MB Directory structure: gitextract_oaro068o/ ├── .clang-format ├── .gitattributes ├── .github/ │ └── workflows/ │ └── c-cpp.yml ├── .gitignore ├── .p4ignore ├── 3RDPARTYLICENSES.TXT ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── _config.yml ├── benchmark/ │ ├── CMakeLists.txt │ └── source/ │ ├── BenchmarkAlgorithm.cpp │ ├── BenchmarkBitset.cpp │ ├── BenchmarkDeque.cpp │ ├── BenchmarkHash.cpp │ ├── BenchmarkHeap.cpp │ ├── BenchmarkList.cpp │ ├── BenchmarkMap.cpp │ ├── BenchmarkSet.cpp │ ├── BenchmarkSort.cpp │ ├── BenchmarkString.cpp │ ├── BenchmarkTupleVector.cpp │ ├── BenchmarkVector.cpp │ ├── EASTLBenchmark.cpp │ ├── EASTLBenchmark.h │ └── main.cpp ├── doc/ │ ├── Benchmarks.md │ ├── BestPractices.md │ ├── Bonus/ │ │ └── tuple_vector_readme.md │ ├── CMake/ │ │ └── EASTL_Project_Integration.md │ ├── Design.md │ ├── EASTL.natvis │ ├── FAQ.md │ ├── Glossary.md │ ├── Gotchas.md │ ├── Introduction.md │ ├── Maintenance.md │ ├── Modules.md │ └── html/ │ ├── EASTL Benchmarks.html │ ├── EASTL Best Practices.html │ ├── EASTL Design.html │ ├── EASTL FAQ.html │ ├── EASTL Glossary.html │ ├── EASTL Gotchas.html │ ├── EASTL Introduction.html │ ├── EASTL Maintenance.html │ ├── EASTL Modules.html │ └── EASTLDoc.css ├── include/ │ └── EASTL/ │ ├── algorithm.h │ ├── allocator.h │ ├── allocator_malloc.h │ ├── any.h │ ├── array.h │ ├── atomic.h │ ├── atomic_raw.h │ ├── bit.h │ ├── bitset.h │ ├── bitvector.h │ ├── bonus/ │ │ ├── adaptors.h │ │ ├── call_traits.h │ │ ├── compressed_pair.h │ │ ├── fixed_ring_buffer.h │ │ ├── fixed_tuple_vector.h │ │ ├── flags.h │ │ ├── intrusive_sdlist.h │ │ ├── intrusive_slist.h │ │ ├── list_map.h │ │ ├── lru_cache.h │ │ ├── overloaded.h │ │ ├── ring_buffer.h │ │ ├── sort_extra.h │ │ └── tuple_vector.h │ ├── chrono.h │ ├── compare.h │ ├── core_allocator.h │ ├── core_allocator_adapter.h │ ├── deque.h │ ├── expected.h │ ├── finally.h │ ├── 
fixed_allocator.h │ ├── fixed_function.h │ ├── fixed_hash_map.h │ ├── fixed_hash_set.h │ ├── fixed_list.h │ ├── fixed_map.h │ ├── fixed_set.h │ ├── fixed_slist.h │ ├── fixed_string.h │ ├── fixed_substring.h │ ├── fixed_vector.h │ ├── functional.h │ ├── hash_map.h │ ├── hash_set.h │ ├── heap.h │ ├── initializer_list.h │ ├── internal/ │ │ ├── atomic/ │ │ │ ├── arch/ │ │ │ │ ├── arch.h │ │ │ │ ├── arch_acquire_fence.h │ │ │ │ ├── arch_add_fetch.h │ │ │ │ ├── arch_and_fetch.h │ │ │ │ ├── arch_cmpxchg_strong.h │ │ │ │ ├── arch_cmpxchg_weak.h │ │ │ │ ├── arch_compiler_barrier.h │ │ │ │ ├── arch_cpu_pause.h │ │ │ │ ├── arch_exchange.h │ │ │ │ ├── arch_fetch_add.h │ │ │ │ ├── arch_fetch_and.h │ │ │ │ ├── arch_fetch_or.h │ │ │ │ ├── arch_fetch_sub.h │ │ │ │ ├── arch_fetch_xor.h │ │ │ │ ├── arch_load.h │ │ │ │ ├── arch_memory_barrier.h │ │ │ │ ├── arch_or_fetch.h │ │ │ │ ├── arch_signal_fence.h │ │ │ │ ├── arch_store.h │ │ │ │ ├── arch_sub_fetch.h │ │ │ │ ├── arch_thread_fence.h │ │ │ │ ├── arch_xor_fetch.h │ │ │ │ ├── arm/ │ │ │ │ │ ├── arch_arm.h │ │ │ │ │ ├── arch_arm_acquire_fence.h │ │ │ │ │ ├── arch_arm_load.h │ │ │ │ │ ├── arch_arm_memory_barrier.h │ │ │ │ │ ├── arch_arm_store.h │ │ │ │ │ └── arch_arm_thread_fence.h │ │ │ │ └── x86/ │ │ │ │ ├── arch_x86.h │ │ │ │ ├── arch_x86_acquire_fence.h │ │ │ │ ├── arch_x86_add_fetch.h │ │ │ │ ├── arch_x86_and_fetch.h │ │ │ │ ├── arch_x86_cmpxchg_strong.h │ │ │ │ ├── arch_x86_cmpxchg_weak.h │ │ │ │ ├── arch_x86_exchange.h │ │ │ │ ├── arch_x86_fetch_add.h │ │ │ │ ├── arch_x86_fetch_and.h │ │ │ │ ├── arch_x86_fetch_or.h │ │ │ │ ├── arch_x86_fetch_sub.h │ │ │ │ ├── arch_x86_fetch_xor.h │ │ │ │ ├── arch_x86_load.h │ │ │ │ ├── arch_x86_memory_barrier.h │ │ │ │ ├── arch_x86_or_fetch.h │ │ │ │ ├── arch_x86_store.h │ │ │ │ ├── arch_x86_sub_fetch.h │ │ │ │ ├── arch_x86_thread_fence.h │ │ │ │ └── arch_x86_xor_fetch.h │ │ │ ├── atomic.h │ │ │ ├── atomic_asserts.h │ │ │ ├── atomic_base_width.h │ │ │ ├── atomic_casts.h │ │ │ ├── atomic_flag.h 
│ │ │ ├── atomic_flag_standalone.h │ │ │ ├── atomic_integral.h │ │ │ ├── atomic_macros/ │ │ │ │ ├── atomic_macros.h │ │ │ │ ├── atomic_macros_acquire_fence.h │ │ │ │ ├── atomic_macros_add_fetch.h │ │ │ │ ├── atomic_macros_and_fetch.h │ │ │ │ ├── atomic_macros_base.h │ │ │ │ ├── atomic_macros_cmpxchg_strong.h │ │ │ │ ├── atomic_macros_cmpxchg_weak.h │ │ │ │ ├── atomic_macros_compiler_barrier.h │ │ │ │ ├── atomic_macros_cpu_pause.h │ │ │ │ ├── atomic_macros_exchange.h │ │ │ │ ├── atomic_macros_fetch_add.h │ │ │ │ ├── atomic_macros_fetch_and.h │ │ │ │ ├── atomic_macros_fetch_or.h │ │ │ │ ├── atomic_macros_fetch_sub.h │ │ │ │ ├── atomic_macros_fetch_xor.h │ │ │ │ ├── atomic_macros_load.h │ │ │ │ ├── atomic_macros_memory_barrier.h │ │ │ │ ├── atomic_macros_or_fetch.h │ │ │ │ ├── atomic_macros_signal_fence.h │ │ │ │ ├── atomic_macros_store.h │ │ │ │ ├── atomic_macros_sub_fetch.h │ │ │ │ ├── atomic_macros_thread_fence.h │ │ │ │ └── atomic_macros_xor_fetch.h │ │ │ ├── atomic_macros.h │ │ │ ├── atomic_memory_order.h │ │ │ ├── atomic_pointer.h │ │ │ ├── atomic_size_aligned.h │ │ │ ├── atomic_standalone.h │ │ │ └── compiler/ │ │ │ ├── compiler.h │ │ │ ├── compiler_acquire_fence.h │ │ │ ├── compiler_add_fetch.h │ │ │ ├── compiler_and_fetch.h │ │ │ ├── compiler_barrier.h │ │ │ ├── compiler_cmpxchg_strong.h │ │ │ ├── compiler_cmpxchg_weak.h │ │ │ ├── compiler_cpu_pause.h │ │ │ ├── compiler_exchange.h │ │ │ ├── compiler_fetch_add.h │ │ │ ├── compiler_fetch_and.h │ │ │ ├── compiler_fetch_or.h │ │ │ ├── compiler_fetch_sub.h │ │ │ ├── compiler_fetch_xor.h │ │ │ ├── compiler_load.h │ │ │ ├── compiler_memory_barrier.h │ │ │ ├── compiler_or_fetch.h │ │ │ ├── compiler_signal_fence.h │ │ │ ├── compiler_store.h │ │ │ ├── compiler_sub_fetch.h │ │ │ ├── compiler_thread_fence.h │ │ │ ├── compiler_xor_fetch.h │ │ │ ├── gcc/ │ │ │ │ ├── compiler_gcc.h │ │ │ │ ├── compiler_gcc_add_fetch.h │ │ │ │ ├── compiler_gcc_and_fetch.h │ │ │ │ ├── compiler_gcc_barrier.h │ │ │ │ ├── 
compiler_gcc_cmpxchg_strong.h │ │ │ │ ├── compiler_gcc_cmpxchg_weak.h │ │ │ │ ├── compiler_gcc_cpu_pause.h │ │ │ │ ├── compiler_gcc_exchange.h │ │ │ │ ├── compiler_gcc_fetch_add.h │ │ │ │ ├── compiler_gcc_fetch_and.h │ │ │ │ ├── compiler_gcc_fetch_or.h │ │ │ │ ├── compiler_gcc_fetch_sub.h │ │ │ │ ├── compiler_gcc_fetch_xor.h │ │ │ │ ├── compiler_gcc_load.h │ │ │ │ ├── compiler_gcc_or_fetch.h │ │ │ │ ├── compiler_gcc_signal_fence.h │ │ │ │ ├── compiler_gcc_store.h │ │ │ │ ├── compiler_gcc_sub_fetch.h │ │ │ │ ├── compiler_gcc_thread_fence.h │ │ │ │ └── compiler_gcc_xor_fetch.h │ │ │ └── msvc/ │ │ │ ├── compiler_msvc.h │ │ │ ├── compiler_msvc_add_fetch.h │ │ │ ├── compiler_msvc_and_fetch.h │ │ │ ├── compiler_msvc_barrier.h │ │ │ ├── compiler_msvc_cmpxchg_strong.h │ │ │ ├── compiler_msvc_cmpxchg_weak.h │ │ │ ├── compiler_msvc_cpu_pause.h │ │ │ ├── compiler_msvc_exchange.h │ │ │ ├── compiler_msvc_fetch_add.h │ │ │ ├── compiler_msvc_fetch_and.h │ │ │ ├── compiler_msvc_fetch_or.h │ │ │ ├── compiler_msvc_fetch_sub.h │ │ │ ├── compiler_msvc_fetch_xor.h │ │ │ ├── compiler_msvc_or_fetch.h │ │ │ ├── compiler_msvc_signal_fence.h │ │ │ ├── compiler_msvc_sub_fetch.h │ │ │ └── compiler_msvc_xor_fetch.h │ │ ├── char_traits.h │ │ ├── concepts.h │ │ ├── config.h │ │ ├── copy_help.h │ │ ├── enable_shared.h │ │ ├── fill_help.h │ │ ├── fixed_pool.h │ │ ├── function.h │ │ ├── function_detail.h │ │ ├── function_help.h │ │ ├── functional_base.h │ │ ├── generic_iterator.h │ │ ├── hashtable.h │ │ ├── in_place_t.h │ │ ├── integer_sequence.h │ │ ├── intrusive_hashtable.h │ │ ├── mem_fn.h │ │ ├── memory_base.h │ │ ├── memory_uses_allocator.h │ │ ├── move_help.h │ │ ├── pair_fwd_decls.h │ │ ├── piecewise_construct_t.h │ │ ├── red_black_tree.h │ │ ├── smart_ptr.h │ │ ├── special_member_functions.h │ │ ├── special_member_functions_expected.h │ │ ├── special_member_functions_variant_optional.h │ │ ├── thread_support.h │ │ ├── tuple_fwd_decls.h │ │ ├── type_compound.h │ │ ├── type_detected.h │ │ ├── 
type_fundamental.h │ │ ├── type_pod.h │ │ ├── type_properties.h │ │ ├── type_transformations.h │ │ └── type_void_t.h │ ├── intrusive_hash_map.h │ ├── intrusive_hash_set.h │ ├── intrusive_list.h │ ├── intrusive_ptr.h │ ├── iterator.h │ ├── linked_array.h │ ├── linked_ptr.h │ ├── list.h │ ├── map.h │ ├── memory.h │ ├── meta.h │ ├── numeric.h │ ├── numeric_limits.h │ ├── optional.h │ ├── priority_queue.h │ ├── queue.h │ ├── random.h │ ├── ratio.h │ ├── safe_ptr.h │ ├── scoped_array.h │ ├── scoped_ptr.h │ ├── segmented_vector.h │ ├── set.h │ ├── shared_array.h │ ├── shared_ptr.h │ ├── slist.h │ ├── sort.h │ ├── span.h │ ├── stack.h │ ├── string.h │ ├── string_hash_map.h │ ├── string_map.h │ ├── string_view.h │ ├── tuple.h │ ├── type_traits.h │ ├── unique_ptr.h │ ├── unordered_map.h │ ├── unordered_set.h │ ├── utility.h │ ├── variant.h │ ├── vector.h │ ├── vector_map.h │ ├── vector_multimap.h │ ├── vector_multiset.h │ ├── vector_set.h │ ├── version.h │ └── weak_ptr.h ├── scripts/ │ ├── CMake/ │ │ └── CommonCppFlags.cmake │ └── build.sh ├── source/ │ ├── allocator_eastl.cpp │ ├── assert.cpp │ ├── atomic.cpp │ ├── fixed_pool.cpp │ ├── hashtable.cpp │ ├── intrusive_list.cpp │ ├── numeric_limits.cpp │ ├── red_black_tree.cpp │ ├── string.cpp │ └── thread_support.cpp └── test/ ├── CMakeLists.txt └── source/ ├── ConceptImpls.h ├── EASTLTest.cpp ├── EASTLTest.h ├── EASTLTestAllocator.cpp ├── EASTLTestAllocator.h ├── EASTLTestIterators.h ├── GetTypeName.h ├── TestAlgorithm.cpp ├── TestAllocator.cpp ├── TestAllocatorPropagate.cpp ├── TestAny.cpp ├── TestArray.cpp ├── TestAssociativeContainers.h ├── TestAtomicAsm.cpp ├── TestAtomicBasic.cpp ├── TestAtomicMultiThreaded.cpp ├── TestAtomicRaw.cpp ├── TestBit.cpp ├── TestBitVector.cpp ├── TestBitcast.cpp ├── TestBitset.cpp ├── TestCharTraits.cpp ├── TestChrono.cpp ├── TestConcepts.cpp ├── TestContainerBehaviour.cpp ├── TestCppCXTypeTraits.cpp ├── TestDeque.cpp ├── TestExpected.cpp ├── TestExtra.cpp ├── TestFinally.cpp ├── 
TestFixedFunction.cpp ├── TestFixedHash.cpp ├── TestFixedList.cpp ├── TestFixedMap.cpp ├── TestFixedSList.cpp ├── TestFixedSet.cpp ├── TestFixedString.cpp ├── TestFixedTupleVector.cpp ├── TestFixedVector.cpp ├── TestFlags.cpp ├── TestFunctional.cpp ├── TestHash.cpp ├── TestHeap.cpp ├── TestIntrusiveHash.cpp ├── TestIntrusiveList.cpp ├── TestIntrusiveSDList.cpp ├── TestIntrusiveSList.cpp ├── TestIterator.cpp ├── TestList.cpp ├── TestListMap.cpp ├── TestLruCache.cpp ├── TestMap.cpp ├── TestMap.h ├── TestMemory.cpp ├── TestMeta.cpp ├── TestNumericLimits.cpp ├── TestOptional.cpp ├── TestRandom.cpp ├── TestRatio.cpp ├── TestRingBuffer.cpp ├── TestSList.cpp ├── TestSegmentedVector.cpp ├── TestSet.cpp ├── TestSet.h ├── TestSmartPtr.cpp ├── TestSort.cpp ├── TestSpan.cpp ├── TestString.cpp ├── TestString.inl ├── TestStringHashMap.cpp ├── TestStringMap.cpp ├── TestStringView.cpp ├── TestStringView.inl ├── TestTuple.cpp ├── TestTupleVector.cpp ├── TestTypeTraits.cpp ├── TestUtility.cpp ├── TestVariant.cpp ├── TestVariant2.cpp ├── TestVector.cpp ├── TestVectorMap.cpp ├── TestVectorSet.cpp └── main.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#- Language : Cpp BasedOnStyle : Google Standard : Auto #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#- AccessModifierOffset : -4 AlignTrailingComments : true AllowAllParametersOfDeclarationOnNextLine : false AllowShortBlocksOnASingleLine : true AllowShortFunctionsOnASingleLine : true AllowShortIfStatementsOnASingleLine : false AllowShortLoopsOnASingleLine : false BinPackParameters : false BreakBeforeBraces : Allman BreakBeforeTernaryOperators : false BreakConstructorInitializersBeforeComma : true ColumnLimit : 120 Cpp11BracedListStyle : true DerivePointerAlignment : true 
DerivePointerBinding : false IndentWidth : 4 KeepEmptyLinesAtTheStartOfBlocks : true MaxEmptyLinesToKeep : 2 NamespaceIndentation : All PointerBindsToType : true SpacesBeforeTrailingComments : 1 SpacesInAngles : false SpacesInSquareBrackets : false TabWidth : 4 UseTab : ForIndentation #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#- #-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#- ================================================ FILE: .gitattributes ================================================ # Auto detect text files and perform LF normalization # http://git-scm.com/docs/gitattributes * text=auto .appveyor.yml -text eol=crlf .appveyor-mingw.yml -text eol=crlf ci-*.cmd -text eol=crlf ================================================ FILE: .github/workflows/c-cpp.yml ================================================ name: EASTL Build & Test Pipeline on: push: branches: [ master ] pull_request: branches: [ master ] jobs: checkout: name: Checkout EASTL runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: path: EASTL/ - name: Upload checked out code uses: actions/upload-artifact@v4 with: name: Code path: EASTL/ build: needs: checkout strategy: fail-fast: false matrix: os: [ windows-latest, ubuntu-latest ] compiler: [ clang, gcc, msvc ] configuration: [ Debug, Release ] std_iter_compatibility: [ std_iter_category_disabled, std_iter_category_enabled ] exclude: - os: windows-latest compiler: gcc - os: windows-latest compiler: clang - os: ubuntu-latest compiler: msvc include: - os: windows-latest compiler: msvc cxxflags: '/std:c++20 /Zc:char8_t' - os: ubuntu-latest compiler: clang cc: 'clang-18' cxx: 'clang++-18' cxxflags: '-std=c++20' - os: ubuntu-latest compiler: gcc cc: 'gcc-14' cxx: 'g++-14' cxxflags: '-std=c++2a' name: Build EASTL runs-on: ${{ matrix.os }} steps: - name: Download a Build Artifact uses: actions/download-artifact@v4 with: name: Code path: Code/ - run: mkdir build - run: cd build && cmake ../Code 
-DEASTL_BUILD_BENCHMARK:BOOL=ON -DEASTL_BUILD_TESTS:BOOL=ON -DEASTL_STD_ITERATOR_CATEGORY_ENABLED:BOOL=${{ contains(matrix.std_iter_compatibility, 'enabled') && 'ON' || 'OFF' }} env: CXXFLAGS: ${{ matrix.cxxflags }} CXX: ${{ matrix.cxx }} CC: ${{ matrix.cc }} - run: cd build && cmake --build . --config ${{ matrix.configuration }} - name: Upload binaries uses: actions/upload-artifact@v4 with: name: Binaries-${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.configuration }}-${{ matrix.std_iter_compatibility }} path: build/ test: needs: build name: Run EASTL tests strategy: fail-fast: false matrix: os: [ windows-latest, ubuntu-latest ] compiler: [ clang, msvc, gcc ] configuration: [ Debug, Release ] std_iter_compatibility: [ std_iter_category_disabled, std_iter_category_enabled ] exclude: - os: windows-latest compiler: gcc - os: windows-latest compiler: clang - os: ubuntu-latest compiler: msvc runs-on: ${{ matrix.os }} steps: - name: Download a Build Artifact uses: actions/download-artifact@v4 with: name: Binaries-${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.configuration }}-${{ matrix.std_iter_compatibility }} path: Binaries/ - if: matrix.os == 'ubuntu-latest' run: chmod 755 ./Binaries/test/EASTLTest - run: cd Binaries/test && ctest -C ${{ matrix.configuration }} -V benchmark: needs: build name: Run EASTL benchmarks strategy: fail-fast: false matrix: os: [ windows-latest, ubuntu-latest ] compiler: [ clang, msvc, gcc ] configuration: [ Release ] exclude: - os: windows-latest compiler: gcc - os: windows-latest compiler: clang - os: ubuntu-latest compiler: msvc runs-on: ${{ matrix.os }} steps: - name: Download a Build Artifact uses: actions/download-artifact@v4 with: name: Binaries-${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.configuration }}-std_iter_category_disabled path: Binaries/ - if: matrix.os == 'ubuntu-latest' run: chmod 755 ./Binaries/benchmark/EASTLBenchmarks - run: cd Binaries/benchmark && ctest -C ${{ matrix.configuration }} -V 
================================================ FILE: .gitignore ================================================ tags cscope.out **/*.swp **/*.swo .swp *.swp .swo .TMP -.d eastl_build_out build_bench bench.bat build.bat .p4config ## CMake generated files CMakeCache.txt cmake_install.cmake ## Patch files *.patch ## For Visual Studio Generated projects *.sln **/*.vcxproj **/*.vcxproj.filters *.VC.opendb *.sdf **/*.suo **/*.user .vs/* **/Debug/* CMakeFiles/* EASTL.dir/** RelWithDebInfo/* Release/* Win32/* x64/* MinSizeRel/* build*/* Testing/* %ALLUSERSPROFILE%/* # Buck /buck-out/ /.buckd/ /buckaroo/ .buckconfig.local BUCKAROO_DEPS .vscode/settings.json ================================================ FILE: .p4ignore ================================================ /.git/ tags .gitignore cscope.out ================================================ FILE: 3RDPARTYLICENSES.TXT ================================================ Additional licenses also apply to this software package as detailed below. HP STL comes with the following license: /////////////////////////////////////////////////////////////////////////////// // Copyright (c) 1994 // Hewlett-Packard Company // // Permission to use, copy, modify, distribute and sell this software // and its documentation for any purpose is hereby granted without fee, // provided that the above copyright notice appear in all copies and // that both that copyright notice and this permission notice appear // in supporting documentation. Hewlett-Packard Company makes no // representations about the suitability of this software for any // purpose. It is provided "as is" without express or implied warranty. 
/////////////////////////////////////////////////////////////////////////////// libc++ comes with the following license: ============================================================================== libc++ License ============================================================================== The libc++ library is dual licensed under both the University of Illinois "BSD-Like" license and the MIT license. As a user of this code you may choose to use it under either license. As a contributor, you agree to allow your code to be used under both. Full text of the relevant licenses is included below. ============================================================================== University of Illinois/NCSA Open Source License Copyright (c) 2009-2015 by the contributors listed at http://llvm.org/svn/llvm-project/libcxx/trunk/CREDITS.TXT All rights reserved. Developed by: LLVM Team University of Illinois at Urbana-Champaign http://llvm.org Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. * Neither the names of the LLVM Team, University of Illinois at Urbana-Champaign, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. ============================================================================== Copyright (c) 2009-2014 by the contributors listed at http://llvm.org/svn/llvm-project/libcxx/trunk/CREDITS.TXT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ============================================================================== *No express or implied license to use PlayStation®4 libraries included. PlayStation®4 development tools and libraries are subject to separate license with Sony Interactive Entertainment LLC. 
============================================================================== ================================================ FILE: CMakeLists.txt ================================================ #------------------------------------------------------------------------------------------- # Copyright (C) Electronic Arts Inc. All rights reserved. #------------------------------------------------------------------------------------------- cmake_minimum_required(VERSION 3.11) include(FetchContent) project(EASTL CXX) #------------------------------------------------------------------------------------------- # Options #------------------------------------------------------------------------------------------- option(EASTL_BUILD_BENCHMARK "Enable generation of build files for benchmark" OFF) option(EASTL_BUILD_TESTS "Enable generation of build files for tests" OFF) option(EASTL_STD_ITERATOR_CATEGORY_ENABLED "Enable compatibility with std:: iterator categories" OFF) option(EASTL_DISABLE_APRIL_2024_DEPRECATIONS "Enable use of API marked for removal in April 2024." OFF) option(EASTL_DISABLE_SEPT_2024_DEPRECATIONS "Enable use of API marked for removal in September 2024." OFF) option(EASTL_DISABLE_APRIL_2025_DEPRECATIONS "Enable use of API marked for removal in April 2025." 
OFF) #------------------------------------------------------------------------------------------- # Compiler Flags #------------------------------------------------------------------------------------------- set (CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/scripts/CMake") include(CommonCppFlags) #------------------------------------------------------------------------------------------- # Library definition #------------------------------------------------------------------------------------------- file(GLOB EASTL_SOURCES "source/*.cpp") file(GLOB_RECURSE EASTL_HEADERS "include/EASTL/**.h") add_library(EASTL ${EASTL_SOURCES} ${EASTL_HEADERS}) target_compile_features(EASTL PUBLIC cxx_std_14) # include both source and headers in the files tab in Visual Studio source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} PREFIX "Header Files" FILES ${EASTL_HEADERS}) if (MSVC) set(EASTL_NATVIS_DIR "doc") set(EASTL_NATVIS_FILE "${EASTL_NATVIS_DIR}/EASTL.natvis") target_sources(EASTL INTERFACE $<INSTALL_INTERFACE:${EASTL_NATVIS_FILE}> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/${EASTL_NATVIS_FILE}> ) endif() if(EASTL_BUILD_BENCHMARK) add_subdirectory(benchmark) endif() if(EASTL_BUILD_TESTS) add_subdirectory(test) endif() #------------------------------------------------------------------------------------------- # Defines #------------------------------------------------------------------------------------------- add_definitions(-D_CHAR16T) add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_SCL_SECURE_NO_WARNINGS) add_definitions(-DEASTL_OPENSOURCE=1) if (EASTL_STD_ITERATOR_CATEGORY_ENABLED) add_definitions(-DEASTL_STD_ITERATOR_CATEGORY_ENABLED=1) endif() #------------------------------------------------------------------------------------------- # Include dirs #------------------------------------------------------------------------------------------- target_include_directories(EASTL PUBLIC include) #------------------------------------------------------------------------------------------- # Dependencies 
#------------------------------------------------------------------------------------------- FetchContent_Declare( EABase GIT_REPOSITORY https://github.com/electronicarts/EABase.git GIT_TAG 0699a15efdfd20b6cecf02153bfa5663decb653c GIT_SUBMODULES "" # This should be temporary until we update the cyclic submodule dependencies in EABase. ) FetchContent_MakeAvailable(EABase) target_link_libraries(EASTL EABase) #------------------------------------------------------------------------------------------- # Deprecations #------------------------------------------------------------------------------------------- if(EASTL_DISABLE_APRIL_2024_DEPRECATIONS) target_compile_definitions(EASTL PUBLIC EA_DEPRECATIONS_FOR_2024_APRIL=EA_DISABLED) endif() if(EASTL_DISABLE_SEPT_2024_DEPRECATIONS) target_compile_definitions(EASTL PUBLIC EA_DEPRECATIONS_FOR_2024_SEPT=EA_DISABLED) endif() if(EASTL_DISABLE_APRIL_2025_DEPRECATIONS) target_compile_definitions(EASTL PUBLIC EA_DEPRECATIONS_FOR_2025_APRIL=EA_DISABLED) endif() #------------------------------------------------------------------------------------------- # Installation #------------------------------------------------------------------------------------------- install(TARGETS EASTL DESTINATION lib) install(DIRECTORY include/EASTL DESTINATION include) if (MSVC) install(FILES ${EASTL_NATVIS_FILE} DESTINATION ${EASTL_NATVIS_DIR}) endif() ================================================ FILE: CONTRIBUTING.md ================================================ ## Contributing Before you can contribute, EA must have a Contributor License Agreement (CLA) on file that has been signed by each contributor. 
You can sign here: [Go to CLA](https://electronicarts.na1.echosign.com/public/esignWidget?wid=CBFCIBAA3AAABLblqZhByHRvZqmltGtliuExmuV-WNzlaJGPhbSRg2ufuPsM3P0QmILZjLpkGslg24-UJtek*) If you want to be recognized for your contributions to EASTL or have a project using EASTL be recognized, you can submit a pull request to the appropriate sections in [README.md](README.md). Some examples of what the format and information will look like are as follows. * John Smith - jsmith@domain.com * John Smith * Frostbite - Electronic Arts * My Project - [link to said project] ### Pull Request Policy All code contributions to EASTL are submitted as [GitHub pull requests](https://help.github.com/articles/using-pull-requests/). All pull requests will be reviewed by an EASTL maintainer according to the guidelines found in the next section. Your pull request should: * merge cleanly * come with tests * tests should be minimal and stable * fail before your fix is applied * pass the test suite * code formatting is encoded in clang format * limit using clang format on new code * do not deviate from style already established in the files ### Getting the Repository ```bash git clone https://github.com/electronicarts/EASTL ``` ### Running the Unit Tests EASTL uses CMake as its build system. * Create and navigate to "your_build_folder": * mkdir your_build_folder && cd your_build_folder * Generate build scripts: * cmake eastl_source_folder -DEASTL_BUILD_TESTS:BOOL=ON * Build unit tests for "your_config": * cmake --build . --config your_config * Run the unit tests for "your_config" from the test folder: * cd test && ctest -C your_config Here is an example batch file. ```batch set build_folder=out mkdir %build_folder% pushd %build_folder% call cmake .. -DEASTL_BUILD_TESTS:BOOL=ON -DEASTL_BUILD_BENCHMARK:BOOL=OFF call cmake --build . --config Release call cmake --build . --config Debug call cmake --build . --config RelWithDebInfo call cmake --build . 
--config MinSizeRel pushd test call ctest -C Release call ctest -C Debug call ctest -C RelWithDebInfo call ctest -C MinSizeRel popd popd ``` Here is an example bash file ```bash build_folder=out mkdir $build_folder pushd $build_folder cmake .. -DEASTL_BUILD_TESTS:BOOL=ON -DEASTL_BUILD_BENCHMARK:BOOL=OFF cmake --build . --config Release cmake --build . --config Debug cmake --build . --config RelWithDebInfo cmake --build . --config MinSizeRel pushd test ctest -C Release ctest -C Debug ctest -C RelWithDebInfo ctest -C MinSizeRel popd popd ``` The value of EASTL_BUILD_BENCHMARK can be toggled to `ON` in order to build projects that include the benchmark program. ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2019, Electronic Arts All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # EA Standard Template Library [![Build Status](https://travis-ci.org/electronicarts/EASTL.svg?branch=master)](https://travis-ci.org/electronicarts/EASTL) EASTL stands for Electronic Arts Standard Template Library. It is a C++ template library of containers, algorithms, and iterators useful for runtime and tool development across multiple platforms. It is a fairly extensive and robust implementation of such a library and has an emphasis on high performance above all other considerations. ## Usage If you are familiar with the C++ STL or have worked with other templated container/algorithm libraries, you probably don't need to read this. If you have no familiarity with C++ templates at all, then you probably will need more than this document to get you up to speed. In this case, you need to understand that templates, when used properly, are powerful vehicles for the ease of creation of optimized C++ code. A description of C++ templates is outside the scope of this documentation, but there is plenty of such documentation on the Internet. EASTL is suitable for any tools and shipping applications where the functionality of EASTL is useful. 
Modern compilers are capable of producing good code with templates and many people are using them in both current generation and future generation applications on multiple platforms from embedded systems to servers and mainframes. ## Package Managers You can download and install EASTL using the [Conan](https://github.com/conan-io/conan) package manager: conan install eastl/3.15.00@ The EASTL package in Conan is kept up to date by Conan team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/conan-io/conan-center-index) on the Conan Center Index repository. You can download and install EASTL using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: git clone https://github.com/Microsoft/vcpkg.git cd vcpkg ./bootstrap-vcpkg.sh ./vcpkg integrate install vcpkg install eastl The EASTL port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. ## Documentation Please see [EASTL Introduction](doc/Introduction.md). ## Compiling sources Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on compiling and testing the source. ## Credits And Maintainers EASTL was created by Paul Pedriana and he maintained the project for roughly 10 years. EASTL was subsequently maintained by Roberto Parolin for more than 8 years. He was the driver and proponent for getting EASTL open-sourced. Rob was a mentor to all members of the team and taught us everything we ever wanted to know about C++ spookiness. After Rob, maintenance of EASTL passed to Max Winkler for roughly a year, then to Liam Mitchell for about another year. The current maintainers (since circa 2022) are Galo Rojo and Jonathan Hopkins. 
Significant EASTL contributions were made by (in alphabetical order): * Avery Lee * Claire Andrews * Galo Rojo * Jonathan Hopkins * JP Flouret * Liam Mitchell * Matt Newport * Max Winkler * Paul Pedriana * Roberto Parolin * Simon Everett ## Contributors ## Projects And Products Using EASTL * Frostbite - Electronic Arts - [https://www.ea.com/frostbite] ## License Modified BSD License (3-Clause BSD license) see the file LICENSE in the project root. ================================================ FILE: _config.yml ================================================ theme: jekyll-theme-minimal ================================================ FILE: benchmark/CMakeLists.txt ================================================ #------------------------------------------------------------------------------------------- # Copyright (C) Electronic Arts Inc. All rights reserved. #------------------------------------------------------------------------------------------- #------------------------------------------------------------------------------------------- # CMake info #------------------------------------------------------------------------------------------- cmake_minimum_required(VERSION 3.1) project(EASTLBenchmarks CXX) include(CTest) #------------------------------------------------------------------------------------------- # Defines #------------------------------------------------------------------------------------------- add_definitions(-D_CHAR16T) #------------------------------------------------------------------------------------------- # Include directories #------------------------------------------------------------------------------------------- include_directories(source) include_directories(../test/source) #------------------------------------------------------------------------------------------- # Compiler Flags #------------------------------------------------------------------------------------------- set (CMAKE_MODULE_PATH 
"${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/../scripts/CMake") include(CommonCppFlags) # Libstdc++ calls new internally, since DLLs have no weak symbols, runtime symbol resolution fails and EASTL's new is not called. # Linking against static libstdc++ fixes this. # See https://github.com/electronicarts/EASTL/issues/40 for more info. if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND MINGW) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -static-libstdc++") set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -static-libstdc++") set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} -static-libstdc++") endif() if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_BUILD_TYPE MATCHES "MinSizeRel" AND MINGW) message(FATAL_ERROR "FIXME: MinSizeRel on MingW-w64's Clang fails to link.") endif() # The benchmark suite fails to compile if char8_t is enabled, so disable it. if (EASTL_NO_CHAR8T_FLAG) add_compile_options(${EASTL_NO_CHAR8T_FLAG}) endif() #------------------------------------------------------------------------------------------- # Source files #------------------------------------------------------------------------------------------- file(GLOB EASTLBENCHMARK_SOURCES "source/*.cpp") file(GLOB EASTLTEST_SOURCES "../test/source/EASTLTestAllocator.cpp" "../test/source/EASTLTest.cpp") file(GLOB EASTLBENCHMARK_HEADERS "source/*.h") set(SOURCES ${EASTLBENCHMARK_SOURCES} ${EASTLTEST_SOURCES} ${EASTLBENCHMARK_HEADERS}) # include both source and headers in the files view in Visual Studio source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} PREFIX "Header Files" FILES ${EASTLBENCHMARK_HEADERS}) #------------------------------------------------------------------------------------------- # Defines #------------------------------------------------------------------------------------------- add_definitions(-D_CRT_SECURE_NO_WARNINGS) 
add_definitions(-D_SCL_SECURE_NO_WARNINGS) add_definitions(-DEASTL_THREAD_SUPPORT_AVAILABLE=0) add_definitions(-DEASTL_OPENSOURCE=1) add_definitions(-D_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS) # silence std::hash_map deprecation warnings if (EASTL_STD_ITERATOR_CATEGORY_ENABLED) add_definitions(-DEASTL_STD_ITERATOR_CATEGORY_ENABLED=1) endif() if(NOT EASTL_BUILD_TESTS) add_subdirectory(../test/packages/EAStdC ../test/EAStdC) add_subdirectory(../test/packages/EAAssert ../test/EAAssert) add_subdirectory(../test/packages/EAThread ../test/EAThread) add_subdirectory(../test/packages/EATest ../test/EATest) add_subdirectory(../test/packages/EAMain ../test/EAMain) endif() #------------------------------------------------------------------------------------------- # Executable definition #------------------------------------------------------------------------------------------- add_executable(EASTLBenchmarks ${SOURCES}) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) set(EASTLBenchmark_Libraries EABase EAAssert EAMain EAThread EAStdC EASTL EATest) target_link_libraries(EASTLBenchmarks ${EASTLBenchmark_Libraries} Threads::Threads) #------------------------------------------------------------------------------------------- # Run Unit tests and verify the results. #------------------------------------------------------------------------------------------- add_test(EASTLBenchmarkRuns EASTLBenchmarks) set_tests_properties (EASTLBenchmarkRuns PROPERTIES PASS_REGULAR_EXPRESSION "RETURNCODE=0") ================================================ FILE: benchmark/source/BenchmarkAlgorithm.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // BenchmarkAlgorithm.cpp // // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #include #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS() #include #include #include #include #include #include EA_RESTORE_ALL_VC_WARNINGS() #ifdef _MSC_VER #pragma warning(disable: 4996) // Function call with parameters that may be unsafe #endif using namespace EA; typedef std::vector StdVectorUChar; typedef eastl::vector EaVectorUChar; typedef std::vector StdVectorSChar; typedef eastl::vector EaVectorSChar; typedef std::vector StdVectorUint32; typedef eastl::vector EaVectorUint32; typedef std::vector StdVectorUint64; typedef eastl::vector EaVectorUint64; typedef std::vector StdVectorTO; typedef eastl::vector EaVectorTO; // We make a fake version of C++11 std::next, as some C++ compilers don't currently // provide the C++11 next algorithm in their standard libraries. namespace std__ { template inline InputIterator next(InputIterator it, typename std::iterator_traits::difference_type n = 1) { std::advance(it, n); return it; } } namespace { // Exists for the purpose of testing PODs that are larger than built-in types.
template struct SizedPOD { char memory[kSize]; }; void TestFindEndStd(EA::StdC::Stopwatch& stopwatch, const std::string& sTest, const char* pSearchStringBegin, const char* pSearchStringEnd) { stopwatch.Restart(); std::string::const_iterator it = std::find_end(sTest.begin(), sTest.end(), pSearchStringBegin, pSearchStringEnd); stopwatch.Stop(); if(it != sTest.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%c", *it); } void TestFindEndEa(EA::StdC::Stopwatch& stopwatch, const eastl::string& sTest, const char* pSearchStringBegin, const char* pSearchStringEnd) { stopwatch.Restart(); eastl::string::const_iterator it = eastl::find_end(sTest.begin(), sTest.end(), pSearchStringBegin, pSearchStringEnd); stopwatch.Stop(); if(it != sTest.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%c", *it); } void TestSearchStd(EA::StdC::Stopwatch& stopwatch, const std::string& sTest, const char* pSearchStringBegin, const char* pSearchStringEnd) { stopwatch.Restart(); std::string::const_iterator it = std::search(sTest.begin(), sTest.end(), pSearchStringBegin, pSearchStringEnd); stopwatch.Stop(); if(it != sTest.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%c", *it); } void TestSearchEa(EA::StdC::Stopwatch& stopwatch, const eastl::string& sTest, const char* pSearchStringBegin, const char* pSearchStringEnd) { stopwatch.Restart(); eastl::string::const_iterator it = eastl::search(sTest.begin(), sTest.end(), pSearchStringBegin, pSearchStringEnd); stopwatch.Stop(); if(it != sTest.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%c", *it); } void TestSearchNStd(EA::StdC::Stopwatch& stopwatch, const std::string& sTest, int n, char c) { stopwatch.Restart(); std::string::const_iterator it = std::search_n(sTest.begin(), sTest.end(), n, c); stopwatch.Stop(); if(it != sTest.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, 
Benchmark::kScratchBufferSize, "%c", *it); } void TestSearchNEa(EA::StdC::Stopwatch& stopwatch, const eastl::string& sTest, int n, char c) { stopwatch.Restart(); eastl::string::const_iterator it = eastl::search_n(sTest.begin(), sTest.end(), n, c); stopwatch.Stop(); if(it != sTest.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%c", *it); } template void TestUniqueStd(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); typename Container::iterator it = std::unique(c.begin(), c.end()); stopwatch.Stop(); c.erase(it, c.end()); } template void TestUniqueEa(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); typename Container::iterator it = eastl::unique(c.begin(), c.end()); stopwatch.Stop(); c.erase(it, c.end()); } template void TestMinElementStd(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); const typename Container::const_iterator it = std::min_element(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &it); } template void TestMinElementEa(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); const typename Container::const_iterator it = eastl::min_element(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &it); } template void TestCountStd(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); const typename Container::difference_type n = std::count(c.begin(), c.end(), (typename Container::value_type)999999); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (int)n); } template void TestCountEa(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); const typename Container::difference_type n = eastl::count(c.begin(), c.end(), (typename Container::value_type)999999); stopwatch.Stop(); 
EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (int)n); } template void TestAdjacentFindStd(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); const typename Container::const_iterator it = std::adjacent_find(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &it); } template void TestAdjacentFindEa(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); const typename Container::const_iterator it = eastl::adjacent_find(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &it); } template void TestLowerBoundStd(EA::StdC::Stopwatch& stopwatch, const Container& c, const typename Container::value_type* pBegin, const typename Container::value_type* pEnd) { stopwatch.Restart(); while(pBegin != pEnd) { typename Container::const_iterator it = std::lower_bound(c.begin(), c.end(), *pBegin++); Benchmark::DoNothing(&it); } stopwatch.Stop(); } template void TestLowerBoundEa(EA::StdC::Stopwatch& stopwatch, Container& c, const typename Container::value_type* pBegin, const typename Container::value_type* pEnd) { stopwatch.Restart(); while(pBegin != pEnd) { typename Container::const_iterator it = eastl::lower_bound(c.begin(), c.end(), *pBegin++); Benchmark::DoNothing(&it); } stopwatch.Stop(); } template void TestUpperBoundStd(EA::StdC::Stopwatch& stopwatch, const Container& c, const typename Container::value_type* pBegin, const typename Container::value_type* pEnd) { stopwatch.Restart(); while(pBegin != pEnd) { typename Container::const_iterator it = std::upper_bound(c.begin(), c.end(), *pBegin++); Benchmark::DoNothing(&it); } stopwatch.Stop(); } template void TestUpperBoundEa(EA::StdC::Stopwatch& stopwatch, Container& c, const typename Container::value_type* pBegin, const typename Container::value_type* pEnd) { stopwatch.Restart(); while(pBegin != pEnd) { typename 
Container::const_iterator it = eastl::upper_bound(c.begin(), c.end(), *pBegin++); Benchmark::DoNothing(&it); } stopwatch.Stop(); } template void TestEqualRangeStd(EA::StdC::Stopwatch& stopwatch, const Container& c, const typename Container::value_type* pBegin, const typename Container::value_type* pEnd) { stopwatch.Restart(); while(pBegin != pEnd) { std::pair itPair = std::equal_range(c.begin(), c.end(), *pBegin++); Benchmark::DoNothing(&itPair); } stopwatch.Stop(); } template void TestEqualRangeEa(EA::StdC::Stopwatch& stopwatch, Container& c, const typename Container::value_type* pBegin, const typename Container::value_type* pEnd) { stopwatch.Restart(); while(pBegin != pEnd) { eastl::pair itPair = eastl::equal_range(c.begin(), c.end(), *pBegin++); Benchmark::DoNothing(&itPair); } stopwatch.Stop(); } template void TestLexicographicalCompareStd(EA::StdC::Stopwatch& stopwatch, Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator2 last2) { stopwatch.Restart(); const bool bResult = std::lexicographical_compare(first1, last1, first2, last2); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", bResult ? (int)1 : (int)0); } template void TestLexicographicalCompareEa(EA::StdC::Stopwatch& stopwatch, Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator2 last2) { stopwatch.Restart(); const bool bResult = eastl::lexicographical_compare(first1, last1, first2, last2); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", bResult ? 
(int)1 : (int)0); } template void TestCopyStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, OutputIterator result) { stopwatch.Restart(); std::copy(first, last, result); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (int)*first); } template void TestCopyEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, OutputIterator result) { stopwatch.Restart(); eastl::copy(first, last, result); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (int)*first); } template void TestCopyBackwardStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, OutputIterator result) { stopwatch.Restart(); std::copy_backward(first, last, result); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (int)*first); } template void TestCopyBackwardEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, OutputIterator result) { stopwatch.Restart(); eastl::copy_backward(first, last, result); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (int)*first); } template void TestFillStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, const Value& v) { stopwatch.Restart(); std::fill(first, last, v); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestFillEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, const Value& v) { stopwatch.Restart(); eastl::fill(first, last, v); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestFillNStd(EA::StdC::Stopwatch& stopwatch, Iterator first, int n, const Value& v) { stopwatch.Restart(); std::fill_n(first, n, v); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template 
void TestFillNEa(EA::StdC::Stopwatch& stopwatch, Iterator first, int n, const Value& v) { stopwatch.Restart(); eastl::fill_n(first, n, v); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestReverseStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last) { stopwatch.Restart(); std::reverse(first, last); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestReverseEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last) { stopwatch.Restart(); eastl::reverse(first, last); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestRotateStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator middle, Iterator last) { stopwatch.Restart(); std::rotate(first, middle, last); // C++11 specifies that rotate has a return value, but not all std implementations return it. 
stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestRotateEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator middle, Iterator last) { stopwatch.Restart(); eastl::rotate(first, middle, last); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*first); } template void TestMergeStd(EA::StdC::Stopwatch& stopwatch, Iterator firstIn1, Iterator lastIn1, Iterator firstIn2, Iterator lastIn2, Iterator out) { stopwatch.Restart(); std::merge(firstIn1, lastIn1, firstIn2, lastIn2, out); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*out); } template void TestMergeEa(EA::StdC::Stopwatch& stopwatch, Iterator firstIn1, Iterator lastIn1, Iterator firstIn2, Iterator lastIn2, Iterator out) { stopwatch.Restart(); eastl::merge(firstIn1, lastIn1, firstIn2, lastIn2, out); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*out); } } // namespace void BenchmarkAlgorithm1(EASTLTest_Rand& /*rng*/, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2) { { std::string sTestStd; eastl::string sTestEa; const char* pSearchString1Begin = "AAA"; const char* pSearchString1End = pSearchString1Begin + strlen(pSearchString1Begin); const char* pSearchString2Begin = "BBB"; // This is something that doesn't exist searched string. const char* pSearchString2End = pSearchString2Begin + strlen(pSearchString2Begin); const char* pSearchString3Begin = "CCC"; const char* pSearchString3End = pSearchString3Begin + strlen(pSearchString3Begin); for(int i = 0; i < 10000; i++) sTestStd += "This is a test of the find_end algorithm. 
"; sTestEa.assign(sTestStd.data(), (eastl_size_t)sTestStd.length()); for(int i = 0; i < 2; i++) { /////////////////////////////// // Test find_end /////////////////////////////// sTestStd.insert(sTestStd.size() * 15 / 16, pSearchString1Begin); sTestEa.insert (sTestEa.size() * 15 / 16, pSearchString1Begin); TestFindEndStd(stopwatch1, sTestStd, pSearchString1Begin, pSearchString1End); TestFindEndEa (stopwatch2, sTestEa, pSearchString1Begin, pSearchString1End); if(i == 1) Benchmark::AddResult("algorithm/find_end/string/end", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); sTestStd.insert(sTestStd.size() / 2, pSearchString2Begin); sTestEa.insert (sTestEa.size() / 2, pSearchString2Begin); TestFindEndStd(stopwatch1, sTestStd, pSearchString2Begin, pSearchString2End); TestFindEndEa (stopwatch2, sTestEa, pSearchString2Begin, pSearchString2End); if(i == 1) Benchmark::AddResult("algorithm/find_end/string/middle", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFindEndStd(stopwatch1, sTestStd, pSearchString3Begin, pSearchString3End); TestFindEndEa (stopwatch2, sTestEa, pSearchString3Begin, pSearchString3End); if(i == 1) Benchmark::AddResult("algorithm/find_end/string/none", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test search /////////////////////////////// TestSearchStd(stopwatch1, sTestStd, pSearchString1Begin, pSearchString1End); TestSearchEa (stopwatch2, sTestEa, pSearchString1Begin, pSearchString1End); if(i == 1) Benchmark::AddResult("algorithm/search/string", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test search_n /////////////////////////////// TestSearchNStd(stopwatch1, sTestStd, 3, 'A'); TestSearchNEa (stopwatch2, sTestEa, 3, 'A'); if(i == 1) Benchmark::AddResult("algorithm/search_n/string", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), 
stopwatch2.GetElapsedTime()); /////////////////////////////// // Test adjacent_find /////////////////////////////// } } } void BenchmarkAlgorithm2(EASTLTest_Rand& rng, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2) { { StdVectorUint32 stdVectorUint32; EaVectorUint32 eaVectorUint32; StdVectorUint64 stdVectorUint64; EaVectorUint64 eaVectorUint64; StdVectorTO stdVectorTO; EaVectorTO eaVectorTO; for(int i = 0; i < 2; i++) { stdVectorUint32.clear(); eaVectorUint32.clear(); for(int j = 0; j < 100000; j++) { stdVectorUint32.push_back(j); eaVectorUint32.push_back(j); stdVectorUint64.push_back(j); eaVectorUint64.push_back(j); stdVectorTO.push_back(TestObject(j)); eaVectorTO.push_back(TestObject(j)); if((rng() % 16) == 0) { stdVectorUint32.push_back(i); eaVectorUint32.push_back(i); stdVectorUint64.push_back(j); eaVectorUint64.push_back(j); stdVectorTO.push_back(TestObject(j)); eaVectorTO.push_back(TestObject(j)); if((rng() % 16) == 0) { stdVectorUint32.push_back(i); eaVectorUint32.push_back(i); stdVectorUint64.push_back(j); eaVectorUint64.push_back(j); stdVectorTO.push_back(TestObject(j)); eaVectorTO.push_back(TestObject(j)); } } } /////////////////////////////// // Test unique /////////////////////////////// TestUniqueStd(stopwatch1, stdVectorUint32); TestUniqueEa (stopwatch2, eaVectorUint32); if(i == 1) Benchmark::AddResult("algorithm/unique/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestUniqueStd(stopwatch1, stdVectorUint64); TestUniqueEa (stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("algorithm/unique/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestUniqueStd(stopwatch1, stdVectorTO); TestUniqueEa (stopwatch2, eaVectorTO); if(i == 1) Benchmark::AddResult("algorithm/unique/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test min_element /////////////////////////////// 
TestMinElementStd(stopwatch1, stdVectorTO); TestMinElementEa (stopwatch2, eaVectorTO); if(i == 1) Benchmark::AddResult("algorithm/min_element/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test count /////////////////////////////// TestCountStd(stopwatch1, stdVectorUint64); TestCountEa (stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("algorithm/count/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test adjacent_find /////////////////////////////// // Due to the above unique testing, the container should contain unique elements. Let's change that. stdVectorTO[stdVectorTO.size() - 2] = stdVectorTO[stdVectorTO.size() - 1]; eaVectorTO[eaVectorTO.size() - 2] = eaVectorTO[eaVectorTO.size() - 1]; TestAdjacentFindStd(stopwatch1, stdVectorTO); TestAdjacentFindEa (stopwatch2, eaVectorTO); if(i == 1) Benchmark::AddResult("algorithm/adj_find/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test lower_bound /////////////////////////////// // Sort the containers for the following tests.
std::sort(stdVectorTO.begin(), stdVectorTO.end()); eaVectorTO.assign(&stdVectorTO[0], &stdVectorTO[0] + stdVectorTO.size()); TestLowerBoundStd(stopwatch1, stdVectorTO, &stdVectorTO[0], &stdVectorTO[0] + stdVectorTO.size()); TestLowerBoundEa (stopwatch2, eaVectorTO, &eaVectorTO[0], &eaVectorTO[0] + eaVectorTO.size()); if(i == 1) Benchmark::AddResult("algorithm/lower_bound/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test upper_bound /////////////////////////////// std::sort(stdVectorUint32.begin(), stdVectorUint32.end()); eaVectorUint32.assign(&stdVectorUint32[0], &stdVectorUint32[0] + stdVectorUint32.size()); TestUpperBoundStd(stopwatch1, stdVectorUint32, &stdVectorUint32[0], &stdVectorUint32[0] + stdVectorUint32.size()); TestUpperBoundEa (stopwatch2, eaVectorUint32, &eaVectorUint32[0], &eaVectorUint32[0] + eaVectorUint32.size()); if(i == 1) Benchmark::AddResult("algorithm/upper_bound/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test equal_range /////////////////////////////// // VS2010 (and later versions?) is extremely slow executing this in debug builds. It can take minutes for a // single TestEqualRangeStd call to complete. It's so slow that it's nearly pointless to execute. 
#if !defined(_MSC_VER) || (_MSC_VER < 1600) || !defined(_ITERATOR_DEBUG_LEVEL) || (_ITERATOR_DEBUG_LEVEL < 2) std::sort(stdVectorUint64.begin(), stdVectorUint64.end()); eaVectorUint64.assign(&stdVectorUint64[0], &stdVectorUint64[0] + stdVectorUint64.size()); TestEqualRangeStd(stopwatch1, stdVectorUint64, &stdVectorUint64[0], &stdVectorUint64[0] + stdVectorUint64.size()); TestEqualRangeEa (stopwatch2, eaVectorUint64, &eaVectorUint64[0], &eaVectorUint64[0] + eaVectorUint64.size()); if(i == 1) Benchmark::AddResult("algorithm/equal_range/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); #endif } } } void BenchmarkAlgorithm3(EASTLTest_Rand& /*rng*/, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2) { { StdVectorUChar stdVectorUChar1(100000); StdVectorUChar stdVectorUChar2(100000); EaVectorUChar eaVectorUChar1(100000); EaVectorUChar eaVectorUChar2(100000); StdVectorSChar stdVectorSChar1(100000); StdVectorSChar stdVectorSChar2(100000); EaVectorSChar eaVectorSChar1(100000); EaVectorSChar eaVectorSChar2(100000); StdVectorTO stdVectorTO1(100000); StdVectorTO stdVectorTO2(100000); EaVectorTO eaVectorTO1(100000); EaVectorTO eaVectorTO2(100000); // All these containers should have values of zero in them. 
for(int i = 0; i < 2; i++) { /////////////////////////////// // Test lexicographical_compare /////////////////////////////// TestLexicographicalCompareStd(stopwatch1, stdVectorUChar1.begin(), stdVectorUChar1.end(), stdVectorUChar2.begin(), stdVectorUChar2.end()); TestLexicographicalCompareEa (stopwatch2, eaVectorUChar1.begin(), eaVectorUChar2.end(), eaVectorUChar2.begin(), eaVectorUChar2.end()); if(i == 1) Benchmark::AddResult("algorithm/lex_cmp/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestLexicographicalCompareStd(stopwatch1, &stdVectorSChar1[0], &stdVectorSChar1[0] + stdVectorSChar1.size(), &stdVectorSChar2[0], &stdVectorSChar2[0] + stdVectorSChar2.size()); TestLexicographicalCompareEa (stopwatch2, &eaVectorSChar1[0], &eaVectorSChar1[0] + eaVectorSChar1.size(), &eaVectorSChar2[0], &eaVectorSChar2[0] + eaVectorSChar2.size()); if(i == 1) Benchmark::AddResult("algorithm/lex_cmp/schar[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestLexicographicalCompareStd(stopwatch1, stdVectorTO1.begin(), stdVectorTO1.end(), stdVectorTO2.begin(), stdVectorTO2.end()); TestLexicographicalCompareEa (stopwatch2, eaVectorTO1.begin(), eaVectorTO1.end(), eaVectorTO2.begin(), eaVectorTO2.end()); if(i == 1) Benchmark::AddResult("algorithm/lex_cmp/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } void BenchmarkAlgorithm4(EASTLTest_Rand& /*rng*/, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2) { { std::vector stdVectorUint321(10000); std::vector stdVectorUint322(10000); eastl::vector eaVectorUint321(10000); eastl::vector eaVectorUint322(10000); std::vector stdVectorUint64(100000); eastl::vector eaVectorUint64(100000); for(int i = 0; i < 2; i++) { /////////////////////////////// // Test copy /////////////////////////////// TestCopyStd(stopwatch1, stdVectorUint321.begin(), stdVectorUint321.end(), stdVectorUint322.begin()); TestCopyEa 
(stopwatch2, eaVectorUint321.begin(), eaVectorUint321.end(), eaVectorUint322.begin()); if(i == 1) Benchmark::AddResult("algorithm/copy/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test copy_backward /////////////////////////////// TestCopyBackwardStd(stopwatch1, stdVectorUint321.begin(), stdVectorUint321.end(), stdVectorUint322.end()); TestCopyBackwardEa (stopwatch2, eaVectorUint321.begin(), eaVectorUint321.end(), eaVectorUint322.end()); if(i == 1) Benchmark::AddResult("algorithm/copy_backward/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test fill /////////////////////////////// TestFillStd(stopwatch1, stdVectorUint64.begin(), stdVectorUint64.end(), UINT64_C(37)); TestFillEa (stopwatch2, eaVectorUint64.begin(), eaVectorUint64.end(), UINT64_C(37)); TestFillStd(stopwatch1, stdVectorUint64.begin(), stdVectorUint64.end(), UINT64_C(37)); // Intentionally do this a second time, as we are finding TestFillEa (stopwatch2, eaVectorUint64.begin(), eaVectorUint64.end(), UINT64_C(37)); // the results are inconsistent otherwise. 
if(EA::StdC::Memcheck64(&eaVectorUint64[0], UINT64_C(37), eaVectorUint64.size())) EA::UnitTest::Report("eastl algorithm 64 bit fill failure."); if(i == 1) Benchmark::AddResult("algorithm/fill/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test fill_n /////////////////////////////// TestFillNStd(stopwatch1, stdVectorUint64.begin(), (int)stdVectorUint64.size(), UINT64_C(37)); TestFillNEa (stopwatch2, eaVectorUint64.begin(), (int) eaVectorUint64.size(), UINT64_C(37)); TestFillNStd(stopwatch1, stdVectorUint64.begin(), (int)stdVectorUint64.size(), UINT64_C(37)); // Intentionally do this a second time, as we are finding TestFillNEa (stopwatch2, eaVectorUint64.begin(), (int) eaVectorUint64.size(), UINT64_C(37)); // the results are inconsistent otherwise. if(i == 1) Benchmark::AddResult("algorithm/fill_n/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } void BenchmarkAlgorithm5(EASTLTest_Rand& /*rng*/, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2) { { std::vector stdVectorVoid(100000); eastl::vector eaVectorVoid(100000); std::vector stdVectorChar(100000); eastl::vector eaVectorChar(100000); std::vector stdVectorBool(100000); eastl::vector eaVectorBool(100000); for(int i = 0; i < 2; i++) { TestFillStd(stopwatch1, stdVectorVoid.begin(), stdVectorVoid.end(), (void*)NULL); TestFillEa (stopwatch2, eaVectorVoid.begin(), eaVectorVoid.end(), (void*)NULL); if(i == 1) Benchmark::AddResult("algorithm/fill/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFillStd(stopwatch1, &stdVectorChar[0], &stdVectorChar[0] + stdVectorChar.size(), 'd'); // Intentionally use ' ' and not casted to any type. 
TestFillEa (stopwatch2, eaVectorChar.data(), eaVectorChar.data() + eaVectorChar.size(), 'd'); TestFillStd(stopwatch1, &stdVectorChar[0], &stdVectorChar[0] + stdVectorChar.size(), 'd'); // Intentionally do this a second time, as we are finding TestFillEa (stopwatch2, eaVectorChar.data(), eaVectorChar.data() + eaVectorChar.size(), 'd'); // the results are inconsistent otherwise. if(i == 1) Benchmark::AddResult("algorithm/fill/char[]/'d'", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFillStd(stopwatch1, stdVectorChar.begin(), stdVectorChar.end(), (char)'d'); TestFillEa (stopwatch2, eaVectorChar.begin(), eaVectorChar.end(), (char)'d'); TestFillStd(stopwatch1, stdVectorChar.begin(), stdVectorChar.end(), (char)'d'); // Intentionally do this a second time, as we are finding TestFillEa (stopwatch2, eaVectorChar.begin(), eaVectorChar.end(), (char)'d'); // the results are inconsistent otherwise. if(i == 1) Benchmark::AddResult("algorithm/fill/vector/'d'", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFillStd(stopwatch1, stdVectorChar.begin(), stdVectorChar.end(), (char)0); TestFillEa (stopwatch2, eaVectorChar.begin(), eaVectorChar.end(), (char)0); TestFillStd(stopwatch1, stdVectorChar.begin(), stdVectorChar.end(), (char)0); // Intentionally do this a second time, as we are finding TestFillEa (stopwatch2, eaVectorChar.begin(), eaVectorChar.end(), (char)0); // the results are inconsistent otherwise. if(i == 1) Benchmark::AddResult("algorithm/fill/vector/0", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFillStd(stopwatch1, eaVectorBool.data(), eaVectorBool.data() + eaVectorBool.size(), false); // Intentionally use eaVectorBool for the array. 
TestFillEa (stopwatch2, eaVectorBool.data(), eaVectorBool.data() + eaVectorBool.size(), false); TestFillStd(stopwatch1, eaVectorBool.data(), eaVectorBool.data() + eaVectorBool.size(), false); TestFillEa (stopwatch2, eaVectorBool.data(), eaVectorBool.data() + eaVectorBool.size(), false); if(i == 1) Benchmark::AddResult("algorithm/fill/bool[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test fill_n /////////////////////////////// TestFillNStd(stopwatch1, eaVectorChar.data(), (int) eaVectorChar.size(), 'd'); // Intentionally use eaVectorBool for the array. TestFillNEa (stopwatch2, eaVectorChar.data(), (int) eaVectorChar.size(), 'd'); TestFillNStd(stopwatch1, eaVectorChar.data(), (int) eaVectorChar.size(), 'd'); // Intentionally do this a second time, as we are finding TestFillNEa (stopwatch2, eaVectorChar.data(), (int) eaVectorChar.size(), 'd'); // the results are inconsistent otherwise. if(i == 1) Benchmark::AddResult("algorithm/fill_n/char[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFillNStd(stopwatch1, eaVectorBool.data(), (int) eaVectorBool.size(), false); // Intentionally use eaVectorBool for the array. TestFillNEa (stopwatch2, eaVectorBool.data(), (int) eaVectorBool.size(), false); TestFillNStd(stopwatch1, eaVectorBool.data(), (int) eaVectorBool.size(), false); // Intentionally do this a second time, as we are finding TestFillNEa (stopwatch2, eaVectorBool.data(), (int) eaVectorBool.size(), false); // the results are inconsistent otherwise. if(i == 1) Benchmark::AddResult("algorithm/fill_n/bool[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } void BenchmarkAlgorithm6(EASTLTest_Rand& /*rng*/, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2) { // We allocate this on the heap because some platforms don't always have enough stack space for this. 
std::vector* pstdVectorLP1 = new std::vector(100);
	std::vector* pstdVectorLP2 = new std::vector(100);
	eastl::vector* peaVectorLP1 = new eastl::vector(100);
	eastl::vector* peaVectorLP2 = new eastl::vector(100);

	// Aliases.
	std::vector& stdVectorLP1 = *pstdVectorLP1;
	std::vector& stdVectorLP2 = *pstdVectorLP2;
	eastl::vector& eaVectorLP1 = *peaVectorLP1;
	eastl::vector& eaVectorLP2 = *peaVectorLP2;

	for(int i = 0; i < 2; i++)
	{
		///////////////////////////////
		// Test copy
		///////////////////////////////

		TestCopyStd(stopwatch1, stdVectorLP1.begin(), stdVectorLP1.end(), stdVectorLP2.begin());
		TestCopyEa (stopwatch2, eaVectorLP1.begin(), eaVectorLP1.end(), eaVectorLP2.begin());

		if(i == 1)
			Benchmark::AddResult("algorithm/copy/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime());

		///////////////////////////////
		// Test copy_backward
		///////////////////////////////

		TestCopyBackwardStd(stopwatch1, stdVectorLP1.begin(), stdVectorLP1.end(), stdVectorLP2.end());
		TestCopyBackwardEa (stopwatch2, eaVectorLP1.begin(), eaVectorLP1.end(), eaVectorLP2.end());

		if(i == 1)
			Benchmark::AddResult("algorithm/copy_backward/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime());
	}

	delete pstdVectorLP1;
	delete pstdVectorLP2;
	delete peaVectorLP1;
	delete peaVectorLP2;
}


// BenchmarkAlgorithm7
//
// Times reverse and rotate, standard library (stopwatch1) versus EASTL (stopwatch2).
//
// The first scope reverses a list and a vector of 10000 elements each. The second
// scope rotates containers of several shapes (slist, list, vector; 10000-element
// "large" and 10-element "small" variants) around the element just before their
// midpoint. Each scope runs its measurements twice and records results only on
// the second pass (i == 1). The rng argument is unused.
//
void BenchmarkAlgorithm7(EASTLTest_Rand& /*rng*/, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2)
{
	{
		std::list stdListTO(10000);
		eastl::list eaListTO(10000);

		std::vector stdVectorTO(10000);
		eastl::vector eaVectorTO(10000);

		for(int i = 0; i < 2; i++)
		{
			// Records the most recent pair of timings, but only on the second pass.
			auto report = [&](const char* resultName)
			{
				if(i == 1)
					Benchmark::AddResult(resultName, stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime());
			};

			///////////////////////////////
			// Test reverse
			///////////////////////////////

			TestReverseStd(stopwatch1, stdListTO.begin(), stdListTO.end());
			TestReverseEa (stopwatch2, eaListTO.begin(), eaListTO.end());
			report("algorithm/reverse/list");

			TestReverseStd(stopwatch1, stdVectorTO.begin(), stdVectorTO.end());
			TestReverseEa (stopwatch2, eaVectorTO.begin(), eaVectorTO.end());
			report("algorithm/reverse/vector");
		}
	}

	{
		// Create some containers and seed them with incremental values (i.e. 0, 1, 2, 3...).
		eastl::slist eaSlistIntLarge(10000);
		eastl::generate(eaSlistIntLarge.begin(), eaSlistIntLarge.end(), GenerateIncrementalIntegers());

		// The SizedPOD containers are zero-filled instead of seeded.
		std::vector< SizedPOD<32> > stdVectorLargePod32(10000);
		for(std::vector< SizedPOD<32> >::iterator it = stdVectorLargePod32.begin(); it != stdVectorLargePod32.end(); ++it)
			memset(&*it, 0, sizeof(SizedPOD<32>));
		eastl::vector< SizedPOD<32> > eaVectorLargePod32(10000);
		for(eastl::vector< SizedPOD<32> >::iterator it = eaVectorLargePod32.begin(); it != eaVectorLargePod32.end(); ++it)
			memset(&*it, 0, sizeof(SizedPOD<32>));

		std::list stdListIntLarge(10000);
		eastl::generate(stdListIntLarge.begin(), stdListIntLarge.end(), GenerateIncrementalIntegers());
		eastl::list eaListIntLarge(10000);
		eastl::generate(eaListIntLarge.begin(), eaListIntLarge.end(), GenerateIncrementalIntegers());

		std::vector stdVectorIntLarge(10000);
		eastl::generate(stdVectorIntLarge.begin(), stdVectorIntLarge.end(), GenerateIncrementalIntegers());
		eastl::vector eaVectorIntLarge(10000);
		eastl::generate(eaVectorIntLarge.begin(), eaVectorIntLarge.end(), GenerateIncrementalIntegers());

		// Seed the small containers themselves. Previously these four calls were
		// applied to the *Large containers a second time, which left every small
		// int container unseeded.
		std::list stdListIntSmall(10);
		eastl::generate(stdListIntSmall.begin(), stdListIntSmall.end(), GenerateIncrementalIntegers());
		eastl::list eaListIntSmall(10);
		eastl::generate(eaListIntSmall.begin(), eaListIntSmall.end(), GenerateIncrementalIntegers());

		std::vector stdVectorIntSmall(10);
		eastl::generate(stdVectorIntSmall.begin(), stdVectorIntSmall.end(), GenerateIncrementalIntegers());
		eastl::vector eaVectorIntSmall(10);
		eastl::generate(eaVectorIntSmall.begin(), eaVectorIntSmall.end(), GenerateIncrementalIntegers());

		std::list stdListTOLarge(10000);
		eastl::generate(stdListTOLarge.begin(), stdListTOLarge.end(), GenerateIncrementalIntegers());
		eastl::list eaListTOLarge(10000);
		eastl::generate(eaListTOLarge.begin(), eaListTOLarge.end(), GenerateIncrementalIntegers());

		std::vector stdVectorTOLarge(10000);
		eastl::generate(stdVectorTOLarge.begin(), stdVectorTOLarge.end(), GenerateIncrementalIntegers());
		eastl::vector eaVectorTOLarge(10000);
		eastl::generate(eaVectorTOLarge.begin(), eaVectorTOLarge.end(), GenerateIncrementalIntegers());

		std::list stdListTOSmall(10);
		eastl::generate(stdListTOSmall.begin(), stdListTOSmall.end(), GenerateIncrementalIntegers());
		eastl::list eaListTOSmall(10);
		eastl::generate(eaListTOSmall.begin(), eaListTOSmall.end(), GenerateIncrementalIntegers());

		std::vector stdVectorTOSmall(10);
		eastl::generate(stdVectorTOSmall.begin(), stdVectorTOSmall.end(), GenerateIncrementalIntegers());
		eastl::vector eaVectorTOSmall(10);
		eastl::generate(eaVectorTOSmall.begin(), eaVectorTOSmall.end(), GenerateIncrementalIntegers());

		for(int i = 0; i < 2; i++)
		{
			// Records the most recent pair of timings, but only on the second pass.
			auto report = [&](const char* resultName)
			{
				if(i == 1)
					Benchmark::AddResult(resultName, stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime());
			};

			///////////////////////////////
			// Test rotate
			///////////////////////////////

			// There is no guaranteed Standard Library forward_list or slist, so only the
			// EASTL side is measured here and the std timing is reported as 0.
			TestRotateEa (stopwatch2, eaSlistIntLarge.begin(), eastl::next( eaSlistIntLarge.begin(), (eaSlistIntLarge.size() / 2) - 1), eaSlistIntLarge.end());
			if(i == 1)
				Benchmark::AddResult("algorithm/rotate/slist large", stopwatch1.GetUnits(), 0 /* untested */, stopwatch2.GetElapsedTime());

			TestRotateStd(stopwatch1, stdVectorLargePod32.begin(), std__::next(stdVectorLargePod32.begin(), (stdVectorLargePod32.size() / 2) - 1), stdVectorLargePod32.end());
			TestRotateEa (stopwatch2, eaVectorLargePod32.begin(), eastl::next( eaVectorLargePod32.begin(), (eaVectorLargePod32.size() / 2) - 1), eaVectorLargePod32.end());
			report("algorithm/rotate/vector> large");

			TestRotateStd(stopwatch1, stdListIntLarge.begin(), std__::next(stdListIntLarge.begin(), (stdListIntLarge.size() / 2) - 1), stdListIntLarge.end());
			TestRotateEa (stopwatch2, eaListIntLarge.begin(), eastl::next( eaListIntLarge.begin(), (eaListIntLarge.size() / 2) - 1), eaListIntLarge.end());
			report("algorithm/rotate/list large");

			TestRotateStd(stopwatch1, stdVectorIntLarge.begin(), std__::next(stdVectorIntLarge.begin(), (stdVectorIntLarge.size() / 2) - 1), stdVectorIntLarge.end());
			TestRotateEa (stopwatch2, eaVectorIntLarge.begin(), eastl::next( eaVectorIntLarge.begin(), (eaVectorIntLarge.size() / 2) - 1), eaVectorIntLarge.end());
			report("algorithm/rotate/vector");

			TestRotateStd(stopwatch1, stdListIntSmall.begin(), std__::next(stdListIntSmall.begin(), (stdListIntSmall.size() / 2) - 1), stdListIntSmall.end());
			TestRotateEa (stopwatch2, eaListIntSmall.begin(), eastl::next( eaListIntSmall.begin(), (eaListIntSmall.size() / 2) - 1), eaListIntSmall.end());
			report("algorithm/rotate/list small");

			TestRotateStd(stopwatch1, stdVectorIntSmall.begin(), std__::next(stdVectorIntSmall.begin(), (stdVectorIntSmall.size() / 2) - 1), stdVectorIntSmall.end());
			TestRotateEa (stopwatch2, eaVectorIntSmall.begin(), eastl::next( eaVectorIntSmall.begin(), (eaVectorIntSmall.size() / 2) - 1), eaVectorIntSmall.end());
			report("algorithm/rotate/vector");

			TestRotateStd(stopwatch1, stdListTOLarge.begin(), std__::next(stdListTOLarge.begin(), (stdListTOLarge.size() / 2) - 1), stdListTOLarge.end());
			TestRotateEa (stopwatch2, eaListTOLarge.begin(), eastl::next( eaListTOLarge.begin(), (eaListTOLarge.size() / 2) - 1), eaListTOLarge.end());
			report("algorithm/rotate/list");

			TestRotateStd(stopwatch1, stdVectorTOLarge.begin(), std__::next(stdVectorTOLarge.begin(), (stdVectorTOLarge.size() / 2) - 1), stdVectorTOLarge.end());
			TestRotateEa (stopwatch2, eaVectorTOLarge.begin(), eastl::next( eaVectorTOLarge.begin(), (eaVectorTOLarge.size() / 2) - 1), eaVectorTOLarge.end());
			report("algorithm/rotate/vector");

			TestRotateStd(stopwatch1, stdListTOSmall.begin(), std__::next(stdListTOSmall.begin(), (stdListTOSmall.size() / 2) - 1), stdListTOSmall.end());
			TestRotateEa (stopwatch2, eaListTOSmall.begin(), eastl::next( eaListTOSmall.begin(), (eaListTOSmall.size() / 2) - 1), eaListTOSmall.end());
			report("algorithm/rotate/list");

			TestRotateStd(stopwatch1, stdVectorTOSmall.begin(), std__::next(stdVectorTOSmall.begin(), (stdVectorTOSmall.size() / 2) - 1), stdVectorTOSmall.end());
			TestRotateEa (stopwatch2, eaVectorTOSmall.begin(), eastl::next( eaVectorTOSmall.begin(), (eaVectorTOSmall.size() / 2) - 1), eaVectorTOSmall.end());
			report("algorithm/rotate/vector");
		}
	}
}


void BenchmarkAlgorithm8(EASTLTest_Rand& rng, EA::StdC::Stopwatch& stopwatch1, EA::StdC::Stopwatch& stopwatch2)
{
	const uint32_t ElementCount = 10000;

	eastl::vector srcVecA(ElementCount);
	eastl::vector srcVecB(ElementCount);

	std::vector stdVecAInt(ElementCount);
	std::vector stdVecBInt(ElementCount);
	std::vector stdVecOutInt(2 * ElementCount);
	std::vector stdVecATestObject(ElementCount);
	std::vector stdVecBTestObject(ElementCount);
	std::vector stdVecOutTestObject(2 * ElementCount);

	eastl::vector eaVecAInt(ElementCount);
	eastl::vector eaVecBInt(ElementCount);
	eastl::vector eaVecOutInt(2 * ElementCount);
	eastl::vector eaVecATestObject(ElementCount);
	eastl::vector eaVecBTestObject(ElementCount);
	eastl::vector eaVecOutTestObject(2 * ElementCount);

	// Note:
	//   In some cases the compiler may generate branch free code for the loop body of merge.
	//   In this situation the performance of merging data that has a random merge selection (i.e. the chance that the smallest
	//   element is taken from the first or second list is essentially random) is the same as merging data where the choice of
	//   which list has the smallest element is predictable.
	//   However, if the compiler doesn't generate branch free code, then the performance of merge will suffer from branch
	//   misprediction when merging random data and will benefit greatly when misprediction is rare.
	//   This benchmark is aimed at highlighting what sort of code is being generated, and also showing the impact of
	//   predictability of the comparisons performed during merge.  The branch predictability /can/ have a large impact
	//   on merge sort performance.
// 'unpred' is the case where the comparison is unpredictable // 'pred' is the case where the comparison is mostly predictable const char* patternDescriptions[][2] = { { "algorithm/merge/vector (unpred)", "algorithm/merge/vector (pred)", }, { "algorithm/merge/vector (unpred)", "algorithm/merge/vector (pred)", }, }; enum Pattern { P_Random, P_Predictable, P_Count }; for (int pattern = 0; pattern < P_Count; pattern++) { if (pattern == P_Random) { eastl::generate(srcVecA.begin(), srcVecA.end(), [&]{ return int(rng()); }); eastl::sort(srcVecA.begin(), srcVecA.end()); eastl::generate(srcVecB.begin(), srcVecB.end(), [&] { return int(rng()); }); eastl::sort(srcVecB.begin(), srcVecB.end()); } else if (pattern == P_Predictable) { // The data pattern means that a simple/naive algorithm will select 'runLen' values // from one list, and then 'runLen' values from the other list (alternating back and forth). // Of course, a merge algorithm that is more complicated might have a different order of // comparison. 
const int runLen = 32; for (int i = 0; i < ElementCount; i++) { int baseValue = ((i / runLen) * 2 * runLen) + (i % (runLen)); srcVecA[i] = baseValue; srcVecB[i] = baseValue + runLen; } } /////////////////////////////// // Test merge /////////////////////////////// for (int i = 0; i < 2; i++) { eastl::copy(srcVecA.begin(), srcVecA.end(), stdVecAInt.begin()); eastl::copy(srcVecB.begin(), srcVecB.end(), stdVecBInt.begin()); eastl::copy(srcVecA.begin(), srcVecA.end(), eaVecAInt.begin()); eastl::copy(srcVecB.begin(), srcVecB.end(), eaVecBInt.begin()); TestMergeStd(stopwatch1, stdVecAInt.begin(), stdVecAInt.end(), stdVecBInt.begin(), stdVecBInt.end(), stdVecOutInt.begin()); TestMergeEa(stopwatch2, eaVecAInt.begin(), eaVecAInt.end(), eaVecBInt.begin(), eaVecBInt.end(), eaVecOutInt.begin()); if (i == 1) { Benchmark::AddResult(patternDescriptions[0][pattern], stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } for (int j = 0; j < ElementCount; j++) { stdVecATestObject[j] = TestObject(srcVecA[j]); stdVecBTestObject[j] = TestObject(srcVecB[j]); eaVecATestObject[j] = TestObject(srcVecA[j]); eaVecBTestObject[j] = TestObject(srcVecB[j]); } TestMergeStd(stopwatch1, stdVecATestObject.begin(), stdVecATestObject.end(), stdVecBTestObject.begin(), stdVecBTestObject.end(), stdVecOutTestObject.begin()); TestMergeEa(stopwatch2, eaVecATestObject.begin(), eaVecATestObject.end(), eaVecBTestObject.begin(), eaVecBTestObject.end(), eaVecOutTestObject.begin()); if (i == 1) { Benchmark::AddResult(patternDescriptions[1][pattern], stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } } void BenchmarkAlgorithm() { EASTLTest_Printf("Algorithm\n"); EASTLTest_Rand rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); BenchmarkAlgorithm1(rng, stopwatch1, stopwatch2); BenchmarkAlgorithm2(rng, stopwatch1, stopwatch2); 
BenchmarkAlgorithm3(rng, stopwatch1, stopwatch2); BenchmarkAlgorithm4(rng, stopwatch1, stopwatch2); BenchmarkAlgorithm5(rng, stopwatch1, stopwatch2); BenchmarkAlgorithm6(rng, stopwatch1, stopwatch2); BenchmarkAlgorithm7(rng, stopwatch1, stopwatch2); BenchmarkAlgorithm8(rng, stopwatch1, stopwatch2); } ================================================ FILE: benchmark/source/BenchmarkBitset.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER // Microsoft STL generates warnings. #pragma warning(disable: 4267) // 'initializing' : conversion from 'size_t' to 'const int', possible loss of data #endif #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include EA_DISABLE_ALL_VC_WARNINGS() #include EA_RESTORE_ALL_VC_WARNINGS() using namespace EA; namespace { template void TestSet(EA::StdC::Stopwatch& stopwatch, Bitset& b) { stopwatch.Restart(); for(int i = 0; i < 100000; i++) { b.set(); Benchmark::DoNothing(&b); } stopwatch.Stop(); } template void TestSetIndex(EA::StdC::Stopwatch& stopwatch, Bitset& b, size_t index) { stopwatch.Restart(); for(int i = 0; i < 100000; i++) { b.set(index); Benchmark::DoNothing(&b); } stopwatch.Stop(); } template void TestReset(EA::StdC::Stopwatch& stopwatch, Bitset& b) { stopwatch.Restart(); for(int i = 0; i < 100000; i++) { b.reset(); Benchmark::DoNothing(&b); } stopwatch.Stop(); } template void TestFlip(EA::StdC::Stopwatch& stopwatch, Bitset& b) { stopwatch.Restart(); for(int i = 0; i < 100000; i++) { b.flip(); Benchmark::DoNothing(&b); } stopwatch.Stop(); } template void TestTest(EA::StdC::Stopwatch& stopwatch, Bitset& b, unsigned nANDValue) { stopwatch.Restart(); for(unsigned i = 0; i < 100000; i++) Benchmark::DoNothing(b.test(i & nANDValue)); // We use & instead of % because the former is always fast due 
to forced power of 2. stopwatch.Stop(); } template void TestCount(EA::StdC::Stopwatch& stopwatch, Bitset& b) { size_t temp = 0; stopwatch.Restart(); for(int i = 0; i < 100000; i++) { temp += b.count(); Benchmark::DoNothing(&temp); } stopwatch.Stop(); } template void TestRightShift(EA::StdC::Stopwatch& stopwatch, Bitset& b, size_t n) { size_t temp = 0; stopwatch.Restart(); for(int i = 0; i < 100000; i++) { b >>= n; Benchmark::DoNothing(&temp); } stopwatch.Stop(); } } // namespace void BenchmarkBitset() { EASTLTest_Printf("Bitset\n"); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { std::bitset<15> stdBitset15; eastl::bitset<15> eaBitset15; std::bitset<35> stdBitset35; eastl::bitset<35> eaBitset35; std::bitset<75> stdBitset75; eastl::bitset<75> eaBitset75; std::bitset<1500> stdBitset1500; eastl::bitset<1500> eaBitset1500; for(int i = 0; i < 2; i++) { /////////////////////////////// // Test set() /////////////////////////////// TestSet(stopwatch1, stdBitset15); TestSet(stopwatch2, eaBitset15); if(i == 1) Benchmark::AddResult("bitset<15>/set()", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSet(stopwatch1, stdBitset35); TestSet(stopwatch2, eaBitset35); if(i == 1) Benchmark::AddResult("bitset<35>/set()", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSet(stopwatch1, stdBitset75); TestSet(stopwatch2, eaBitset75); if(i == 1) Benchmark::AddResult("bitset<75>/set()", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSet(stopwatch1, stdBitset1500); TestSet(stopwatch2, eaBitset1500); if(i == 1) Benchmark::AddResult("bitset<1500>/set()", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test set(index) /////////////////////////////// TestSetIndex(stopwatch1, stdBitset15, 13); TestSetIndex(stopwatch2, 
eaBitset15, 13); if(i == 1) Benchmark::AddResult("bitset<15>/set(i)", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSetIndex(stopwatch1, stdBitset35, 33); TestSetIndex(stopwatch2, eaBitset35, 33); if(i == 1) Benchmark::AddResult("bitset<35>/set(i)", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSetIndex(stopwatch1, stdBitset75, 73); TestSetIndex(stopwatch2, eaBitset75, 73); if(i == 1) Benchmark::AddResult("bitset<75>/set(i)", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSetIndex(stopwatch1, stdBitset1500, 730); TestSetIndex(stopwatch2, eaBitset1500, 730); if(i == 1) Benchmark::AddResult("bitset<1500>/set(i)", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test reset() /////////////////////////////// TestReset(stopwatch1, stdBitset15); TestReset(stopwatch2, eaBitset15); if(i == 1) Benchmark::AddResult("bitset<15>/reset", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestReset(stopwatch1, stdBitset35); TestReset(stopwatch2, eaBitset35); if(i == 1) Benchmark::AddResult("bitset<35>/reset", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestReset(stopwatch1, stdBitset75); TestReset(stopwatch2, eaBitset75); if(i == 1) Benchmark::AddResult("bitset<75>/reset", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestReset(stopwatch1, stdBitset1500); TestReset(stopwatch2, eaBitset1500); if(i == 1) Benchmark::AddResult("bitset<1500>/reset", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test flip /////////////////////////////// TestFlip(stopwatch1, stdBitset15); TestFlip(stopwatch2, eaBitset15); if(i == 1) Benchmark::AddResult("bitset<15>/flip", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), 
stopwatch2.GetElapsedTime()); TestFlip(stopwatch1, stdBitset35); TestFlip(stopwatch2, eaBitset35); if(i == 1) Benchmark::AddResult("bitset<35>/flip", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFlip(stopwatch1, stdBitset75); TestFlip(stopwatch2, eaBitset75); if(i == 1) Benchmark::AddResult("bitset<75>/flip", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFlip(stopwatch1, stdBitset1500); TestFlip(stopwatch2, eaBitset1500); if(i == 1) Benchmark::AddResult("bitset<1500>/flip", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test test /////////////////////////////// TestTest(stopwatch1, stdBitset15, 7); TestTest(stopwatch2, eaBitset15, 7); if(i == 1) Benchmark::AddResult("bitset<15>/test", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestTest(stopwatch1, stdBitset35, 31); TestTest(stopwatch2, eaBitset35, 31); if(i == 1) Benchmark::AddResult("bitset<35>/test", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestTest(stopwatch1, stdBitset75, 63); TestTest(stopwatch2, eaBitset75, 63); if(i == 1) Benchmark::AddResult("bitset<75>/test", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestTest(stopwatch1, stdBitset1500, 1023); TestTest(stopwatch2, eaBitset1500, 1023); if(i == 1) Benchmark::AddResult("bitset<1500>/test", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test count /////////////////////////////// TestCount(stopwatch1, stdBitset15); TestCount(stopwatch2, eaBitset15); if(i == 1) Benchmark::AddResult("bitset<15>/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestCount(stopwatch1, stdBitset35); TestCount(stopwatch2, eaBitset35); if(i == 1) Benchmark::AddResult("bitset<35>/count", stopwatch1.GetUnits(), 
stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestCount(stopwatch1, stdBitset75); TestCount(stopwatch2, eaBitset75); if(i == 1) Benchmark::AddResult("bitset<75>/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestCount(stopwatch1, stdBitset1500); TestCount(stopwatch2, eaBitset1500); if(i == 1) Benchmark::AddResult("bitset<1500>/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test >>= /////////////////////////////// TestRightShift(stopwatch1, stdBitset15, 1); TestRightShift(stopwatch2, eaBitset15, 1); if(i == 1) Benchmark::AddResult("bitset<15>/>>=/1", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime(), GetStdSTLType() == kSTLPort ? "STLPort is broken, neglects wraparound check." : NULL); TestRightShift(stopwatch1, stdBitset35, 1); TestRightShift(stopwatch2, eaBitset35, 1); if(i == 1) Benchmark::AddResult("bitset<35>/>>=/1", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime(), GetStdSTLType() == kSTLPort ? "STLPort is broken, neglects wraparound check." : NULL); TestRightShift(stopwatch1, stdBitset75, 1); TestRightShift(stopwatch2, eaBitset75, 1); if(i == 1) Benchmark::AddResult("bitset<75>/>>=/1", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime(), GetStdSTLType() == kSTLPort ? "STLPort is broken, neglects wraparound check." : NULL); TestRightShift(stopwatch1, stdBitset1500, 1); TestRightShift(stopwatch2, eaBitset1500, 1); if(i == 1) Benchmark::AddResult("bitset<1500>/>>=/1", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime(), GetStdSTLType() == kSTLPort ? "STLPort is broken, neglects wraparound check." 
: NULL); } } } ================================================ FILE: benchmark/source/BenchmarkDeque.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #include #ifdef _MSC_VER #pragma warning(push, 0) #pragma warning(disable: 4350) // behavior change: X called instead of Y #endif #include #include #include #include #include #ifdef _MSC_VER #pragma warning(pop) #endif using namespace EA; namespace { struct ValuePair { uint32_t key; uint32_t v; }; struct VPCompare { bool operator()(const ValuePair& vp1, const ValuePair& vp2) const { return (vp1.key == vp2.key) ? (vp1.v < vp2.v) : (vp1.key < vp2.key); } }; bool operator<(const ValuePair& vp1, const ValuePair& vp2) { return (vp1.key == vp2.key) ? (vp1.v < vp2.v) : (vp1.key < vp2.key); } bool operator==(const ValuePair& vp1, const ValuePair& vp2) { return (vp1.key == vp2.key) && (vp1.v == vp2.v); } } typedef std::deque StdDeque; typedef eastl::deque EaDeque; // What value do we pick for the subarray size to make the comparison fair? Using the default isn't ideal because it results in this test measuring speed efficiency and ignoring memory efficiency. 
namespace { template void TestPushBack(EA::StdC::Stopwatch& stopwatch, Container& c, eastl::vector& intVector) { stopwatch.Restart(); for(eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { const ValuePair vp = { intVector[j], intVector[j] }; c.push_back(vp); } stopwatch.Stop(); } template void TestPushFront(EA::StdC::Stopwatch& stopwatch, Container& c, eastl::vector& intVector) { stopwatch.Restart(); for(eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { const ValuePair vp = { intVector[j], intVector[j] }; c.push_front(vp); } stopwatch.Stop(); } template void TestBracket(EA::StdC::Stopwatch& stopwatch, Container& c) { uint64_t temp = 0; stopwatch.Restart(); for(typename Container::size_type j = 0, jEnd = c.size(); j < jEnd; j++) temp += c[j].key; stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(temp & 0xffffffff)); } template void TestIteration(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::iterator it = c.begin(), itEnd = c.end(); stopwatch.Restart(); while(it != itEnd) ++it; stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(*it).key); /* Alternative way to measure: const eastl_size_t n = c.size(); stopwatch.Restart(); for(eastl_size_t i = 0; i < n; ++i) ++it; stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(*it).key); */ } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c) { // Intentionally use eastl find in order to measure just // vector access speed and not be polluted by sort speed. 
const ValuePair vp = { 0xffffffff, 0 }; stopwatch.Restart(); typename Container::iterator it = eastl::find(c.begin(), c.end(), vp); stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(*it).key); } template void TestSort(EA::StdC::Stopwatch& stopwatch, Container& c) { // Intentionally use eastl sort in order to measure just // vector access speed and not be polluted by sort speed. VPCompare vpCompare; stopwatch.Restart(); eastl::quick_sort(c.begin(), c.end(), vpCompare); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c[0].key); } template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c) { const ValuePair vp = { 0xffffffff, 0 }; typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = 2000, it = c.begin(); j < jEnd; ++j) { it = c.insert(it, vp); if(it == c.end()) // Try to safely increment the iterator three times. it = c.begin(); if(++it == c.end()) it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); } template void TestErase(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = 2000, it = c.begin(); j < jEnd; ++j) { it = c.erase(it); if(it == c.end()) // Try to safely increment the iterator three times. 
it = c.begin(); if(++it == c.end()) it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); } } // namespace void BenchmarkDeque() { EASTLTest_Printf("Deque\n"); EA::UnitTest::RandGenT rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { // Exercise some declarations int nErrorCount = 0; ValuePair vp1 = { 0, 0 }, vp2 = { 0, 0 }; VPCompare c1, c2; VERIFY(c1.operator()(vp1, vp2) == c2.operator()(vp1, vp2)); VERIFY((vp1 < vp2) || (vp1 == vp2) || !(vp1 == vp2)); } { eastl::vector intVector(100000); eastl::generate(intVector.begin(), intVector.end(), rng); for(int i = 0; i < 2; i++) { StdDeque stdDeque; EaDeque eaDeque; /////////////////////////////// // Test push_back /////////////////////////////// TestPushBack(stopwatch1, stdDeque, intVector); TestPushBack(stopwatch2, eaDeque, intVector); if(i == 1) Benchmark::AddResult("deque/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test push_front /////////////////////////////// TestPushFront(stopwatch1, stdDeque, intVector); TestPushFront(stopwatch2, eaDeque, intVector); if(i == 1) Benchmark::AddResult("deque/push_front", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[] /////////////////////////////// TestBracket(stopwatch1, stdDeque); TestBracket(stopwatch2, eaDeque); if(i == 1) Benchmark::AddResult("deque/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration /////////////////////////////// TestIteration(stopwatch1, stdDeque); TestIteration(stopwatch2, eaDeque); if(i == 1) Benchmark::AddResult("deque/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test 
find() /////////////////////////////// TestFind(stopwatch1, stdDeque); TestFind(stopwatch2, eaDeque); if(i == 1) Benchmark::AddResult("deque/find", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test sort /////////////////////////////// // Currently VC++ complains about our sort function decrementing std::iterator that is already at begin(). In the strictest sense, // that's a valid complaint, but we aren't testing std STL here. We will want to revise our sort function eventually. #if !defined(_MSC_VER) || !defined(_ITERATOR_DEBUG_LEVEL) || (_ITERATOR_DEBUG_LEVEL < 2) TestSort(stopwatch1, stdDeque); TestSort(stopwatch2, eaDeque); if(i == 1) Benchmark::AddResult("deque/sort", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); #endif /////////////////////////////// // Test insert /////////////////////////////// TestInsert(stopwatch1, stdDeque); TestInsert(stopwatch2, eaDeque); if(i == 1) Benchmark::AddResult("deque/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase /////////////////////////////// TestErase(stopwatch1, stdDeque); TestErase(stopwatch2, eaDeque); if(i == 1) Benchmark::AddResult("deque/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkHash.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS() #include #include #include #include EA_RESTORE_ALL_VC_WARNINGS() using namespace EA; // HashString8 // // We define a string // template struct HashString8 { // Defined for EASTL, STLPort, SGI, etc. and Metrowerks-related hash tables: size_t operator()(const String& s) const { const uint8_t* p = (const uint8_t*) s.c_str(); uint32_t c, stringHash = UINT32_C(2166136261); while((c = *p++) != 0) stringHash = (stringHash * 16777619) ^ c; return stringHash; } // Defined for Dinkumware-related (e.g. MS STL) hash tables: bool operator()(const String& s1, const String& s2) const { return s1 < s2; } // Defined for Dinkumware-related (e.g. MS STL) hash tables: enum { bucket_size = 7, min_buckets = 8 }; }; using StdMapUint32TO = std::unordered_map; using StdMapStrUint32 = std::unordered_map>; using EaMapUint32TO = eastl::hash_map; using EaMapStrUint32 = eastl::hash_map>; namespace { template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); c.insert(pArrayBegin, pArrayEnd); stopwatch.Stop(); } template void TestIteration(EA::StdC::Stopwatch& stopwatch, const Container& c, const Value& findValue) { stopwatch.Restart(); typename Container::const_iterator it = eastl::find(c.begin(), c.end(), findValue); // It shouldn't matter what find implementation we use here, as it merely iterates values. 
stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*it); } template void TestBracket(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { Benchmark::DoNothing(&c[pArrayBegin->first]); ++pArrayBegin; } stopwatch.Stop(); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { typename Container::iterator it = c.find(pArrayBegin->first); Benchmark::DoNothing(&it); ++pArrayBegin; } stopwatch.Stop(); } template void TestFindAsStd(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { typename Container::iterator it = c.find(pArrayBegin->first.c_str()); Benchmark::DoNothing(&it); ++pArrayBegin; } stopwatch.Stop(); } template void TestFindAsEa(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { typename Container::iterator it = c.find_as(pArrayBegin->first.c_str()); Benchmark::DoNothing(&it); ++pArrayBegin; } stopwatch.Stop(); } template void TestCount(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { typename Container::size_type temp = 0; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { temp += c.count(pArrayBegin->first); ++pArrayBegin; } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestEraseValue(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { c.erase(pArrayBegin->first); ++pArrayBegin; } stopwatch.Stop(); 
EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); } template void TestErasePosition(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = c.size() / 3, it = c.begin(); j < jEnd; ++j) { // The erase fucntion is supposed to return an iterator, but the C++ standard was // not initially clear about it and some STL implementations don't do it correctly. #if (defined(_MSC_VER) || defined(_CPPLIB_VER)) // _CPPLIB_VER is something defined by Dinkumware STL. it = c.erase(it); #else // This pathway may execute at a slightly different speed than the // standard behaviour, but that's fine for the benchmark because the // benchmark is measuring the speed of erasing while iterating, and // however it needs to get done by the given STL is how it is measured. const typename Container::iterator itErase(it++); c.erase(itErase); #endif ++it; ++it; } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p %p", &c, &it); } template void TestEraseRange(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it1 = c.begin(); typename Container::iterator it2 = c.begin(); for(j = 0, jEnd = c.size() / 3; j < jEnd; ++j) ++it2; stopwatch.Restart(); c.erase(it1, it2); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p %p %p", &c, &it1, &it2); } template void TestClear(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); c.clear(); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); } } // namespace void BenchmarkHash() { EASTLTest_Printf("HashMap\n"); EA::UnitTest::Rand rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch 
stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { eastl::vector< std::pair > stdVectorUT(10000); eastl::vector< eastl::pair > eaVectorUT(10000); eastl::vector< std::pair< std::string, uint32_t> > stdVectorSU(10000); eastl::vector< eastl::pair > eaVectorSU(10000); for(eastl_size_t i = 0, iEnd = stdVectorUT.size(); i < iEnd; i++) { const uint32_t n1 = rng.RandLimit((uint32_t)(iEnd / 2)); const uint32_t n2 = rng.RandValue(); stdVectorUT[i] = std::pair(n1, TestObject(n2)); eaVectorUT[i] = eastl::pair(n1, TestObject(n2)); char str_n1[32]; sprintf(str_n1, "%u", (unsigned)n1); stdVectorSU[i] = std::pair< std::string, uint32_t>( std::string(str_n1), n2); eaVectorSU[i] = eastl::pair(eastl::string(str_n1), n2); } for(int i = 0; i < 2; i++) { StdMapUint32TO stdMapUint32TO; EaMapUint32TO eaMapUint32TO; StdMapStrUint32 stdMapStrUint32; EaMapStrUint32 eaMapStrUint32; /////////////////////////////// // Test insert(const value_type&) /////////////////////////////// TestInsert(stopwatch1, stdMapUint32TO, stdVectorUT.data(), stdVectorUT.data() + stdVectorUT.size()); TestInsert(stopwatch2, eaMapUint32TO, eaVectorUT.data(), eaVectorUT.data() + eaVectorUT.size()); if(i == 1) Benchmark::AddResult("hash_map/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestInsert(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + stdVectorSU.size()); TestInsert(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + eaVectorSU.size()); if(i == 1) Benchmark::AddResult("hash_map/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration /////////////////////////////// TestIteration(stopwatch1, stdMapUint32TO, StdMapUint32TO::value_type(9999999, TestObject(9999999))); TestIteration(stopwatch2, eaMapUint32TO, EaMapUint32TO::value_type(9999999, TestObject(9999999))); if(i == 1) Benchmark::AddResult("hash_map/iteration", stopwatch1.GetUnits(), 
stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestIteration(stopwatch1, stdMapStrUint32, StdMapStrUint32::value_type( std::string("9999999"), 9999999)); TestIteration(stopwatch2, eaMapStrUint32, EaMapStrUint32::value_type(eastl::string("9999999"), 9999999)); if(i == 1) Benchmark::AddResult("hash_map/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[] /////////////////////////////// TestBracket(stopwatch1, stdMapUint32TO, stdVectorUT.data(), stdVectorUT.data() + stdVectorUT.size()); TestBracket(stopwatch2, eaMapUint32TO, eaVectorUT.data(), eaVectorUT.data() + eaVectorUT.size()); if(i == 1) Benchmark::AddResult("hash_map/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestBracket(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + stdVectorSU.size()); TestBracket(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + eaVectorSU.size()); if(i == 1) Benchmark::AddResult("hash_map/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find /////////////////////////////// TestFind(stopwatch1, stdMapUint32TO, stdVectorUT.data(), stdVectorUT.data() + stdVectorUT.size()); TestFind(stopwatch2, eaMapUint32TO, eaVectorUT.data(), eaVectorUT.data() + eaVectorUT.size()); if(i == 1) Benchmark::AddResult("hash_map/find", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFind(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + stdVectorSU.size()); TestFind(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + eaVectorSU.size()); if(i == 1) Benchmark::AddResult("hash_map/find", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find_as /////////////////////////////// 
TestFindAsStd(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + stdVectorSU.size()); TestFindAsEa(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + eaVectorSU.size()); if(i == 1) Benchmark::AddResult("hash_map/find_as/char*", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test count /////////////////////////////// TestCount(stopwatch1, stdMapUint32TO, stdVectorUT.data(), stdVectorUT.data() + stdVectorUT.size()); TestCount(stopwatch2, eaMapUint32TO, eaVectorUT.data(), eaVectorUT.data() + eaVectorUT.size()); if(i == 1) Benchmark::AddResult("hash_map/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestCount(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + stdVectorSU.size()); TestCount(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + eaVectorSU.size()); if(i == 1) Benchmark::AddResult("hash_map/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(const key_type& key) /////////////////////////////// TestEraseValue(stopwatch1, stdMapUint32TO, stdVectorUT.data(), stdVectorUT.data() + (stdVectorUT.size() / 2)); TestEraseValue(stopwatch2, eaMapUint32TO, eaVectorUT.data(), eaVectorUT.data() + (eaVectorUT.size() / 2)); if(i == 1) Benchmark::AddResult("hash_map/erase val", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestEraseValue(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + (stdVectorSU.size() / 2)); TestEraseValue(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + (eaVectorSU.size() / 2)); if(i == 1) Benchmark::AddResult("hash_map/erase val", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(iterator position) /////////////////////////////// 
TestErasePosition(stopwatch1, stdMapUint32TO); TestErasePosition(stopwatch2, eaMapUint32TO); if(i == 1) Benchmark::AddResult("hash_map/erase pos", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestErasePosition(stopwatch1, stdMapStrUint32); TestErasePosition(stopwatch2, eaMapStrUint32); if(i == 1) Benchmark::AddResult("hash_map/erase pos", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(iterator first, iterator last) /////////////////////////////// TestEraseRange(stopwatch1, stdMapUint32TO); TestEraseRange(stopwatch2, eaMapUint32TO); if(i == 1) Benchmark::AddResult("hash_map/erase range", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestEraseRange(stopwatch1, stdMapStrUint32); TestEraseRange(stopwatch2, eaMapStrUint32); if(i == 1) Benchmark::AddResult("hash_map/erase range", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test clear() /////////////////////////////// // Clear the containers of whatever they happen to have. We want the containers to have full data. TestClear(stopwatch1, stdMapUint32TO); TestClear(stopwatch2, eaMapUint32TO); TestClear(stopwatch1, stdMapStrUint32); TestClear(stopwatch2, eaMapStrUint32); // Re-set the containers with full data. TestInsert(stopwatch1, stdMapUint32TO, stdVectorUT.data(), stdVectorUT.data() + stdVectorUT.size()); TestInsert(stopwatch2, eaMapUint32TO, eaVectorUT.data(), eaVectorUT.data() + eaVectorUT.size()); TestInsert(stopwatch1, stdMapStrUint32, stdVectorSU.data(), stdVectorSU.data() + stdVectorSU.size()); TestInsert(stopwatch2, eaMapStrUint32, eaVectorSU.data(), eaVectorSU.data() + eaVectorSU.size()); // Now clear the data again, this time measuring it. 
TestClear(stopwatch1, stdMapUint32TO); TestClear(stopwatch2, eaMapUint32TO); if(i == 1) Benchmark::AddResult("hash_map/clear", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestClear(stopwatch1, stdMapStrUint32); TestClear(stopwatch2, eaMapStrUint32); if(i == 1) Benchmark::AddResult("hash_map/clear", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkHeap.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #ifdef _MSC_VER #pragma warning(push, 0) #pragma warning(disable: 4350) // behavior change: X called instead of Y #endif #include #include #ifdef _MSC_VER #pragma warning(pop) #endif using namespace EA; namespace { template void TestMakeHeapStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last) { stopwatch.Restart(); std::make_heap(first, last); stopwatch.Stop(); } template void TestMakeHeapEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last) { stopwatch.Restart(); eastl::make_heap(first, last); stopwatch.Stop(); } template void TestPushHeapStd(EA::StdC::Stopwatch& stopwatch, Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator2 last2) { stopwatch.Restart(); while(first2 != last2) { *last1++ = *first2++; std::push_heap(first1, last1); } stopwatch.Stop(); } template void TestPushHeapEa(EA::StdC::Stopwatch& stopwatch, Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator2 last2) { stopwatch.Restart(); while(first2 != last2) { *last1++ = *first2++; eastl::push_heap(first1, last1); } stopwatch.Stop(); } template void TestPopHeapStd(EA::StdC::Stopwatch& stopwatch, Iterator 
first, Iterator last, Iterator popEnd) { stopwatch.Restart(); while(last != popEnd) std::pop_heap(first, last--); stopwatch.Stop(); } template void TestPopHeapEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last, Iterator popEnd) { stopwatch.Restart(); while(last != popEnd) eastl::pop_heap(first, last--); stopwatch.Stop(); } template void TestSortHeapStd(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last) { stopwatch.Restart(); std::sort_heap(first, last); stopwatch.Stop(); } template void TestSortHeapEa(EA::StdC::Stopwatch& stopwatch, Iterator first, Iterator last) { stopwatch.Restart(); eastl::sort_heap(first, last); stopwatch.Stop(); } } // namespace void BenchmarkHeap() { EASTLTest_Printf("Heap (Priority Queue)\n"); EA::UnitTest::RandGenT rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { const int kArraySize = 100000; // uint32[] uint32_t* const pIntArrayS = new uint32_t[kArraySize * 2]; // * 2 because we will be adding more items via push_heap. uint32_t* const pIntArrayE = new uint32_t[kArraySize * 2]; // S means Std; E means EA. uint32_t* const pIntArray2 = new uint32_t[kArraySize]; // This will be used for pop_heap. 
eastl::generate(pIntArrayS, pIntArrayS + kArraySize, rng); eastl::copy(pIntArrayS, pIntArrayS + kArraySize, pIntArrayE); eastl::copy(pIntArrayS, pIntArrayS + kArraySize, pIntArray2); // vector std::vector stdVectorTO(kArraySize * 2); std::vector stdVectorTO2(kArraySize); eastl::vector eaVectorTO(kArraySize * 2); eastl::vector eaVectorTO2(kArraySize); for(int k = 0; k < kArraySize; k++) { stdVectorTO[k] = TestObject(pIntArrayS[k]); stdVectorTO2[k] = TestObject(pIntArrayS[k]); eaVectorTO[k] = TestObject(pIntArrayS[k]); eaVectorTO2[k] = TestObject(pIntArrayS[k]); } for(int i = 0; i < 2; i++) { /////////////////////////////// // Test make_heap /////////////////////////////// TestMakeHeapStd(stopwatch1, pIntArrayS, pIntArrayS + kArraySize); TestMakeHeapEa (stopwatch2, pIntArrayE, pIntArrayE + kArraySize); if(i == 1) Benchmark::AddResult("heap (uint32_t[])/make_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestMakeHeapStd(stopwatch1, stdVectorTO.begin(), stdVectorTO.begin() + kArraySize); TestMakeHeapEa (stopwatch2, eaVectorTO.begin(), eaVectorTO.begin() + kArraySize); if(i == 1) Benchmark::AddResult("heap (vector)/make_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test push_heap /////////////////////////////// TestPushHeapStd(stopwatch1, pIntArrayS, pIntArrayS + kArraySize, pIntArray2, pIntArray2 + kArraySize); TestPushHeapEa (stopwatch2, pIntArrayE, pIntArrayE + kArraySize, pIntArray2, pIntArray2 + kArraySize); if(i == 1) Benchmark::AddResult("heap (uint32_t[])/push_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestPushHeapStd(stopwatch1, stdVectorTO.begin(), stdVectorTO.begin() + kArraySize, stdVectorTO2.begin(), stdVectorTO2.begin() + kArraySize); TestPushHeapEa (stopwatch2, eaVectorTO.begin(), eaVectorTO.begin() + kArraySize, eaVectorTO2.begin(), eaVectorTO2.begin() + kArraySize); if(i == 1) 
Benchmark::AddResult("heap (vector)/push_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test pop_heap /////////////////////////////// TestPopHeapStd(stopwatch1, pIntArrayS, pIntArrayS + (kArraySize * 2), pIntArrayS + kArraySize); // * 2 because we used push_heap above to add more items. TestPopHeapEa (stopwatch2, pIntArrayE, pIntArrayE + (kArraySize * 2), pIntArrayE + kArraySize); if(i == 1) Benchmark::AddResult("heap (uint32_t[])/pop_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestPopHeapStd(stopwatch1, stdVectorTO.begin(), stdVectorTO.begin() + (kArraySize * 2), stdVectorTO.begin() + kArraySize); // * 2 because we used push_heap above to add more items. TestPopHeapEa (stopwatch2, eaVectorTO.begin(), eaVectorTO.begin() + (kArraySize * 2), eaVectorTO.begin() + kArraySize); if(i == 1) Benchmark::AddResult("heap (vector)/pop_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test sort_heap /////////////////////////////// TestSortHeapStd(stopwatch1, pIntArrayS, pIntArrayS + kArraySize); TestSortHeapEa (stopwatch2, pIntArrayE, pIntArrayE + kArraySize); if(i == 1) Benchmark::AddResult("heap (uint32_t[])/sort_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSortHeapStd(stopwatch1, stdVectorTO.begin(), stdVectorTO.begin() + kArraySize); TestSortHeapEa (stopwatch2, eaVectorTO.begin(), eaVectorTO.begin() + kArraySize); if(i == 1) Benchmark::AddResult("heap (vector)/sort_heap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } delete[] pIntArrayS; delete[] pIntArrayE; delete[] pIntArray2; } } ================================================ FILE: benchmark/source/BenchmarkList.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// 
// Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #include #ifdef _MSC_VER #pragma warning(push, 0) #pragma warning(disable: 4555) // expression has no effect; expected expression with side-effect #pragma warning(disable: 4350) // behavior change: X called instead of Y #endif #include #ifdef _MSC_VER #pragma warning(pop) #endif using namespace EA; using namespace eastl; typedef std::list StdListTO; typedef eastl::list EaListTO; namespace { void DoNothing(void*) { // Empty } template void TestCtorIterator(EA::StdC::Stopwatch& stopwatch, const ContainerSource& cs, Container*) // Dummy Container argument because of GCC 2.X limitations. { stopwatch.Restart(); Container c(cs.begin(), cs.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void TestCtorN(EA::StdC::Stopwatch& stopwatch, Container*) // Dummy Container argument because of GCC 2.X limitations. 
{ stopwatch.Restart(); Container c(10000); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void TestPushBack(EA::StdC::Stopwatch& stopwatch, Container& c, const TestObject* pTOBegin, const TestObject* const pTOEnd) { stopwatch.Restart(); while(pTOBegin != pTOEnd) c.push_back(*pTOBegin++); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c, const TestObject* pTOBegin, const TestObject* const pTOEnd) { typename Container::iterator it = c.begin(); stopwatch.Restart(); while(pTOBegin != pTOEnd) { it = c.insert(it, *pTOBegin++); if(++it == c.end()) // Try to safely increment the iterator a couple times it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void TestSize(EA::StdC::Stopwatch& stopwatch, Container& c, void (*pFunction)(...)) { stopwatch.Restart(); for(int i = 0; (i < 10000) && c.size(); i++) (*pFunction)(&c); stopwatch.Stop(); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c, const TestObject& to) { EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); stopwatch.Restart(); typename Container::iterator it = eastl::find(c.begin(), c.end(), to); stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%d", (*it).mX); } template void TestReverse(EA::StdC::Stopwatch& stopwatch, Container& c) { EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); stopwatch.Restart(); c.reverse(); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void 
TestRemove(EA::StdC::Stopwatch& stopwatch, Container& c, const TestObject* pTOBegin, const TestObject* const pTOEnd) { EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); stopwatch.Restart(); while(pTOBegin != pTOEnd) c.remove(*pTOBegin++); stopwatch.Stop(); if(!c.empty()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void TestSplice(EA::StdC::Stopwatch& stopwatch, Container& c, Container& cSource) { typename Container::iterator it = c.begin(); int i = 0, iEnd = (int)cSource.size() - 5; EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); stopwatch.Restart(); while(i++ != iEnd) c.splice(it, cSource, cSource.begin()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } template void TestErase(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::iterator it = c.begin(); int i = 0, iEnd = (int)c.size() - 5; EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); stopwatch.Restart(); while(i++ != iEnd) { it = c.erase(it); if(it == c.end()) // Try to safely increment the iterator a couple times it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.back().mX); } } // namespace void BenchmarkList() { EASTLTest_Printf("List\n"); EASTLTest_Rand rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); EaListTO eaListTO_1(1); EaListTO eaListTO_10(10); EaListTO eaListTO_100(100); StdListTO stdListTO_1(1); StdListTO stdListTO_10(10); StdListTO stdListTO_100(100); { char buffer[32]; sprintf(buffer, "%p", &DoNothing); } { eastl::vector 
toVector(100000); for(eastl_size_t i = 0, iEnd = toVector.size(); i < iEnd; ++i) toVector[i] = TestObject((int)i); random_shuffle(toVector.begin(), toVector.end(), rng); for(int i = 0; i < 2; i++) { StdListTO stdListTO; EaListTO eaListTO; /////////////////////////////// // Test list(InputIterator first, InputIterator last) /////////////////////////////// TestCtorIterator(stopwatch1, toVector, &stdListTO); TestCtorIterator(stopwatch2, toVector, &eaListTO); if(i == 1) Benchmark::AddResult("list/ctor(it)", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test list(size_type n) /////////////////////////////// TestCtorN(stopwatch1, &stdListTO); TestCtorN(stopwatch2, &eaListTO); if(i == 1) Benchmark::AddResult("list/ctor(n)", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test push_back() /////////////////////////////// TestPushBack(stopwatch1, stdListTO, toVector.data(), toVector.data() + toVector.size()); TestPushBack(stopwatch2, eaListTO, toVector.data(), toVector.data() + toVector.size()); if(i == 1) Benchmark::AddResult("list/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test insert() /////////////////////////////// TestInsert(stopwatch1, stdListTO, toVector.data(), toVector.data() + toVector.size()); TestInsert(stopwatch2, eaListTO, toVector.data(), toVector.data() + toVector.size()); if(i == 1) Benchmark::AddResult("list/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test size() /////////////////////////////// TestSize(stopwatch1, stdListTO_1, Benchmark::DoNothing); TestSize(stopwatch2, eaListTO_1, Benchmark::DoNothing); if(i == 1) Benchmark::AddResult("list/size/1", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSize(stopwatch1, 
stdListTO_10, Benchmark::DoNothing); TestSize(stopwatch2, eaListTO_10, Benchmark::DoNothing); if(i == 1) Benchmark::AddResult("list/size/10", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime() #if !EASTL_LIST_SIZE_CACHE , "EASTL is configured to not cache the list size." #endif ); TestSize(stopwatch1, stdListTO_100, Benchmark::DoNothing); TestSize(stopwatch2, eaListTO_100, Benchmark::DoNothing); if(i == 1) Benchmark::AddResult("list/size/100", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime() #if !EASTL_LIST_SIZE_CACHE , "EASTL is configured to not cache the list size." #endif ); /////////////////////////////// // Test find() /////////////////////////////// TestFind(stopwatch1, stdListTO, TestObject(99999999)); TestFind(stopwatch2, eaListTO, TestObject(99999999)); if(i == 1) Benchmark::AddResult("list/find", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test reverse() /////////////////////////////// TestReverse(stopwatch1, stdListTO); TestReverse(stopwatch2, eaListTO); if(i == 1) Benchmark::AddResult("list/reverse", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test remove() /////////////////////////////// random_shuffle(toVector.begin(), toVector.end(), rng); TestRemove(stopwatch1, stdListTO, &toVector[0], &toVector[20]); TestRemove(stopwatch2, eaListTO, &toVector[0], &toVector[20]); if(i == 1) Benchmark::AddResult("list/remove", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test splice() /////////////////////////////// StdListTO listCopyStd(stdListTO); EaListTO listCopyEa(eaListTO); TestSplice(stopwatch1, stdListTO, listCopyStd); TestSplice(stopwatch2, eaListTO, listCopyEa); if(i == 1) Benchmark::AddResult("list/splice", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), 
stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase() /////////////////////////////// TestErase(stopwatch1, stdListTO); TestErase(stopwatch2, eaListTO); if(i == 1) Benchmark::AddResult("list/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkMap.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS() #include #include EA_RESTORE_ALL_VC_WARNINGS() using namespace EA; typedef std::map StdMapTOUint32; typedef eastl::map EaMapTOUint32; namespace { template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd, const Value& highValue) { stopwatch.Restart(); c.insert(pArrayBegin, pArrayEnd); stopwatch.Stop(); c.insert(highValue); } template void TestIteration(EA::StdC::Stopwatch& stopwatch, const Container& c, const Value& findValue) { stopwatch.Restart(); typename Container::const_iterator it = eastl::find(c.begin(), c.end(), findValue); // It shouldn't matter what find implementation we use here, as it merely iterates values. 
stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p", &*it); } template void TestBracket(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { Benchmark::DoNothing(c[pArrayBegin->first]); ++pArrayBegin; } stopwatch.Stop(); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { Benchmark::DoNothing(c.find(pArrayBegin->first)->second); ++pArrayBegin; } stopwatch.Stop(); } template void TestCount(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { typename Container::size_type temp = 0; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { temp += c.count(pArrayBegin->first); ++pArrayBegin; } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestLowerBound(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { Benchmark::DoNothing(c.lower_bound(pArrayBegin->first)->second); ++pArrayBegin; } stopwatch.Stop(); } template void TestUpperBound(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { Benchmark::DoNothing(c.upper_bound(pArrayBegin->first)->second); ++pArrayBegin; } stopwatch.Stop(); } template void TestEqualRange(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { Benchmark::DoNothing(c.equal_range(pArrayBegin->first).second->second); ++pArrayBegin; } stopwatch.Stop(); } template void TestEraseValue(EA::StdC::Stopwatch& stopwatch, Container& c, const Value* 
pArrayBegin, const Value* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { c.erase(pArrayBegin->first); ++pArrayBegin; } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); } template void TestErasePosition(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = c.size() / 3, it = c.begin(); j < jEnd; ++j) { // The erase fucntion is supposed to return an iterator, but the C++ standard was // not initially clear about it and some STL implementations don't do it correctly. #if (((defined(_MSC_VER) || defined(_CPPLIB_VER)) && !defined(_HAS_STRICT_CONFORMANCE))) // _CPPLIB_VER is something defined by Dinkumware STL. it = c.erase(it); // Standard behavior. #else // This pathway may execute at a slightly different speed than the // standard behaviour, but that's fine for the benchmark because the // benchmark is measuring the speed of erasing while iterating, and // however it needs to get done by the given STL is how it is measured. 
const typename Container::iterator itErase(it++); c.erase(itErase); #endif ++it; ++it; } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p %p", &c, &it); } template void TestEraseRange(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it1 = c.begin(); typename Container::iterator it2 = c.begin(); for(j = 0, jEnd = c.size() / 3; j < jEnd; ++j) ++it2; stopwatch.Restart(); c.erase(it1, it2); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%p %p %p", &c, &it1, &it2); } template void TestClear(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); c.clear(); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); } } // namespace void BenchmarkMap() { EASTLTest_Printf("Map\n"); EA::UnitTest::Rand rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { eastl::vector< std::pair > stdVector(10000); eastl::vector< eastl::pair > eaVector(10000); for(eastl_size_t i = 0, iEnd = stdVector.size(); i < iEnd; i++) { const uint32_t n1 = rng.RandLimit(((uint32_t)iEnd / 2)); const uint32_t n2 = rng.RandValue(); stdVector[i] = std::pair(TestObject(n1), n2); eaVector[i] = eastl::pair(TestObject(n1), n2); } for(int i = 0; i < 2; i++) { StdMapTOUint32 stdMapTOUint32; EaMapTOUint32 eaMapTOUint32; /////////////////////////////// // Test insert(const value_type&) /////////////////////////////// const std::pair stdHighValue(TestObject(0x7fffffff), 0x7fffffff); const eastl::pair eaHighValue(TestObject(0x7fffffff), 0x7fffffff); TestInsert(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size(), stdHighValue); TestInsert(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size(), eaHighValue); 
if(i == 1) Benchmark::AddResult("map/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration /////////////////////////////// TestIteration(stopwatch1, stdMapTOUint32, StdMapTOUint32::value_type(TestObject(9999999), 9999999)); TestIteration(stopwatch2, eaMapTOUint32, EaMapTOUint32::value_type(TestObject(9999999), 9999999)); if(i == 1) Benchmark::AddResult("map/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[] /////////////////////////////// TestBracket(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size()); TestBracket(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size()); if(i == 1) Benchmark::AddResult("map/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find /////////////////////////////// TestFind(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size()); TestFind(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size()); if(i == 1) Benchmark::AddResult("map/find", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test count /////////////////////////////// TestCount(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size()); TestCount(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size()); if(i == 1) Benchmark::AddResult("map/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test lower_bound /////////////////////////////// TestLowerBound(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size()); TestLowerBound(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size()); 
if(i == 1) Benchmark::AddResult("map/lower_bound", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test upper_bound /////////////////////////////// TestUpperBound(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size()); TestUpperBound(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size()); if(i == 1) Benchmark::AddResult("map/upper_bound", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test equal_range /////////////////////////////// TestEqualRange(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + stdVector.size()); TestEqualRange(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + eaVector.size()); if(i == 1) Benchmark::AddResult("map/equal_range", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(const key_type& key) /////////////////////////////// TestEraseValue(stopwatch1, stdMapTOUint32, stdVector.data(), stdVector.data() + (stdVector.size() / 2)); TestEraseValue(stopwatch2, eaMapTOUint32, eaVector.data(), eaVector.data() + (eaVector.size() / 2)); if(i == 1) Benchmark::AddResult("map/erase/key", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(iterator position) /////////////////////////////// TestErasePosition(stopwatch1, stdMapTOUint32); TestErasePosition(stopwatch2, eaMapTOUint32); if(i == 1) Benchmark::AddResult("map/erase/pos", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime(), GetStdSTLType() == kSTLMS ? "MS uses a code bloating implementation of erase." 
: NULL); /////////////////////////////// // Test erase(iterator first, iterator last) /////////////////////////////// TestEraseRange(stopwatch1, stdMapTOUint32); TestEraseRange(stopwatch2, eaMapTOUint32); if(i == 1) Benchmark::AddResult("map/erase/range", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test clear() /////////////////////////////// TestClear(stopwatch1, stdMapTOUint32); TestClear(stopwatch2, eaMapTOUint32); if(i == 1) Benchmark::AddResult("map/clear", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkSet.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS() #include #include EA_RESTORE_ALL_VC_WARNINGS() using namespace EA; typedef std::set StdSetUint32; typedef eastl::set EaSetUint32; namespace { template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { stopwatch.Restart(); c.insert(pArrayBegin, pArrayEnd); stopwatch.Stop(); // Intentionally push back a high uint32_t value. We do this so that // later upper_bound, lower_bound and equal_range never return end(). 
c.insert(0xffffffff); } template void TestIteration(EA::StdC::Stopwatch& stopwatch, const Container& c) { stopwatch.Restart(); typename Container::const_iterator it = eastl::find(c.begin(), c.end(), uint32_t(9999999)); stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)*it); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { uint32_t temp = 0; typename Container::iterator it; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { it = c.find(*pArrayBegin++); temp += *it; } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestCount(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { typename Container::size_type temp = 0; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) temp += c.count(*pArrayBegin++); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestLowerBound(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { uint32_t temp = 0; typename Container::iterator it; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { it = c.lower_bound(*pArrayBegin++); temp += *it; // We know that it != end because earlier we inserted 0xffffffff. } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestUpperBound(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { uint32_t temp = 0; typename Container::iterator it; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { it = c.upper_bound(*pArrayBegin++); temp += *it; // We know that it != end because earlier we inserted 0xffffffff. 
} stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestEqualRange(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { uint32_t temp = 0; stopwatch.Restart(); while(pArrayBegin != pArrayEnd) { temp += *(c.equal_range(*pArrayBegin++).first); // We know that it != end because earlier we inserted 0xffffffff. } stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestEraseValue(EA::StdC::Stopwatch& stopwatch, Container& c, const uint32_t* pArrayBegin, const uint32_t* pArrayEnd) { stopwatch.Restart(); while(pArrayBegin != pArrayEnd) c.erase(*pArrayBegin++); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); } template void TestErasePosition(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = c.size() / 3, it = c.begin(); j < jEnd; ++j) { // The erase fucntion is supposed to return an iterator, but the C++ standard was // not initially clear about it and some STL implementations don't do it correctly. #if (((defined(_MSC_VER) || defined(_CPPLIB_VER)) && !defined(_HAS_STRICT_CONFORMANCE))) // _CPPLIB_VER is something defined by Dinkumware STL. it = c.erase(it); #else // This pathway may execute at a slightly different speed than the // standard behaviour, but that's fine for the benchmark because the // benchmark is measuring the speed of erasing while iterating, and // however it needs to get done by the given STL is how it is measured. 
const typename Container::iterator itErase(it++); c.erase(itErase); #endif ++it; ++it; } stopwatch.Stop(); } template void TestEraseRange(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it1 = c.begin(); typename Container::iterator it2 = c.begin(); for(j = 0, jEnd = c.size() / 3; j < jEnd; ++j) ++it2; stopwatch.Restart(); c.erase(it1, it2); stopwatch.Stop(); } template void TestClear(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); c.clear(); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)c.size()); } } // namespace void BenchmarkSet() { EASTLTest_Printf("Set\n"); EA::UnitTest::Rand rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { eastl::vector intVector(10000); for(eastl_size_t i = 0, iEnd = intVector.size(); i < iEnd; i++) intVector[i] = (uint32_t)rng.RandLimit(((uint32_t)iEnd / 2)); // This will result in duplicates and even a few triplicates. 
for(int i = 0; i < 2; i++) { StdSetUint32 stdSetUint32; EaSetUint32 eaSetUint32; /////////////////////////////// // Test insert(const value_type&) /////////////////////////////// TestInsert(stopwatch1, stdSetUint32, intVector.data(), intVector.data() + intVector.size()); TestInsert(stopwatch2, eaSetUint32, intVector.data(), intVector.data() + intVector.size()); if(i == 1) Benchmark::AddResult("set/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration /////////////////////////////// TestIteration(stopwatch1, stdSetUint32); TestIteration(stopwatch2, eaSetUint32); if(i == 1) Benchmark::AddResult("set/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find /////////////////////////////// TestFind(stopwatch1, stdSetUint32, intVector.data(), intVector.data() + intVector.size()); TestFind(stopwatch2, eaSetUint32, intVector.data(), intVector.data() + intVector.size()); if(i == 1) Benchmark::AddResult("set/find", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test count /////////////////////////////// TestCount(stopwatch1, stdSetUint32, intVector.data(), intVector.data() + intVector.size()); TestCount(stopwatch2, eaSetUint32, intVector.data(), intVector.data() + intVector.size()); if(i == 1) Benchmark::AddResult("set/count", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test lower_bound /////////////////////////////// TestLowerBound(stopwatch1, stdSetUint32, intVector.data(), intVector.data() + intVector.size()); TestLowerBound(stopwatch2, eaSetUint32, intVector.data(), intVector.data() + intVector.size()); if(i == 1) Benchmark::AddResult("set/lower_bound", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// 
// Test upper_bound /////////////////////////////// TestUpperBound(stopwatch1, stdSetUint32, intVector.data(), intVector.data() + intVector.size()); TestUpperBound(stopwatch2, eaSetUint32, intVector.data(), intVector.data() + intVector.size()); if(i == 1) Benchmark::AddResult("set/upper_bound", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test equal_range /////////////////////////////// TestEqualRange(stopwatch1, stdSetUint32, intVector.data(), intVector.data() + intVector.size()); TestEqualRange(stopwatch2, eaSetUint32, intVector.data(), intVector.data() + intVector.size()); if(i == 1) Benchmark::AddResult("set/equal_range", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(const key_type& key) /////////////////////////////// TestEraseValue(stopwatch1, stdSetUint32, &intVector[0], &intVector[intVector.size() / 2]); TestEraseValue(stopwatch2, eaSetUint32, &intVector[0], &intVector[intVector.size() / 2]); if(i == 1) Benchmark::AddResult("set/erase/val", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(iterator position) /////////////////////////////// TestErasePosition(stopwatch1, stdSetUint32); TestErasePosition(stopwatch2, eaSetUint32); if(i == 1) Benchmark::AddResult("set/erase/pos", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime(), GetStdSTLType() == kSTLMS ? "MS uses a code bloating implementation of erase." 
: NULL); /////////////////////////////// // Test erase(iterator first, iterator last) /////////////////////////////// TestEraseRange(stopwatch1, stdSetUint32); TestEraseRange(stopwatch2, eaSetUint32); if(i == 1) Benchmark::AddResult("set/erase range", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test clear() /////////////////////////////// TestClear(stopwatch1, stdSetUint32); TestClear(stopwatch2, eaSetUint32); if(i == 1) Benchmark::AddResult("set/clear", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkSort.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include #include #include #include #include "EASTLBenchmark.h" #include "EASTLTest.h" EA_DISABLE_ALL_VC_WARNINGS() #include #include #include #include EA_RESTORE_ALL_VC_WARNINGS() using namespace EA; namespace { struct ValuePair { uint32_t key; uint32_t v; }; struct VPCompare { bool operator()(const ValuePair& vp1, const ValuePair& vp2) const { // return *(const uint64_t*)&vp1 < *(const uint64_t*)&vp2; return (vp1.key == vp2.key) ? (vp1.v < vp2.v) : (vp1.key < vp2.key); } }; bool operator<(const ValuePair& vp1, const ValuePair& vp2) { // return *(const uint64_t*)&vp1 < *(const uint64_t*)&vp2; return (vp1.key == vp2.key) ? (vp1.v < vp2.v) : (vp1.key < vp2.key); } bool operator==(const ValuePair& vp1, const ValuePair& vp2) { // return *(const uint64_t*)&vp1 == *(const uint64_t*)&vp2; return (vp1.key == vp2.key) && (vp1.v == vp2.v); } } // VPCompareC // Useful for testing the the C qsort function. 
int VPCompareC(const void* elem1, const void* elem2) { return (int)(*(const uint64_t*)elem1 - *(const uint64_t*)elem2); } typedef std::vector StdVectorVP; typedef eastl::vector EaVectorVP; typedef std::vector StdVectorInt; typedef eastl::vector EaVectorInt; typedef std::vector StdVectorTO; typedef eastl::vector EaVectorTO; namespace { #ifndef EA_PREFIX_NO_INLINE #ifdef _MSC_VER #define EA_PREFIX_NO_INLINE EA_NO_INLINE #define EA_POSTFIX_NO_INLINE #else #define EA_PREFIX_NO_INLINE #define EA_POSTFIX_NO_INLINE EA_NO_INLINE #endif #endif EA_PREFIX_NO_INLINE void TestQuickSortStdVP (EA::StdC::Stopwatch& stopwatch, StdVectorVP& stdVectorVP) EA_POSTFIX_NO_INLINE; EA_PREFIX_NO_INLINE void TestQuickSortEaVP (EA::StdC::Stopwatch& stopwatch, EaVectorVP& eaVectorVP) EA_POSTFIX_NO_INLINE; EA_PREFIX_NO_INLINE void TestQuickSortStdInt(EA::StdC::Stopwatch& stopwatch, StdVectorInt& stdVectorInt) EA_POSTFIX_NO_INLINE; EA_PREFIX_NO_INLINE void TestQuickSortEaInt (EA::StdC::Stopwatch& stopwatch, EaVectorInt& eaVectorInt) EA_POSTFIX_NO_INLINE; EA_PREFIX_NO_INLINE void TestQuickSortStdTO (EA::StdC::Stopwatch& stopwatch, StdVectorTO& stdVectorTO) EA_POSTFIX_NO_INLINE; EA_PREFIX_NO_INLINE void TestQuickSortEaTO (EA::StdC::Stopwatch& stopwatch, EaVectorTO& eaVectorTO) EA_POSTFIX_NO_INLINE; void TestQuickSortStdVP(EA::StdC::Stopwatch& stopwatch, StdVectorVP& stdVectorVP) { stopwatch.Restart(); std::sort(stdVectorVP.begin(), stdVectorVP.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)stdVectorVP[0].key); } void TestQuickSortEaVP(EA::StdC::Stopwatch& stopwatch, EaVectorVP& eaVectorVP) { stopwatch.Restart(); eastl::quick_sort(eaVectorVP.begin(), eaVectorVP.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)eaVectorVP[0].key); } void TestQuickSortStdInt(EA::StdC::Stopwatch& stopwatch, StdVectorInt& stdVectorInt) { stopwatch.Restart(); 
std::sort(stdVectorInt.begin(), stdVectorInt.end());
		stopwatch.Stop();
		// Print the first element into the scratch buffer so that the sorted result is used.
		EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)stdVectorInt[0]);
	}


	// Times eastl::quick_sort over the whole integer vector.
	void TestQuickSortEaInt(EA::StdC::Stopwatch& stopwatch, EaVectorInt& eaVectorInt)
	{
		stopwatch.Restart();
		eastl::quick_sort(eaVectorInt.begin(), eaVectorInt.end());
		stopwatch.Stop();
		EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)eaVectorInt[0]);
	}


	// Times std::sort over the whole vector; the elements expose an mX member.
	void TestQuickSortStdTO(EA::StdC::Stopwatch& stopwatch, StdVectorTO& stdVectorTO)
	{
		stopwatch.Restart();
		std::sort(stdVectorTO.begin(), stdVectorTO.end());
		stopwatch.Stop();
		EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)stdVectorTO[0].mX);
	}


	// Same as TestQuickSortStdTO, but times eastl::quick_sort.
	void TestQuickSortEaTO(EA::StdC::Stopwatch& stopwatch, EaVectorTO& eaVectorTO)
	{
		stopwatch.Restart();
		eastl::quick_sort(eaVectorTO.begin(), eaVectorTO.end());
		stopwatch.Stop();
		EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)eaVectorTO[0].mX);
	}

} // namespace


namespace
{
	// SortFunctionType
	// Identifies each sort implementation compared by the benchmarks in this file.
	// The values are used as array indexes, so they must stay contiguous from zero.
	enum SortFunctionType
	{
		sf_qsort, // C qsort
		sf_shell_sort, // eastl::shell_sort.
sf_heap_sort, // eastl::heap_sort
		sf_merge_sort, // eastl::merge_sort
		sf_merge_sort_buffer, // eastl::merge_sort_buffer
		sf_comb_sort, // eastl::comb_sort
		sf_bubble_sort, // eastl::bubble_sort
		sf_selection_sort, // eastl::selection_sort
		sf_shaker_sort, // eastl::shaker_sort
		sf_quick_sort, // eastl::quick_sort
		sf_tim_sort, // eastl::tim_sort
		sf_insertion_sort, // eastl::insertion_sort
		sf_std_sort, // std::sort
		sf_std_stable_sort, // std::stable_sort
		sf_radix_sort, // eastl::radix_sort (unconventional sort)
		sf_count // Number of sort function types; not a sort function itself.
	};

	// GetSortFunctionName
	// Returns the display name used in the benchmark output for a SortFunctionType value,
	// or "unknown" for any other value (including sf_count).
	// Note that sf_quick_sort is reported as "eastl::sort".
	const char* GetSortFunctionName(int sortFunctionType)
	{
		switch (sortFunctionType)
		{
			case sf_quick_sort:
				return "eastl::sort";

			case sf_tim_sort:
				return "eastl::tim_sort";

			case sf_insertion_sort:
				return "eastl::insertion_sort";

			case sf_shell_sort:
				return "eastl::shell_sort";

			case sf_heap_sort:
				return "eastl::heap_sort";

			case sf_merge_sort:
				return "eastl::merge_sort";

			case sf_merge_sort_buffer:
				return "eastl::merge_sort_buffer";

			case sf_comb_sort:
				return "eastl::comb_sort";

			case sf_bubble_sort:
				return "eastl::bubble_sort";

			case sf_selection_sort:
				return "eastl::selection_sort";

			case sf_shaker_sort:
				return "eastl::shaker_sort";

			case sf_radix_sort:
				return "eastl::radix_sort";

			case sf_qsort:
				return "qsort";

			case sf_std_sort:
				return "std::sort";

			case sf_std_stable_sort:
				return "std::stable_sort";

			default:
				return "unknown";
		}
	}


	// RandomizationType
	// Describes how the input data is arranged before each sort is timed.
	enum RandomizationType
	{
		kRandom, // Completely random data.
		kRandomSorted, // Random values already sorted.
		kOrdered, // Already sorted.
		kMostlyOrdered, // Partly sorted already.
kRandomizationTypeCount }; const char* GetRandomizationTypeName(int randomizationType) { switch (randomizationType) { case kRandom: return "random"; case kRandomSorted: return "random sorted"; case kOrdered: return "ordered"; case kMostlyOrdered: return "mostly ordered"; default: return "unknown"; } } template void Randomize(eastl::vector& v, EA::UnitTest::RandGenT& rng, RandomizationType type) { typedef RandomType value_type; switch (type) { default: case kRandomizationTypeCount: // We specify this only to avoid a compiler warning about not testing for it. case kRandom: { eastl::generate(v.begin(), v.end(), rng); break; } case kRandomSorted: { // This randomization type differs from kOrdered because the set of values is random (but sorted), in the kOrdered // case the set of values is contiguous (i.e. 0, 1, ..., n) which can have different performance characteristics. // For example, radix_sort performs poorly for kOrdered. eastl::generate(v.begin(), v.end(), rng); eastl::sort(v.begin(), v.end()); break; } case kOrdered: { for(eastl_size_t i = 0; i < v.size(); ++i) v[i] = value_type((value_type)i); // Note that value_type may be a struct and not an integer. Thus the casting and construction here. break; } case kMostlyOrdered: { for(eastl_size_t i = 0; i < v.size(); ++i) v[i] = value_type((value_type)i); // Note that value_type may be a struct and not an integer. Thus the casting and construction here. // We order random segments. // The algorithm below in practice will make slightly more than kPercentOrdered be ordered. const eastl_size_t kPercentOrdered = 80; // In actuality, due to statistics, the actual ordered percent will be about 82-85%. for(eastl_size_t n = 0, s = v.size(), nEnd = ((s < (100 - kPercentOrdered)) ? 
1 : (s / (100 - kPercentOrdered))); n < nEnd; n++) { eastl_size_t i = rng.mRand.RandLimit((uint32_t)s); eastl_size_t j = rng.mRand.RandLimit((uint32_t)s); eastl::swap(v[i], v[j]); } break; } } } char gSlowAssignBuffer1[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* ... */}; char gSlowAssignBuffer2[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* ... */}; // SlowAssign // Implements an object which has slow assign performance. template struct SlowAssign { typedef T key_type; T x; static int nAssignCount; SlowAssign() { x = 0; memcpy(gSlowAssignBuffer1, gSlowAssignBuffer2, sizeof(gSlowAssignBuffer1)); } SlowAssign(const SlowAssign& sa) { ++nAssignCount; x = sa.x; memcpy(gSlowAssignBuffer1, gSlowAssignBuffer2, sizeof(gSlowAssignBuffer1)); } SlowAssign& operator=(const SlowAssign& sa) { ++nAssignCount; x = sa.x; memcpy(gSlowAssignBuffer1, gSlowAssignBuffer2, sizeof(gSlowAssignBuffer1)); return *this; } SlowAssign& operator=(int a) { x = (T)a; return *this; } static void Reset() { nAssignCount = 0; } }; template<> int SlowAssign::nAssignCount = 0; template bool operator <(const SlowAssign& a, const SlowAssign& b) { return a.x < b.x; } // SlowCompare // Implements a compare which is N time slower than a simple integer compare. template struct SlowCompare { static int nCompareCount; bool operator()(T a, T b) { ++nCompareCount; return (a < b) && // It happens that gSlowAssignBuffer1 is always zeroed. (gSlowAssignBuffer1[0] == 0) && (gSlowAssignBuffer1[1] == 0) && (gSlowAssignBuffer1[1] == 0) && (gSlowAssignBuffer1[2] == 0) && (gSlowAssignBuffer1[4] == 0) && (gSlowAssignBuffer1[5] == 0); } static void Reset() { nCompareCount = 0; } }; template <> int SlowCompare::nCompareCount = 0; // qsort callback functions // qsort compare function returns negative if b > a and positive if a > b. template int CompareInteger(const void* a, const void* b) { // Even though you see the following in Internet example code, it doesn't work! 
// The reason is that it works only if a and b are both >= 0, otherwise large // values can cause integer register wraparound. A similar kind of problem happens // if you try to do the same thing with floating point value compares. // See http://www.akalin.cx/2006/06/23/on-the-qsort-comparison-function/ // Internet exmaple code: // return *(const int32_t*)a - *(const int32_t*)b; // This double comparison might seem like it's crippling qsort against the // STL-based sorts which do a single compare. But consider that the returning // of -1, 0, +1 gives qsort more information, and its logic takes advantage // of that. if (*(const T*)a < *(const T*)b) return -1; if (*(const T*)a > *(const T*)b) return +1; return 0; } int SlowCompareInt32(const void* a, const void* b) { ++SlowCompare::nCompareCount; // This code is similar in performance to the C++ SlowCompare template functor above. if((gSlowAssignBuffer1[0] == 0) && (gSlowAssignBuffer1[1] == 0) && (gSlowAssignBuffer1[1] == 0) && (gSlowAssignBuffer1[2] == 0) && (gSlowAssignBuffer1[4] == 0) && (gSlowAssignBuffer1[5] == 0)) { if (*(const int32_t*)a < *(const int32_t*)b) return -1; if (*(const int32_t*)a > *(const int32_t*)b) return +1; } return 0; } template struct slow_assign_extract_radix_key { typedef typename slow_assign_type::key_type radix_type; const radix_type operator()(const slow_assign_type& obj) const { return obj.x; } }; template struct identity_extract_radix_key { typedef integer_type radix_type; const radix_type operator()(const integer_type& x) const { return x; } }; } // namespace struct BenchmarkResult { uint64_t mTime; uint64_t mCompareCount; uint64_t mAssignCount; BenchmarkResult() : mTime(0), mCompareCount(0), mAssignCount(0) {} }; int CompareSortPerformance() { // Sizes of arrays to be sorted. 
const eastl_size_t kSizes[] = { 10, 100, 1000, 10000 };
	const eastl_size_t kSizesCount = EAArrayCount(kSizes);

	// Fastest result recorded so far, indexed by [randomization type][size index][sort function].
	static BenchmarkResult sResults[kRandomizationTypeCount][kSizesCount][sf_count];

	int nErrorCount = 0;

	EA::UnitTest::ReportVerbosity(2, "Sort comparison\n");
	EA::UnitTest::ReportVerbosity(2, "Random seed = %u\n", (unsigned)EA::UnitTest::GetRandSeed());

	EA::UnitTest::RandGenT rng(EA::UnitTest::GetRandSeed());
	EA::StdC::Stopwatch stopwatch(EA::StdC::Stopwatch::kUnitsCPUCycles);
	EA::StdC::Stopwatch stopwatchGlobal(EA::StdC::Stopwatch::kUnitsSeconds);
	const eastl_size_t kArraySizeMax = *eastl::max_element(eastl::begin(kSizes), eastl::end(kSizes));

	// Each configuration is run kRunCount times and the fastest time is kept.
	const int kRunCount = 4;

	#if !defined(EA_DEBUG)
		EA::UnitTest::SetHighThreadPriority();
	#endif

	// Every SortFunctionType value below sf_count, in enum order.
	eastl::vector allSortFunctions;
	for (int i = 0; i < sf_count; i++)
	{
		allSortFunctions.push_back(SortFunctionType(i));
	}

	{
		auto& sortFunctions = allSortFunctions;

		// Regular speed test.
		// In this case we test the sorting of integral values.
		// This is probably the most common type of comparison.
		EA::UnitTest::ReportVerbosity(2, "Sort comparison: Regular speed test\n");

		typedef uint32_t ElementType;
		typedef eastl::less CompareFunction;

		eastl::string sOutput;
		sOutput.set_capacity(100000);

		// Scratch buffer for the sorts that take one (tim_sort_buffer, merge_sort_buffer, radix_sort).
		ElementType* pBuffer = new ElementType[kArraySizeMax];

		memset(sResults, 0, sizeof(sResults));

		stopwatchGlobal.Restart();

		for (int c = 0; c < kRunCount; c++)
		{
			for (int i = 0; i < kRandomizationTypeCount; i++)
			{
				for (size_t sizeType = 0; sizeType < EAArrayCount(kSizes); sizeType++)
				{
					const eastl_size_t size = kSizes[sizeType];

					for (SortFunctionType sortFunction : sortFunctions)
					{
						eastl::vector v(size);

						// Reseed before generating so that every sort function sorts the same input.
						rng.SetSeed(EA::UnitTest::GetRandSeed());
						Randomize(v, rng, (RandomizationType)i);

						// Only the sort call itself is timed.
						switch (sortFunction)
						{
							case sf_quick_sort: stopwatch.Restart(); eastl::quick_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_tim_sort: stopwatch.Restart(); eastl::tim_sort_buffer(v.begin(), v.end(), pBuffer, CompareFunction()); stopwatch.Stop(); break;
							case sf_insertion_sort: stopwatch.Restart(); eastl::insertion_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_shell_sort: stopwatch.Restart(); eastl::shell_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_heap_sort: stopwatch.Restart(); eastl::heap_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_merge_sort: stopwatch.Restart(); eastl::merge_sort(v.begin(), v.end(), *get_default_allocator((EASTLAllocatorType*)NULL), CompareFunction()); stopwatch.Stop(); break;
							case sf_merge_sort_buffer: stopwatch.Restart(); eastl::merge_sort_buffer(v.begin(), v.end(), pBuffer, CompareFunction()); stopwatch.Stop(); break;
							case sf_comb_sort: stopwatch.Restart(); eastl::comb_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_bubble_sort: stopwatch.Restart(); eastl::bubble_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_selection_sort: stopwatch.Restart(); eastl::selection_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_shaker_sort: stopwatch.Restart(); eastl::shaker_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_radix_sort: stopwatch.Restart(); eastl::radix_sort>(v.begin(), v.end(), pBuffer); stopwatch.Stop(); break;
							case sf_qsort: stopwatch.Restart(); qsort(&v[0], (size_t)v.size(), sizeof(ElementType), CompareInteger); stopwatch.Stop(); break;
							case sf_std_sort: stopwatch.Restart(); std::sort(v.data(), v.data() + v.size(), std::less()); stopwatch.Stop(); break;
							case sf_std_stable_sort: stopwatch.Restart(); std::stable_sort(v.data(), v.data() + v.size(), std::less()); stopwatch.Stop(); break;

							case sf_count:
							default:
								// unsupported
								break;
						}

						const uint64_t elapsedTime = (uint64_t)stopwatch.GetElapsedTime();

						// If this result was faster than a previously fastest result, record this one instead.
						if ((c == 0) || (elapsedTime < sResults[i][sizeType][sortFunction].mTime))
							sResults[i][sizeType][sortFunction].mTime = elapsedTime;

						VERIFY(eastl::is_sorted(v.begin(), v.end()));

					} // for each sort function...

				} // for each size type...

			} // for each randomization type...

		} // for each run

		EA::UnitTest::ReportVerbosity(2, "Total time: %.2f s\n", stopwatchGlobal.GetElapsedTimeFloat());

		delete[] pBuffer;

		// Now print the results.
		for (int i = 0; i < kRandomizationTypeCount; i++)
		{
			for (size_t sizeType = 0; sizeType < EAArrayCount(kSizes); sizeType++)
			{
				const eastl_size_t size = kSizes[sizeType];

				for (SortFunctionType sortFunction : sortFunctions)
				{
					sOutput.append_sprintf("%25s, %14s, Size: %8u, Time: %14" PRIu64 " ticks %0.2f ticks/elem\n",
						GetSortFunctionName(sortFunction), GetRandomizationTypeName(i),
						(unsigned)size, sResults[i][sizeType][sortFunction].mTime,
						float(sResults[i][sizeType][sortFunction].mTime)/float(size));
				}
				sOutput.append("\n");
			}
		}

		EA::UnitTest::ReportVerbosity(2, "%s\n\n", sOutput.c_str());
	}

	{
		// Do a speed test for the case of slow compares.
// By this we mean to compare sorting speeds when the comparison of elements is slow.
		// Sort functions use element comparison to tell where elements go and use element
		// movement to get them there. But some sorting functions accomplish sorting performance by
		// minimizing the amount of movement, some minimize the amount of comparisons, and the
		// best do a good job of minimizing both.
		auto sortFunctions = allSortFunctions;
		// We can't test this radix_sort because what we need isn't exposed.
		sortFunctions.erase(eastl::remove(sortFunctions.begin(), sortFunctions.end(), sf_radix_sort), sortFunctions.end());
		EA::UnitTest::ReportVerbosity(2, "Sort comparison: Slow compare speed test\n");

		typedef int32_t ElementType;
		typedef SlowCompare CompareFunction;

		eastl::string sOutput;
		sOutput.set_capacity(100000);

		// Scratch buffer for the sorts that take one (tim_sort_buffer, merge_sort_buffer).
		ElementType* pBuffer = new ElementType[kArraySizeMax];

		memset(sResults, 0, sizeof(sResults));

		stopwatchGlobal.Restart();

		for (int c = 0; c < kRunCount; c++)
		{
			for (int i = 0; i < kRandomizationTypeCount; i++)
			{
				for (size_t sizeType = 0; sizeType < EAArrayCount(kSizes); sizeType++)
				{
					const eastl_size_t size = kSizes[sizeType];

					for (SortFunctionType sortFunction : sortFunctions)
					{
						eastl::vector v(size);

						// Reseed before generating so that every sort function sorts the same input.
						rng.SetSeed(EA::UnitTest::GetRandSeed());
						Randomize(v, rng, (RandomizationType)i);

						// Zero the compare counter so that it only counts the compares of the sort below.
						CompareFunction::Reset();

						switch (sortFunction)
						{
							case sf_quick_sort: stopwatch.Restart(); eastl::quick_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_tim_sort: stopwatch.Restart(); eastl::tim_sort_buffer(v.begin(), v.end(), pBuffer, CompareFunction()); stopwatch.Stop(); break;
							case sf_insertion_sort: stopwatch.Restart(); eastl::insertion_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_shell_sort: stopwatch.Restart(); eastl::shell_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_heap_sort: stopwatch.Restart(); eastl::heap_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_merge_sort: stopwatch.Restart(); eastl::merge_sort(v.begin(), v.end(), *get_default_allocator((EASTLAllocatorType*)NULL), CompareFunction()); stopwatch.Stop(); break;
							case sf_merge_sort_buffer: stopwatch.Restart(); eastl::merge_sort_buffer(v.begin(), v.end(), pBuffer, CompareFunction()); stopwatch.Stop(); break;
							case sf_comb_sort: stopwatch.Restart(); eastl::comb_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_bubble_sort: stopwatch.Restart(); eastl::bubble_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_selection_sort: stopwatch.Restart(); eastl::selection_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_shaker_sort: stopwatch.Restart(); eastl::shaker_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_qsort: stopwatch.Restart(); qsort(&v[0], (size_t)v.size(), sizeof(ElementType), SlowCompareInt32); stopwatch.Stop(); break;
							case sf_std_sort: stopwatch.Restart(); std::sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;
							case sf_std_stable_sort: stopwatch.Restart(); std::stable_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break;

							case sf_radix_sort:
							case sf_count:
							default:
								// unsupported
								break;
						}

						const uint64_t elapsedTime = (uint64_t)stopwatch.GetElapsedTime();

						// If this result was faster than a previously fastest result, record this one instead.
						if ((c == 0) || (elapsedTime < sResults[i][sizeType][sortFunction].mTime))
						{
							sResults[i][sizeType][sortFunction].mTime = elapsedTime;
							sResults[i][sizeType][sortFunction].mCompareCount = (uint64_t)CompareFunction::nCompareCount;
						}

						VERIFY(eastl::is_sorted(v.begin(), v.end()));

					} // for each sort function...

				} // for each size type...

			} // for each randomization type...

		} // for each run

		EA::UnitTest::ReportVerbosity(2, "Total time: %.2f s\n", stopwatchGlobal.GetElapsedTimeFloat());

		delete[] pBuffer;

		// Now print the results.
		for (int i = 0; i < kRandomizationTypeCount; i++)
		{
			for (size_t sizeType = 0; sizeType < EAArrayCount(kSizes); sizeType++)
			{
				const eastl_size_t size = kSizes[sizeType];

				for (SortFunctionType sortFunction : sortFunctions)
				{
					sOutput.append_sprintf("%25s, %14s, Size: %6u, Time: %11" PRIu64 " ticks, Compares: %11" PRIu64 "\n",
						GetSortFunctionName(sortFunction), GetRandomizationTypeName(i),
						(unsigned)size, sResults[i][sizeType][sortFunction].mTime,
						sResults[i][sizeType][sortFunction].mCompareCount);
				}

				sOutput.append("\n");
			}
		}

		EA::UnitTest::ReportVerbosity(2, "%s\n\n", sOutput.c_str());
	}

	{
		// Do a speed test for the case of slow assignment.
		// By this we mean to compare sorting speeds when the movement of elements is slow.
		// Sort functions use element comparison to tell where elements go and use element
		// movement to get them there. But some sorting functions accomplish sorting performance by
		// minimizing the amount of movement, some minimize the amount of comparisons, and the
		// best do a good job of minimizing both.
		auto sortFunctions = allSortFunctions;
		// Can't implement this for qsort because the C standard library doesn't expose it.
		// We could implement it by copying and modifying the source code.
sortFunctions.erase(eastl::remove(sortFunctions.begin(), sortFunctions.end(), sf_qsort), sortFunctions.end()); EA::UnitTest::ReportVerbosity(2, "Sort comparison: Slow assignment speed test\n"); typedef SlowAssign ElementType; typedef eastl::less CompareFunction; eastl::string sOutput; sOutput.set_capacity(100000); ElementType* pBuffer = new ElementType[kArraySizeMax]; memset(sResults, 0, sizeof(sResults)); stopwatchGlobal.Restart(); for (int c = 0; c < kRunCount; c++) { for (int i = 0; i < kRandomizationTypeCount; i++) { for (size_t sizeType = 0; sizeType < EAArrayCount(kSizes); sizeType++) { const eastl_size_t size = kSizes[sizeType]; for (SortFunctionType sortFunction : sortFunctions) { eastl::vector v(size); Randomize(v, rng, (RandomizationType)i); ElementType::Reset(); switch (sortFunction) { case sf_quick_sort: stopwatch.Restart(); eastl::quick_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_tim_sort: stopwatch.Restart(); eastl::tim_sort_buffer(v.begin(), v.end(), pBuffer, CompareFunction()); stopwatch.Stop(); break; case sf_insertion_sort: stopwatch.Restart(); eastl::insertion_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_shell_sort: stopwatch.Restart(); eastl::shell_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_heap_sort: stopwatch.Restart(); eastl::heap_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_merge_sort: stopwatch.Restart(); eastl::merge_sort(v.begin(), v.end(), *get_default_allocator((EASTLAllocatorType*)NULL), CompareFunction()); stopwatch.Stop(); break; case sf_merge_sort_buffer: stopwatch.Restart(); eastl::merge_sort_buffer(v.begin(), v.end(), pBuffer, CompareFunction()); stopwatch.Stop(); break; case sf_comb_sort: stopwatch.Restart(); eastl::comb_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_bubble_sort: stopwatch.Restart(); eastl::bubble_sort(v.begin(), v.end(), CompareFunction()); 
stopwatch.Stop(); break; case sf_selection_sort: stopwatch.Restart(); eastl::selection_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_shaker_sort: stopwatch.Restart(); eastl::shaker_sort(v.begin(), v.end(), CompareFunction()); stopwatch.Stop(); break; case sf_radix_sort: stopwatch.Restart(); eastl::radix_sort>(v.begin(), v.end(), pBuffer); stopwatch.Stop(); break; case sf_std_sort: stopwatch.Restart(); std::sort(v.begin(), v.end(), std::less()); stopwatch.Stop(); break; case sf_std_stable_sort: stopwatch.Restart(); std::stable_sort(v.begin(), v.end(), std::less()); stopwatch.Stop(); break; case sf_qsort: case sf_count: default: // unsupported break; } const uint64_t elapsedTime = (uint64_t)stopwatch.GetElapsedTime(); // If this result was faster than a previously fastest result, record this one instead. if ((c == 0) || (elapsedTime < sResults[i][sizeType][sortFunction].mTime)) { sResults[i][sizeType][sortFunction].mTime = elapsedTime; sResults[i][sizeType][sortFunction].mAssignCount = (uint64_t)ElementType::nAssignCount; } VERIFY(eastl::is_sorted(v.begin(), v.end())); } // for each sort function... } // for each size type... } // for each randomization type... } // for each run EA::UnitTest::ReportVerbosity(2, "Total time: %.2f s\n", stopwatchGlobal.GetElapsedTimeFloat()); delete[] pBuffer; // Now print the results. 
for (int i = 0; i < kRandomizationTypeCount; i++)
		{
			for (size_t sizeType = 0; sizeType < EAArrayCount(kSizes); sizeType++)
			{
				const eastl_size_t size = kSizes[sizeType];

				for (SortFunctionType sortFunction : sortFunctions)
				{
					sOutput.append_sprintf("%25s, %14s, Size: %6u, Time: %11" PRIu64 " ticks, Assignments: %11" PRIu64 "\n",
						GetSortFunctionName(sortFunction), GetRandomizationTypeName(i),
						(unsigned)size, sResults[i][sizeType][sortFunction].mTime,
						sResults[i][sizeType][sortFunction].mAssignCount);
				}

				sOutput.append("\n");
			}
		}
		EA::UnitTest::ReportVerbosity(2, "%s\n", sOutput.c_str());
	}

	#if !defined(EA_DEBUG)
		EA::UnitTest::SetNormalThreadPriority();
	#endif

	return nErrorCount;
}

// Callbacks used by CompareSmallInputSortPerformanceHelper:
// - OutputResultCallback appends one result line to the output string.
// - PostExecuteCallback runs after each timed run and may fill in extra fields of the result.
// - PreExecuteCallback runs just before each timed run.
// NOTE(review): the eastl::function signatures are missing in this copy of the file
// (presumably stripped during extraction) -- restore them from upstream.
typedef eastl::function OutputResultCallback;
typedef eastl::function PostExecuteCallback;
typedef eastl::function PreExecuteCallback;

// CompareSmallInputSortPerformanceHelper
// For every randomization type, array size and sort function, fills one vector of
// size * numSubArrays elements and times sorting it as numSubArrays consecutive
// sub-arrays of 'size' elements. The fastest of kRunCount runs is passed to
// outputResultCallback. Returns nErrorCount.
template
static int CompareSmallInputSortPerformanceHelper(eastl::vector &arraySizes, eastl::vector &sortFunctions, const PreExecuteCallback &preExecuteCallback, const PostExecuteCallback &postExecuteCallback, const OutputResultCallback &outputResultCallback)
{
	int nErrorCount = 0;

	EA::UnitTest::RandGenT rng(EA::UnitTest::GetRandSeed());
	EA::StdC::Stopwatch stopwatch(EA::StdC::Stopwatch::kUnitsCPUCycles);
	EA::StdC::Stopwatch stopwatchGlobal(EA::StdC::Stopwatch::kUnitsSeconds);
	const eastl_size_t kArraySizeMax = *eastl::max_element(eastl::begin(arraySizes), eastl::end(arraySizes));
	const int kRunCount = 4;
	const int numSubArrays = 128;

	eastl::string sOutput;
	sOutput.set_capacity(100000);

	// Scratch buffer for the sorts that take one; sized for the largest sub-array.
	ElementType* pBuffer = new ElementType[kArraySizeMax];

	stopwatchGlobal.Restart();

	for (int i = 0; i < kRandomizationTypeCount; i++)
	{
		for (size_t size : arraySizes)
		{
			for (SortFunctionType sortFunction : sortFunctions)
			{
				BenchmarkResult bestResult{};

				for (int c = 0; c < kRunCount; c++)
				{
					eastl::vector v(size * numSubArrays);

					// Reseed before generating so that every sort function and run sorts the same input.
					rng.SetSeed(EA::UnitTest::GetRandSeed());
					Randomize(v, rng, (RandomizationType)i);
					preExecuteCallback();

					switch (sortFunction)
					{
						case sf_quick_sort:
stopwatch.Restart();
							// Sort each consecutive sub-array of 'size' elements in turn.
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::quick_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_tim_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::tim_sort_buffer(begin, begin + size, pBuffer, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_insertion_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::insertion_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_shell_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::shell_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_heap_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::heap_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_merge_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::merge_sort(begin, begin + size, *get_default_allocator((EASTLAllocatorType*)NULL), CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_merge_sort_buffer:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::merge_sort_buffer(begin, begin + size, pBuffer, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_comb_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::comb_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_bubble_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::bubble_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_selection_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::selection_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_shaker_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { eastl::shaker_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_std_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { std::sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						case sf_std_stable_sort:
							stopwatch.Restart();
							for (auto begin = v.begin(); begin != v.end(); begin += size) { std::stable_sort(begin, begin + size, CompareFunction()); }
							stopwatch.Stop();
							break;

						// These sort functions have no small-input implementation here; report it as a failure.
						case sf_qsort:
						case sf_radix_sort:
						case sf_count:
						default:
							EATEST_VERIFY_F(false, "Missing case statement for sort function %s.", GetSortFunctionName(sortFunction));
							break;
					}

					BenchmarkResult result {};
					result.mTime = (uint64_t)stopwatch.GetElapsedTime();
					postExecuteCallback(result);

					// If this result was faster than a previously fastest result, record this one instead.
					if ((c == 0) || (result.mTime < bestResult.mTime))
						bestResult = result;

					// Every sub-array must be sorted on its own; the vector as a whole need not be.
					for (auto begin = v.begin(); begin != v.end(); begin += size)
					{
						VERIFY(eastl::is_sorted(begin, begin + size));
					}
				} // for each run

				outputResultCallback(sOutput, GetSortFunctionName(sortFunction), GetRandomizationTypeName(i), size, numSubArrays, bestResult);

			} // for each sort function...
			sOutput.append("\n");

		} // for each size type...

	} // for each randomization type...

	EA::UnitTest::ReportVerbosity(2, "Total time: %.2f s\n", stopwatchGlobal.GetElapsedTimeFloat());
	EA::UnitTest::ReportVerbosity(2, "%s\n", sOutput.c_str());

	delete[] pBuffer;
	return nErrorCount;
}

// CompareSmallInputSortPerformance
// Runs the small sub-array benchmark helper in three configurations (regular,
// slow compare, slow assignment) and returns the sum of their error counts.
static int CompareSmallInputSortPerformance()
{
	int nErrorCount = 0;
	eastl::vector arraySizes{1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 64, 128, 256};
	// Test quick sort and merge sort to provide a "base line" for performance. The other sort algorithms are mostly
	// O(n^2) and they are benchmarked to determine what sorts are ideal for sorting small arrays or sub-arrays. (i.e.
// this is useful to determine good algorithms to choose as a base case for some of the recursive sorts). eastl::vector sortFunctions{sf_quick_sort, sf_merge_sort_buffer, sf_bubble_sort, sf_comb_sort, sf_insertion_sort, sf_selection_sort, sf_shell_sort, sf_shaker_sort}; EA::UnitTest::ReportVerbosity(2, "Small Sub-array Sort comparison: Regular speed test\n"); nErrorCount += CompareSmallInputSortPerformanceHelper>( arraySizes, sortFunctions, PreExecuteCallback([]() {}), PostExecuteCallback([](BenchmarkResult&) {}), OutputResultCallback([](eastl::string& output, const char* sortFunction, const char* randomizationType, size_t size, size_t numSubArrays, const BenchmarkResult& result) { output.append_sprintf("%25s, %14s, Size: %8u, Time: %0.1f ticks %0.2f ticks/elem\n", sortFunction, randomizationType, (unsigned)size, float(result.mTime) / float(numSubArrays), float(result.mTime) / float(size * numSubArrays)); })); EA::UnitTest::ReportVerbosity(2, "Small Sub-array Sort comparison: Slow compare speed test\n"); nErrorCount += CompareSmallInputSortPerformanceHelper>( arraySizes, sortFunctions, PreExecuteCallback([]() { SlowCompare::Reset(); }), PostExecuteCallback( [](BenchmarkResult& result) { result.mCompareCount = (uint64_t)SlowCompare::nCompareCount; }), OutputResultCallback([](eastl::string& output, const char* sortFunction, const char* randomizationType, size_t size, size_t numSubArrays, const BenchmarkResult& result) { output.append_sprintf("%25s, %14s, Size: %6u, Time: %0.2f ticks, Compares: %0.2f\n", sortFunction, randomizationType, (unsigned)size, float(result.mTime) / float(numSubArrays), float(result.mCompareCount) / float(numSubArrays)); })); EA::UnitTest::ReportVerbosity(2, "Small Sub-array Sort comparison: Slow assignment speed test\n"); nErrorCount += CompareSmallInputSortPerformanceHelper, eastl::less>>( arraySizes, sortFunctions, PreExecuteCallback([]() { SlowAssign::Reset(); }), PostExecuteCallback([](BenchmarkResult& result) { result.mCompareCount = 
(uint64_t)SlowCompare::nCompareCount; result.mAssignCount = (uint64_t)SlowAssign::nAssignCount; }), OutputResultCallback([](eastl::string& output, const char* sortFunction, const char* randomizationType, size_t size, size_t numSubArrays, const BenchmarkResult& result) { output.append_sprintf("%25s, %14s, Size: %6u, Time: %0.2f ticks, Assignments: %0.2f\n", sortFunction, randomizationType, (unsigned)size, float(result.mTime) / float(numSubArrays), float(result.mAssignCount) / float(numSubArrays)); })); return nErrorCount; } void BenchmarkSort() { EASTLTest_Printf("Sort\n"); EA::UnitTest::RandGenT rng(12345678); // For debugging sort code we should use 12345678, for normal testing use EA::UnitTest::GetRandSeed(). EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); if (EA::UnitTest::GetVerbosity() >= 3) { CompareSortPerformance(); CompareSmallInputSortPerformance(); } { // Exercise some declarations int nErrorCount = 0; ValuePair vp1 = {0, 0}, vp2 = {0, 0}; VPCompare c1, c2; VERIFY(c1.operator()(vp1, vp2) == c2.operator()(vp1, vp2)); VERIFY((vp1 < vp2) || (vp1 == vp2) || !(vp1 == vp2)); } { eastl::vector intVector(10000); eastl::generate(intVector.begin(), intVector.end(), rng); for (int i = 0; i < 2; i++) { /////////////////////////////// // Test quick_sort/vector/ValuePair /////////////////////////////// StdVectorVP stdVectorVP(intVector.size()); EaVectorVP eaVectorVP(intVector.size()); for (eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { const ValuePair vp = {intVector[j], intVector[j]}; stdVectorVP[j] = vp; eaVectorVP[j] = vp; } TestQuickSortStdVP(stopwatch1, stdVectorVP); TestQuickSortEaVP (stopwatch2, eaVectorVP); if(i == 1) Benchmark::AddResult("sort/q_sort/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); // Benchmark the sorting of something that is already sorted. 
TestQuickSortStdVP(stopwatch1, stdVectorVP); TestQuickSortEaVP (stopwatch2, eaVectorVP); if(i == 1) Benchmark::AddResult("sort/q_sort/vector/sorted", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test quick_sort/vector/Int /////////////////////////////// StdVectorInt stdVectorInt(intVector.size()); EaVectorInt eaVectorInt (intVector.size()); for(eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { stdVectorInt[j] = intVector[j]; eaVectorInt[j] = intVector[j]; } TestQuickSortStdInt(stopwatch1, stdVectorInt); TestQuickSortEaInt (stopwatch2, eaVectorInt); if(i == 1) Benchmark::AddResult("sort/q_sort/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); // Benchmark the sorting of something that is already sorted. TestQuickSortStdInt(stopwatch1, stdVectorInt); TestQuickSortEaInt (stopwatch2, eaVectorInt); if(i == 1) Benchmark::AddResult("sort/q_sort/vector/sorted", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test quick_sort/vector/TestObject /////////////////////////////// StdVectorTO stdVectorTO(intVector.size()); EaVectorTO eaVectorTO(intVector.size()); for (eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { stdVectorTO[j] = TestObject(intVector[j]); eaVectorTO[j] = TestObject(intVector[j]); } TestQuickSortStdTO(stopwatch1, stdVectorTO); TestQuickSortEaTO(stopwatch2, eaVectorTO); if (i == 1) Benchmark::AddResult("sort/q_sort/vector", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); // Benchmark the sorting of something that is already sorted. 
TestQuickSortStdTO(stopwatch1, stdVectorTO); TestQuickSortEaTO(stopwatch2, eaVectorTO); if (i == 1) Benchmark::AddResult("sort/q_sort/vector/sorted", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test quick_sort/TestObject[] /////////////////////////////// // Reset the values back to the unsorted state. for(eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { stdVectorTO[j] = TestObject(intVector[j]); eaVectorTO[j] = TestObject(intVector[j]); } TestQuickSortStdTO(stopwatch1, stdVectorTO); TestQuickSortEaTO (stopwatch2, eaVectorTO); if(i == 1) Benchmark::AddResult("sort/q_sort/TestObject[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); // Benchmark the sorting of something that is already sorted. TestQuickSortStdTO(stopwatch1, stdVectorTO); TestQuickSortEaTO (stopwatch2, eaVectorTO); if(i == 1) Benchmark::AddResult("sort/q_sort/TestObject[]/sorted", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkString.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS() #include #include #include #include EA_RESTORE_ALL_VC_WARNINGS() using namespace EA; namespace { template void TestPushBack(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); for(int i = 0; i < 100000; i++) c.push_back((typename Container::value_type)(i & ((typename Container::value_type)~0))); stopwatch.Stop(); } template void TestInsert1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p) { const typename Container::size_type s = c.size(); stopwatch.Restart(); for(int i = 0; i < 100; i++) c.insert(s - (typename Container::size_type)(i * 317), p); stopwatch.Stop(); } template void TestErase1(EA::StdC::Stopwatch& stopwatch, Container& c) { const typename Container::size_type s = c.size(); stopwatch.Restart(); for(int i = 0; i < 100; i++) c.erase(s - (typename Container::size_type)(i * 339), 7); stopwatch.Stop(); } template void TestReplace1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int n) { const typename Container::size_type s = c.size(); stopwatch.Restart(); for(int i = 0; i < 1000; i++) c.replace(s - (typename Container::size_type)(i * 5), ((n - 2) + (i & 3)), p, n); // The second argument rotates through n-2, n-1, n, n+1, n-2, etc. stopwatch.Stop(); } template void TestReserve(EA::StdC::Stopwatch& stopwatch, Container& c) { const typename Container::size_type s = c.capacity(); stopwatch.Restart(); for(int i = 0; i < 1000; i++) c.reserve((s - 2) + (i & 3)); // The second argument rotates through n-2, n-1, n, n+1, n-2, etc. 
stopwatch.Stop(); } template void TestSize(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, c.size()); stopwatch.Stop(); } template void TestBracket(EA::StdC::Stopwatch& stopwatch, Container& c) { int32_t temp = 0; stopwatch.Restart(); for(typename Container::size_type j = 0, jEnd = c.size(); j < jEnd; j++) temp += c[j]; stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)temp); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, *eastl::find(c.begin(), c.end(), (typename Container::value_type)~0)); stopwatch.Stop(); } template void TestFind1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int pos, int n) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, c.find(p, (typename Container::size_type)pos, (typename Container::size_type)n)); stopwatch.Stop(); } template void TestRfind1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int pos, int n) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, c.rfind(p, (typename Container::size_type)pos, (typename Container::size_type)n)); stopwatch.Stop(); } template void TestFirstOf1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int pos, int n) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, c.find_first_of(p, (typename Container::size_type)pos, (typename Container::size_type)n)); stopwatch.Stop(); } template void TestLastOf1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int pos, int n) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, c.find_last_of(p, (typename Container::size_type)pos, (typename Container::size_type)n)); stopwatch.Stop(); } template void TestFirstNotOf1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int pos, int n) { stopwatch.Restart(); for(int i 
= 0; i < 1000; i++) Benchmark::DoNothing(&c, c.find_first_not_of(p, (typename Container::size_type)pos, (typename Container::size_type)n)); stopwatch.Stop(); } template void TestLastNotOf1(EA::StdC::Stopwatch& stopwatch, Container& c, T* p, int pos, int n) { stopwatch.Restart(); for(int i = 0; i < 1000; i++) Benchmark::DoNothing(&c, c.find_last_not_of(p, (typename Container::size_type)pos, (typename Container::size_type)n)); stopwatch.Stop(); } template void TestCompare(EA::StdC::Stopwatch& stopwatch, Container& c1, Container& c2) // size() { stopwatch.Restart(); for(int i = 0; i < 500; i++) Benchmark::DoNothing(&c1, c1.compare(c2)); stopwatch.Stop(); } template void TestSwap(EA::StdC::Stopwatch& stopwatch, Container& c1, Container& c2) // size() { stopwatch.Restart(); for(int i = 0; i < 10000; i++) // Make sure this is an even count so that when done things haven't changed. { c1.swap(c2); Benchmark::DoNothing(&c1); } stopwatch.Stop(); } } // namespace void BenchmarkString() { EASTLTest_Printf("String\n"); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { for(int i = 0; i < 2; i++) { std::basic_string ss8(16, 0); // We initialize to size of 16 because different implementations may make eastl::basic_string es8(16, 0); // different tradeoffs related to startup size. Initial operations are faster // when strings start with a higher reserve, but they use more memory. std::basic_string ss16(16, 0); // We try to nullify this tradeoff for the tests below by starting all at eastl::basic_string es16(16, 0); // the same baseline allocation. 
/////////////////////////////// // Test push_back /////////////////////////////// TestPushBack(stopwatch1, ss8); TestPushBack(stopwatch2, es8); if(i == 1) Benchmark::AddResult("string/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestPushBack(stopwatch1, ss16); TestPushBack(stopwatch2, es16); if(i == 1) Benchmark::AddResult("string/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test insert(size_type position, const value_type* p) /////////////////////////////// const char8_t pInsert1_8[] = { 'a', 0 }; TestInsert1(stopwatch1, ss8, pInsert1_8); TestInsert1(stopwatch2, es8, pInsert1_8); if(i == 1) Benchmark::AddResult("string/insert/pos,p", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); const char16_t pInsert1_16[] = { 'a', 0 }; TestInsert1(stopwatch1, ss16, pInsert1_16); TestInsert1(stopwatch2, es16, pInsert1_16); if(i == 1) Benchmark::AddResult("string/insert/pos,p", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase(size_type position, size_type n) /////////////////////////////// TestErase1(stopwatch1, ss8); TestErase1(stopwatch2, es8); if(i == 1) Benchmark::AddResult("string/erase/pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestErase1(stopwatch1, ss16); TestErase1(stopwatch2, es16); if(i == 1) Benchmark::AddResult("string/erase/pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test replace(size_type position, size_type n1, const value_type* p, size_type n2) /////////////////////////////// const int kReplace1Size = 8; const char8_t pReplace1_8[kReplace1Size] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' }; TestReplace1(stopwatch1, ss8, pReplace1_8, kReplace1Size); TestReplace1(stopwatch2, es8, pReplace1_8, 
kReplace1Size); if(i == 1) Benchmark::AddResult("string/replace/pos,n,p,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); const char16_t pReplace1_16[kReplace1Size] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' }; TestReplace1(stopwatch1, ss16, pReplace1_16, kReplace1Size); TestReplace1(stopwatch2, es16, pReplace1_16, kReplace1Size); if(i == 1) Benchmark::AddResult("string/replace/pos,n,p,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test reserve(size_type) /////////////////////////////// TestReserve(stopwatch1, ss8); TestReserve(stopwatch2, es8); if(i == 1) Benchmark::AddResult("string/reserve", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestReserve(stopwatch1, ss16); TestReserve(stopwatch2, es16); if(i == 1) Benchmark::AddResult("string/reserve", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test size() /////////////////////////////// TestSize(stopwatch1, ss8); TestSize(stopwatch2, es8); if(i == 1) Benchmark::AddResult("string/size", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSize(stopwatch1, ss16); TestSize(stopwatch2, es16); if(i == 1) Benchmark::AddResult("string/size", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[]. /////////////////////////////// TestBracket(stopwatch1, ss8); TestBracket(stopwatch2, es8); if(i == 1) Benchmark::AddResult("string/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestBracket(stopwatch1, ss16); TestBracket(stopwatch2, es16); if(i == 1) Benchmark::AddResult("string/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration via find(). 
/////////////////////////////// TestFind(stopwatch1, ss8); TestFind(stopwatch2, es8); if(i == 1) Benchmark::AddResult("string/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestFind(stopwatch1, ss16); TestFind(stopwatch2, es16); if(i == 1) Benchmark::AddResult("string/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find(const value_type* p, size_type position, size_type n) /////////////////////////////// const int kFind1Size = 7; const char8_t pFind1_8[kFind1Size] = { 'p', 'a', 't', 't', 'e', 'r', 'n' }; ss8.insert(ss8.size() / 2, pFind1_8); es8.insert(es8.size() / 2, pFind1_8); TestFind1(stopwatch1, ss8, pFind1_8, 15, kFind1Size); TestFind1(stopwatch2, es8, pFind1_8, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/find/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); const char16_t pFind1_16[kFind1Size] = { 'p', 'a', 't', 't', 'e', 'r', 'n' }; #if !defined(EA_PLATFORM_IPHONE) && (!defined(EA_COMPILER_CLANG) && defined(EA_PLATFORM_MINGW)) // Crashes on iPhone. 
ss16.insert(ss8.size() / 2, pFind1_16); #endif es16.insert(es8.size() / 2, pFind1_16); TestFind1(stopwatch1, ss16, pFind1_16, 15, kFind1Size); TestFind1(stopwatch2, es16, pFind1_16, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/find/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test rfind(const value_type* p, size_type position, size_type n) /////////////////////////////// TestRfind1(stopwatch1, ss8, pFind1_8, 15, kFind1Size); TestRfind1(stopwatch2, es8, pFind1_8, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/rfind/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestRfind1(stopwatch1, ss16, pFind1_16, 15, kFind1Size); TestRfind1(stopwatch2, es16, pFind1_16, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/rfind/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); //NOTICE (RASHIN): //FindFirstOf variants are incredibly slow on palm pixi debug builds. //Disabling for now... 
#if !defined(EA_DEBUG) /////////////////////////////// // Test find_first_of(const value_type* p, size_type position, size_type n /////////////////////////////// const int kFindOf1Size = 7; const char8_t pFindOf1_8[kFindOf1Size] = { '~', '~', '~', '~', '~', '~', '~' }; TestFirstOf1(stopwatch1, ss8, pFindOf1_8, 15, kFindOf1Size); TestFirstOf1(stopwatch2, es8, pFindOf1_8, 15, kFindOf1Size); if(i == 1) Benchmark::AddResult("string/find_first_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); const char16_t pFindOf1_16[kFindOf1Size] = { '~', '~', '~', '~', '~', '~', '~' }; TestFirstOf1(stopwatch1, ss16, pFindOf1_16, 15, kFindOf1Size); TestFirstOf1(stopwatch2, es16, pFindOf1_16, 15, kFindOf1Size); if(i == 1) Benchmark::AddResult("string/find_first_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find_last_of(const value_type* p, size_type position, size_type n /////////////////////////////// TestLastOf1(stopwatch1, ss8, pFindOf1_8, 15, kFindOf1Size); TestLastOf1(stopwatch2, es8, pFindOf1_8, 15, kFindOf1Size); if(i == 1) Benchmark::AddResult("string/find_last_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestLastOf1(stopwatch1, ss16, pFindOf1_16, 15, kFindOf1Size); TestLastOf1(stopwatch2, es16, pFindOf1_16, 15, kFindOf1Size); if(i == 1) Benchmark::AddResult("string/find_last_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find_first_not_of(const value_type* p, size_type position, size_type n /////////////////////////////// TestFirstNotOf1(stopwatch1, ss8, pFind1_8, 15, kFind1Size); TestFirstNotOf1(stopwatch2, es8, pFind1_8, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/find_first_not_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); 
TestFirstNotOf1(stopwatch1, ss16, pFind1_16, 15, kFind1Size); TestFirstNotOf1(stopwatch2, es16, pFind1_16, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/find_first_not_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test find_last_of(const value_type* p, size_type position, size_type n /////////////////////////////// TestLastNotOf1(stopwatch1, ss8, pFind1_8, 15, kFind1Size); TestLastNotOf1(stopwatch2, es8, pFind1_8, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/find_last_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestLastNotOf1(stopwatch1, ss16, pFind1_16, 15, kFind1Size); TestLastNotOf1(stopwatch2, es16, pFind1_16, 15, kFind1Size); if(i == 1) Benchmark::AddResult("string/find_last_of/p,pos,n", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); #endif /////////////////////////////// // Test compare() /////////////////////////////// std::basic_string ss8X(ss8); eastl::basic_string es8X(es8); std::basic_string ss16X(ss16); eastl::basic_string es16X(es16); TestCompare(stopwatch1, ss8, ss8X); TestCompare(stopwatch2, es8, es8X); if(i == 1) Benchmark::AddResult("string/compare", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestCompare(stopwatch1, ss16, ss16X); TestCompare(stopwatch2, es16, es16X); if(i == 1) Benchmark::AddResult("string/compare", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test swap() /////////////////////////////// TestSwap(stopwatch1, ss8, ss8X); TestSwap(stopwatch2, es8, es8X); if(i == 1) Benchmark::AddResult("string/swap", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestSwap(stopwatch1, ss16, ss16X); TestSwap(stopwatch2, es16, es16X); if(i == 1) Benchmark::AddResult("string/swap", stopwatch1.GetUnits(), 
stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkTupleVector.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #ifdef _MSC_VER #pragma warning(push, 0) #pragma warning(disable: 4350) #endif #include #include #include #include #ifdef _MSC_VER #pragma warning(pop) #endif using namespace EA; typedef std::vector StdVectorUint64; typedef eastl::tuple_vector EaTupleVectorUint64; struct PaddingStruct { char padding[56] = { 0 }; }; static const PaddingStruct DefaultPadding; typedef eastl::tuple PaddedTuple; typedef std::vector StdVectorUint64Padded; typedef eastl::tuple_vector EaTupleVectorUint64Padded; namespace { ////////////////////////////////////////////////////////////////////////////// // MovableType // struct MovableType { int8_t* mpData; enum { kDataSize = 128 }; MovableType() : mpData(new int8_t[kDataSize]) { memset(mpData, 0, kDataSize); } MovableType(const MovableType& x) : mpData(new int8_t[kDataSize]) { memcpy(mpData, x.mpData, kDataSize); } MovableType& operator=(const MovableType& x) { if(!mpData) mpData = new int8_t[kDataSize]; memcpy(mpData, x.mpData, kDataSize); return *this; } #if EASTL_MOVE_SEMANTICS_ENABLED MovableType(MovableType&& x) EA_NOEXCEPT : mpData(x.mpData) { x.mpData = NULL; } MovableType& operator=(MovableType&& x) { eastl::swap(mpData, x.mpData); // In practice it may not be right to do a swap, depending on the case. return *this; } #endif ~MovableType() { delete[] mpData; } }; ////////////////////////////////////////////////////////////////////////////// // AutoRefCount // // Basic ref-counted object. 
// template class AutoRefCount { public: T* mpObject; public: AutoRefCount() EA_NOEXCEPT : mpObject(NULL) {} AutoRefCount(T* pObject) EA_NOEXCEPT : mpObject(pObject) { if(mpObject) mpObject->AddRef(); } AutoRefCount(T* pObject, int) EA_NOEXCEPT : mpObject(pObject) { // Inherit the existing refcount. } AutoRefCount(const AutoRefCount& x) EA_NOEXCEPT : mpObject(x.mpObject) { if(mpObject) mpObject->AddRef(); } AutoRefCount& operator=(const AutoRefCount& x) { return operator=(x.mpObject); } AutoRefCount& operator=(T* pObject) { if(pObject != mpObject) { T* const pTemp = mpObject; // Create temporary to prevent possible problems with re-entrancy. if(pObject) pObject->AddRef(); mpObject = pObject; if(pTemp) pTemp->Release(); } return *this; } #if EASTL_MOVE_SEMANTICS_ENABLED AutoRefCount(AutoRefCount&& x) EA_NOEXCEPT : mpObject(x.mpObject) { x.mpObject = NULL; } AutoRefCount& operator=(AutoRefCount&& x) { if(mpObject) mpObject->Release(); mpObject = x.mpObject; x.mpObject = NULL; return *this; } #endif ~AutoRefCount() { if(mpObject) mpObject->Release(); } T& operator *() const EA_NOEXCEPT { return *mpObject; } T* operator ->() const EA_NOEXCEPT { return mpObject; } operator T*() const EA_NOEXCEPT { return mpObject; } }; // class AutoRefCount struct RefCounted { int mRefCount; static int msAddRefCount; static int msReleaseCount; RefCounted() : mRefCount(1) {} int AddRef() { ++msAddRefCount; return ++mRefCount; } int Release() { ++msReleaseCount; if(mRefCount > 1) return --mRefCount; delete this; return 0; } }; int RefCounted::msAddRefCount = 0; int RefCounted::msReleaseCount = 0; } // namespace namespace { template void TestPushBack(EA::StdC::Stopwatch& stopwatch, Container& c, eastl::vector& intVector) { stopwatch.Restart(); for(eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) c.push_back((uint64_t)intVector[j]); stopwatch.Stop(); } template void TestBracket(EA::StdC::Stopwatch& stopwatch, Container& c) { uint64_t temp = 0; stopwatch.Restart(); for(typename 
Container::size_type j = 0, jEnd = c.size(); j < jEnd; j++) temp += c[j]; stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(temp & 0xffffffff)); } void TestBracket(EA::StdC::Stopwatch& stopwatch, EaTupleVectorUint64& c) { uint64_t temp = 0; stopwatch.Restart(); for (typename EaTupleVectorUint64::size_type j = 0, jEnd = c.size(); j < jEnd; j++) temp += eastl::get<0>(c[j]); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(temp & 0xffffffff)); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); typedef typename Container::iterator iterator_t; // This typedef is required to get this code to compile on RVCT iterator_t it = eastl::find(c.begin(), c.end(), UINT64_C(0xffffffffffff)); stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)*it); } void TestFind(EA::StdC::Stopwatch& stopwatch, EaTupleVectorUint64& c) { eastl::tuple val(0xffffffffffff); stopwatch.Restart(); EaTupleVectorUint64::iterator it = eastl::find(c.begin(), c.end(), val); stopwatch.Stop(); if (it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)eastl::get<0>(*it)); } template void TestSort(EA::StdC::Stopwatch& stopwatch, Container& c) { // Intentionally use eastl sort in order to measure just // vector access speed and not be polluted by sort speed. stopwatch.Restart(); eastl::quick_sort(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(c[0] & 0xffffffff)); } void TestSort(EA::StdC::Stopwatch& stopwatch, EaTupleVectorUint64& c) { // Intentionally use eastl sort in order to measure just // vector access speed and not be polluted by sort speed. 
stopwatch.Restart(); eastl::quick_sort(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(eastl::get<0>(c[0]) & 0xffffffff)); } template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = 100, it = c.begin(); j < jEnd; ++j) { it = c.insert(it, UINT64_C(0xffffffffffff)); if(it == c.end()) // Try to safely increment the iterator three times. it = c.begin(); if(++it == c.end()) it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); } template void TestErase(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = 100, it = c.begin(); j < jEnd; ++j) { it = c.erase(it); if(it == c.end()) // Try to safely increment the iterator three times. it = c.begin(); if(++it == c.end()) it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); } template void TestMoveReallocate(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); while(c.size() < 8192) c.resize(c.capacity() + 1); stopwatch.Stop(); } template void TestMoveErase(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); while(!c.empty()) c.erase(c.begin()); stopwatch.Stop(); } ////////////////////////////////////////////////////////////////////////// // Variations of test functions for the Padded structures template void TestTuplePushBack(EA::StdC::Stopwatch& stopwatch, Container& c, eastl::vector& intVector) { stopwatch.Restart(); for (eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) { PaddedTuple tup((uint64_t)intVector[j], DefaultPadding); c.push_back(tup); } stopwatch.Stop(); } template void TestTupleBracket(EA::StdC::Stopwatch& stopwatch, Container& c) { uint64_t temp = 0; stopwatch.Restart(); for (typename Container::size_type j = 0, jEnd = 
c.size(); j < jEnd; j++) temp += eastl::get<0>(c[j]); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(temp & 0xffffffff)); } template void TestTupleFind(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); typedef typename Container::iterator iterator_t; // This typedef is required to get this code to compile on RVCT iterator_t it = eastl::find_if(c.begin(), c.end(), [](auto tup) { return eastl::get<0>(tup) == 0xFFFFFFFF; }); stopwatch.Stop(); if (it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)eastl::get<0>(*it)); } template void TestTupleSort(EA::StdC::Stopwatch& stopwatch, Container& c) { // Intentionally use eastl sort in order to measure just // vector access speed and not be polluted by sort speed. stopwatch.Restart(); eastl::quick_sort(c.begin(), c.end(), [](auto a, auto b) { return eastl::get<0>(a) < eastl::get<0>(b); }); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(eastl::get<0>(c[0]) & 0xffffffff)); } template void TestTupleInsert(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; PaddedTuple tup(0xFFFFFFFF, DefaultPadding); stopwatch.Restart(); for (j = 0, jEnd = 100, it = c.begin(); j < jEnd; ++j) { it = c.insert(it, tup); if (it == c.end()) // Try to safely increment the iterator three times. it = c.begin(); if (++it == c.end()) it = c.begin(); if (++it == c.end()) it = c.begin(); } stopwatch.Stop(); } template void TestTupleErase(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for (j = 0, jEnd = 100, it = c.begin(); j < jEnd; ++j) { it = c.erase(it); if (it == c.end()) // Try to safely increment the iterator three times. 
it = c.begin(); if (++it == c.end()) it = c.begin(); if (++it == c.end()) it = c.begin(); } stopwatch.Stop(); } } // namespace void BenchmarkTupleVector() { EASTLTest_Printf("TupleVector\n"); EA::UnitTest::RandGenT rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { eastl::vector intVector(100000); eastl::generate(intVector.begin(), intVector.end(), rng); for(int i = 0; i < 2; i++) { StdVectorUint64 stdVectorUint64; EaTupleVectorUint64 eaTupleVectorUint64; /////////////////////////////// // Test push_back /////////////////////////////// TestPushBack(stopwatch1, stdVectorUint64, intVector); TestPushBack(stopwatch2, eaTupleVectorUint64, intVector); if(i == 1) Benchmark::AddResult("tuple_vector/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[]. /////////////////////////////// TestBracket(stopwatch1, stdVectorUint64); TestBracket(stopwatch2, eaTupleVectorUint64); if(i == 1) Benchmark::AddResult("tuple_vector/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration via find(). /////////////////////////////// TestFind(stopwatch1, stdVectorUint64); TestFind(stopwatch2, eaTupleVectorUint64); TestFind(stopwatch1, stdVectorUint64); TestFind(stopwatch2, eaTupleVectorUint64); if(i == 1) Benchmark::AddResult("tuple_vector/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test sort /////////////////////////////// // Currently VC++ complains about our sort function decrementing std::iterator that is already at begin(). In the strictest sense, // that's a valid complaint, but we aren't testing std STL here. We will want to revise our sort function eventually. 
#if !defined(_MSC_VER) || !defined(_ITERATOR_DEBUG_LEVEL) || (_ITERATOR_DEBUG_LEVEL < 2) TestSort(stopwatch1, stdVectorUint64); TestSort(stopwatch2, eaTupleVectorUint64); if(i == 1) Benchmark::AddResult("tuple_vector/sort", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); #endif /////////////////////////////// // Test insert /////////////////////////////// TestInsert(stopwatch1, stdVectorUint64); TestInsert(stopwatch2, eaTupleVectorUint64); if(i == 1) Benchmark::AddResult("tuple_vector/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase /////////////////////////////// TestErase(stopwatch1, stdVectorUint64); TestErase(stopwatch2, eaTupleVectorUint64); if(i == 1) Benchmark::AddResult("tuple_vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////////////////// // Test move of MovableType // Should be much faster with C++11 move. /////////////////////////////////////////// std::vector stdVectorMovableType; eastl::tuple_vector eaTupleVectorMovableType; TestMoveReallocate(stopwatch1, stdVectorMovableType); TestMoveReallocate(stopwatch2, eaTupleVectorMovableType); if(i == 1) Benchmark::AddResult("tuple_vector/reallocate", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestMoveErase(stopwatch1, stdVectorMovableType); TestMoveErase(stopwatch2, eaTupleVectorMovableType); if(i == 1) Benchmark::AddResult("tuple_vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////////////////// // Test move of AutoRefCount // Should be much faster with C++11 move. 
/////////////////////////////////////////// std::vector > stdVectorAutoRefCount; eastl::tuple_vector > eaTupleVectorAutoRefCount; for(size_t a = 0; a < 2048; a++) { stdVectorAutoRefCount.push_back(AutoRefCount(new RefCounted)); eaTupleVectorAutoRefCount.push_back(AutoRefCount(new RefCounted)); } RefCounted::msAddRefCount = 0; RefCounted::msReleaseCount = 0; TestMoveErase(stopwatch1, stdVectorAutoRefCount); //EASTLTest_Printf("tuple_vector/erase std counts: %d %d\n", RefCounted::msAddRefCount, RefCounted::msReleaseCount); RefCounted::msAddRefCount = 0; RefCounted::msReleaseCount = 0; TestMoveErase(stopwatch2, eaTupleVectorAutoRefCount); //EASTLTest_Printf("tuple_vector/erase EA counts: %d %d\n", RefCounted::msAddRefCount, RefCounted::msReleaseCount); if(i == 1) Benchmark::AddResult("tuple_vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); ////////////////////////////////////////////////////////////////////////// // Test various operations with "padded" data, to demonstrate access/modification of sparse data StdVectorUint64Padded stdVectorUint64Padded; EaTupleVectorUint64Padded eaTupleVectorUint64Padded; /////////////////////////////// // Test push_back /////////////////////////////// TestTuplePushBack(stopwatch1, stdVectorUint64Padded, intVector); TestTuplePushBack(stopwatch2, eaTupleVectorUint64Padded, intVector); if(i == 1) Benchmark::AddResult("tuple_vector/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[]. /////////////////////////////// TestTupleBracket(stopwatch1, stdVectorUint64Padded); TestTupleBracket(stopwatch2, eaTupleVectorUint64Padded); if(i == 1) Benchmark::AddResult("tuple_vector/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration via find(). 
/////////////////////////////// TestTupleFind(stopwatch1, stdVectorUint64Padded); TestTupleFind(stopwatch2, eaTupleVectorUint64Padded); TestTupleFind(stopwatch1, stdVectorUint64Padded); TestTupleFind(stopwatch2, eaTupleVectorUint64Padded); if(i == 1) Benchmark::AddResult("tuple_vector/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test sort /////////////////////////////// // Currently VC++ complains about our sort function decrementing std::iterator that is already at // begin(). In the strictest sense, that's a valid complaint, but we aren't testing std STL here. We // will want to revise our sort function eventually. #if !defined(_MSC_VER) || !defined(_ITERATOR_DEBUG_LEVEL) || (_ITERATOR_DEBUG_LEVEL < 2) TestTupleSort(stopwatch1, stdVectorUint64Padded); TestTupleSort(stopwatch2, eaTupleVectorUint64Padded); if(i == 1) Benchmark::AddResult("tuple_vector/sort", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); #endif /////////////////////////////// // Test insert /////////////////////////////// TestTupleInsert(stopwatch1, stdVectorUint64Padded); TestTupleInsert(stopwatch2, eaTupleVectorUint64Padded); if(i == 1) Benchmark::AddResult("tuple_vector/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase /////////////////////////////// TestTupleErase(stopwatch1, stdVectorUint64Padded); TestTupleErase(stopwatch2, eaTupleVectorUint64Padded); if(i == 1) Benchmark::AddResult("tuple_vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/BenchmarkVector.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #include #include #ifdef _MSC_VER #pragma warning(push, 0) #pragma warning(disable: 4350) #endif #include #include #include #include #ifdef _MSC_VER #pragma warning(pop) #endif using namespace EA; typedef std::vector StdVectorUint64; typedef eastl::vector EaVectorUint64; namespace { ////////////////////////////////////////////////////////////////////////////// // MovableType // struct MovableType { int8_t* mpData; enum { kDataSize = 128 }; MovableType() : mpData(new int8_t[kDataSize]) { memset(mpData, 0, kDataSize); } MovableType(const MovableType& x) : mpData(new int8_t[kDataSize]) { memcpy(mpData, x.mpData, kDataSize); } MovableType& operator=(const MovableType& x) { if(!mpData) mpData = new int8_t[kDataSize]; memcpy(mpData, x.mpData, kDataSize); return *this; } MovableType(MovableType&& x) EA_NOEXCEPT : mpData(x.mpData) { x.mpData = NULL; } MovableType& operator=(MovableType&& x) { eastl::swap(mpData, x.mpData); // In practice it may not be right to do a swap, depending on the case. return *this; } ~MovableType() { delete[] mpData; } }; ////////////////////////////////////////////////////////////////////////////// // AutoRefCount // // Basic ref-counted object. // template class AutoRefCount { public: T* mpObject; public: AutoRefCount() EA_NOEXCEPT : mpObject(NULL) {} AutoRefCount(T* pObject) EA_NOEXCEPT : mpObject(pObject) { if(mpObject) mpObject->AddRef(); } AutoRefCount(T* pObject, int) EA_NOEXCEPT : mpObject(pObject) { // Inherit the existing refcount. } AutoRefCount(const AutoRefCount& x) EA_NOEXCEPT : mpObject(x.mpObject) { if(mpObject) mpObject->AddRef(); } AutoRefCount& operator=(const AutoRefCount& x) { return operator=(x.mpObject); } AutoRefCount& operator=(T* pObject) { if(pObject != mpObject) { T* const pTemp = mpObject; // Create temporary to prevent possible problems with re-entrancy. 
if(pObject) pObject->AddRef(); mpObject = pObject; if(pTemp) pTemp->Release(); } return *this; } AutoRefCount(AutoRefCount&& x) EA_NOEXCEPT : mpObject(x.mpObject) { x.mpObject = NULL; } AutoRefCount& operator=(AutoRefCount&& x) { if(mpObject) mpObject->Release(); mpObject = x.mpObject; x.mpObject = NULL; return *this; } ~AutoRefCount() { if(mpObject) mpObject->Release(); } T& operator *() const EA_NOEXCEPT { return *mpObject; } T* operator ->() const EA_NOEXCEPT { return mpObject; } operator T*() const EA_NOEXCEPT { return mpObject; } }; // class AutoRefCount struct RefCounted { int mRefCount; static int msAddRefCount; static int msReleaseCount; RefCounted() : mRefCount(1) {} int AddRef() { ++msAddRefCount; return ++mRefCount; } int Release() { ++msReleaseCount; if(mRefCount > 1) return --mRefCount; delete this; return 0; } }; int RefCounted::msAddRefCount = 0; int RefCounted::msReleaseCount = 0; } // namespace namespace { template void TestPushBack(EA::StdC::Stopwatch& stopwatch, Container& c, eastl::vector& intVector) { stopwatch.Restart(); for(eastl_size_t j = 0, jEnd = intVector.size(); j < jEnd; j++) c.push_back((uint64_t)intVector[j]); stopwatch.Stop(); } template void TestBracket(EA::StdC::Stopwatch& stopwatch, Container& c) { uint64_t temp = 0; stopwatch.Restart(); for(typename Container::size_type j = 0, jEnd = c.size(); j < jEnd; j++) temp += c[j]; stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(temp & 0xffffffff)); } template void TestFind(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); typedef typename Container::iterator iterator_t; // This typedef is required to get this code to compile on RVCT iterator_t it = eastl::find(c.begin(), c.end(), UINT64_C(0xffffffffffff)); stopwatch.Stop(); if(it != c.end()) EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)*it); } template void TestSort(EA::StdC::Stopwatch& stopwatch, Container& 
c) { // Intentionally use eastl sort in order to measure just // vector access speed and not be polluted by sort speed. stopwatch.Restart(); eastl::quick_sort(c.begin(), c.end()); stopwatch.Stop(); EA::StdC::Snprintf(Benchmark::gScratchBuffer, Benchmark::kScratchBufferSize, "%u", (unsigned)(c[0] & 0xffffffff)); } template void TestInsert(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = 100, it = c.begin(); j < jEnd; ++j) { it = c.insert(it, UINT64_C(0xffffffffffff)); if(it == c.end()) // Try to safely increment the iterator three times. it = c.begin(); if(++it == c.end()) it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); } template void TestErase(EA::StdC::Stopwatch& stopwatch, Container& c) { typename Container::size_type j, jEnd; typename Container::iterator it; stopwatch.Restart(); for(j = 0, jEnd = 100, it = c.begin(); j < jEnd; ++j) { it = c.erase(it); if(it == c.end()) // Try to safely increment the iterator three times. 
it = c.begin(); if(++it == c.end()) it = c.begin(); if(++it == c.end()) it = c.begin(); } stopwatch.Stop(); } template void TestMoveReallocate(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); while(c.size() < 8192) c.resize(c.capacity() + 1); stopwatch.Stop(); } template void TestMoveErase(EA::StdC::Stopwatch& stopwatch, Container& c) { stopwatch.Restart(); while(!c.empty()) c.erase(c.begin()); stopwatch.Stop(); } } // namespace void BenchmarkVector() { EASTLTest_Printf("Vector\n"); EA::UnitTest::RandGenT rng(EA::UnitTest::GetRandSeed()); EA::StdC::Stopwatch stopwatch1(EA::StdC::Stopwatch::kUnitsCPUCycles); EA::StdC::Stopwatch stopwatch2(EA::StdC::Stopwatch::kUnitsCPUCycles); { eastl::vector intVector(100000); eastl::generate(intVector.begin(), intVector.end(), rng); for(int i = 0; i < 2; i++) { StdVectorUint64 stdVectorUint64; EaVectorUint64 eaVectorUint64; /////////////////////////////// // Test push_back /////////////////////////////// TestPushBack(stopwatch1, stdVectorUint64, intVector); TestPushBack(stopwatch2, eaVectorUint64, intVector); if(i == 1) Benchmark::AddResult("vector/push_back", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test operator[]. /////////////////////////////// TestBracket(stopwatch1, stdVectorUint64); TestBracket(stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("vector/operator[]", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test iteration via find(). 
/////////////////////////////// TestFind(stopwatch1, stdVectorUint64); TestFind(stopwatch2, eaVectorUint64); TestFind(stopwatch1, stdVectorUint64); TestFind(stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("vector/iteration", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test sort /////////////////////////////// // Currently VC++ complains about our sort function decrementing std::iterator that is already at begin(). In the strictest sense, // that's a valid complaint, but we aren't testing std STL here. We will want to revise our sort function eventually. #if !defined(_MSC_VER) || !defined(_ITERATOR_DEBUG_LEVEL) || (_ITERATOR_DEBUG_LEVEL < 2) TestSort(stopwatch1, stdVectorUint64); TestSort(stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("vector/sort", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); #endif /////////////////////////////// // Test insert /////////////////////////////// TestInsert(stopwatch1, stdVectorUint64); TestInsert(stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("vector/insert", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////// // Test erase /////////////////////////////// TestErase(stopwatch1, stdVectorUint64); TestErase(stopwatch2, eaVectorUint64); if(i == 1) Benchmark::AddResult("vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////////////////// // Test move of MovableType // Should be much faster with C++11 move. 
/////////////////////////////////////////// std::vector stdVectorMovableType; eastl::vector eaVectorMovableType; TestMoveReallocate(stopwatch1, stdVectorMovableType); TestMoveReallocate(stopwatch2, eaVectorMovableType); if(i == 1) Benchmark::AddResult("vector/reallocate", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); TestMoveErase(stopwatch1, stdVectorMovableType); TestMoveErase(stopwatch2, eaVectorMovableType); if(i == 1) Benchmark::AddResult("vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); /////////////////////////////////////////// // Test move of AutoRefCount // Should be much faster with C++11 move. /////////////////////////////////////////// std::vector > stdVectorAutoRefCount; eastl::vector > eaVectorAutoRefCount; for(size_t a = 0; a < 2048; a++) { stdVectorAutoRefCount.push_back(AutoRefCount(new RefCounted)); eaVectorAutoRefCount.push_back(AutoRefCount(new RefCounted)); } RefCounted::msAddRefCount = 0; RefCounted::msReleaseCount = 0; TestMoveErase(stopwatch1, stdVectorAutoRefCount); EASTLTest_Printf("vector/erase std counts: %d %d\n", RefCounted::msAddRefCount, RefCounted::msReleaseCount); RefCounted::msAddRefCount = 0; RefCounted::msReleaseCount = 0; TestMoveErase(stopwatch2, eaVectorAutoRefCount); EASTLTest_Printf("vector/erase EA counts: %d %d\n", RefCounted::msAddRefCount, RefCounted::msReleaseCount); if(i == 1) Benchmark::AddResult("vector/erase", stopwatch1.GetUnits(), stopwatch1.GetElapsedTime(), stopwatch2.GetElapsedTime()); } } } ================================================ FILE: benchmark/source/EASTLBenchmark.cpp ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #include #include #ifdef _MSC_VER #pragma warning(push, 0) #endif #include #include #include #ifdef _MSC_VER #pragma warning(pop) #endif namespace Benchmark { static int64_t ConvertStopwatchUnits(EA::StdC::Stopwatch::Units unitsSource, int64_t valueSource, EA::StdC::Stopwatch::Units unitsDest) { using namespace EA::StdC; int64_t valueDest = valueSource; if(unitsSource != unitsDest) { double sourceMultiplier; switch (unitsSource) { case Stopwatch::kUnitsCPUCycles: sourceMultiplier = Stopwatch::GetUnitsPerCPUCycle(unitsDest); // This will typically be a number less than 1. valueDest = (int64_t)(valueSource * sourceMultiplier); break; case Stopwatch::kUnitsCycles: sourceMultiplier = Stopwatch::GetUnitsPerStopwatchCycle(unitsDest); // This will typically be a number less than 1. valueDest = (int64_t)(valueSource * sourceMultiplier); break; case Stopwatch::kUnitsNanoseconds: case Stopwatch::kUnitsMicroseconds: case Stopwatch::kUnitsMilliseconds: case Stopwatch::kUnitsSeconds: case Stopwatch::kUnitsMinutes: case Stopwatch::kUnitsUserDefined: // To do. Also, handle the case of unitsDest being Cycles or CPUCycles and unitsSource being a time. break; } } return valueDest; } void WriteTime(int64_t timeNS, eastl::string& sTime) { if(timeNS > 1000000000) sTime.sprintf(" %6.2f s", (double)timeNS / 1000000000); else if(timeNS > 1000000) sTime.sprintf("%6.1f ms", (double)timeNS / 1000000); else if(timeNS > 1000) sTime.sprintf("%6.1f us", (double)timeNS / 1000); else sTime.sprintf("%6.1f ns", (double)timeNS / 1); } Environment gEnvironment; Environment& GetEnvironment() { return gEnvironment; } ResultSet gResultSet; ResultSet& GetResultSet() { return gResultSet; } // Scratch sprintf buffer char gScratchBuffer[kScratchBufferSize]; void DoNothing(...) { // Intentionally nothing. 
} void AddResult(const char* pName, int units, int64_t nTime1, int64_t nTime2, const char* pNotes) { Result result; result.msName = pName; result.mUnits = units; result.mTime1 = nTime1; result.mTime1NS = ConvertStopwatchUnits((EA::StdC::Stopwatch::Units)units, nTime1, EA::StdC::Stopwatch::kUnitsNanoseconds); result.mTime2 = nTime2; result.mTime2NS = ConvertStopwatchUnits((EA::StdC::Stopwatch::Units)units, nTime2, EA::StdC::Stopwatch::kUnitsNanoseconds); if(pNotes) result.msNotes = pNotes; gResultSet.insert(result); } void PrintResultLine(const Result& result) { const double fRatio = (double)result.mTime1 / (double)result.mTime2; const double fRatioPrinted = (fRatio > 100) ? 100 : fRatio; const double fPercentChange = fabs(((double)result.mTime1 - (double)result.mTime2) / (((double)result.mTime1 + (double)result.mTime2) / 2)); const bool bDifference = (result.mTime1 > 10) && (result.mTime2 > 10) && (fPercentChange > 0.25); const char* pDifference = (bDifference ? (result.mTime1 < result.mTime2 ? "-" : "+") : ""); eastl::string sClockTime1, sClockTime2; WriteTime(result.mTime1NS, sClockTime1); // This converts an integer in nanoseconds (e.g. 23400000) to a string (e.g. "23.4 ms") WriteTime(result.mTime2NS, sClockTime2); EA::UnitTest::Report("%-43s | %13" PRIu64 " %s | %13" PRIu64 " %s | %10.2f%10s", result.msName.c_str(), result.mTime1, sClockTime1.c_str(), result.mTime2, sClockTime2.c_str(), fRatioPrinted, pDifference); if(result.msNotes.length()) // If there are any notes... 
EA::UnitTest::Report(" %s", result.msNotes.c_str()); EA::UnitTest::Report("\n"); } #if defined(EASTL_BENCHMARK_WRITE_FILE) && EASTL_BENCHMARK_WRITE_FILE #if !defined(EASTL_BENCHMARK_WRITE_FILE_PATH) #define EASTL_BENCHMARK_WRITE_FILE_PATH "BenchmarkResults.txt" #endif struct FileWriter { FILE* mpReportFile; EA::EAMain::ReportFunction mpSavedReportFunction; static FileWriter* gpFileWriter; static void StaticPrintfReportFunction(const char8_t* pText) { if(gpFileWriter) gpFileWriter->PrintfReportFunction(pText); } void PrintfReportFunction(const char8_t* pText) { fwrite(pText, strlen(pText), 1, mpReportFile); EA::EAMain::ReportFunction gpReportFunction = EA::EAMain::GetDefaultReportFunction(); gpReportFunction(pText); } FileWriter() : mpReportFile(NULL), mpSavedReportFunction(NULL) { mpReportFile = fopen(EASTL_BENCHMARK_WRITE_FILE_PATH, "w+"); if(mpReportFile) { gpFileWriter = this; mpSavedReportFunction = EA::EAMain::GetDefaultReportFunction(); EA::EAMain::SetReportFunction(StaticPrintfReportFunction); } } ~FileWriter() { if(mpReportFile) { gpFileWriter = NULL; EA::EAMain::SetReportFunction(mpSavedReportFunction); fclose(mpReportFile); } } }; FileWriter* FileWriter::gpFileWriter = NULL; #endif void PrintResults() { #if defined(EASTL_BENCHMARK_WRITE_FILE) && EASTL_BENCHMARK_WRITE_FILE FileWriter fileWriter; // This will auto-execute. 
#endif // Print the results EA::UnitTest::Report("\n"); EA::UnitTest::Report("****************************************************************************************\n"); EA::UnitTest::Report("EASTL Benchmark test results\n"); EA::UnitTest::Report("****************************************************************************************\n"); EA::UnitTest::Report("\n"); EA::UnitTest::Report("EASTL version: %s\n", EASTL_VERSION); EA::UnitTest::Report("Platform: %s\n", gEnvironment.msPlatform.c_str()); EA::UnitTest::Report("Compiler: %s\n", EA_COMPILER_STRING); #if defined(EA_DEBUG) || defined(_DEBUG) EA::UnitTest::Report("Allocator: PPMalloc::GeneralAllocatorDebug. Thread safety enabled.\n"); EA::UnitTest::Report("Build: Debug. Inlining disabled. STL debug features disabled.\n"); #else EA::UnitTest::Report("Allocator: PPMalloc::GeneralAllocator. Thread safety enabled.\n"); EA::UnitTest::Report("Build: Full optimization. Inlining enabled.\n"); #endif EA::UnitTest::Report("\n"); EA::UnitTest::Report("Values are ticks and time to complete tests; smaller values are better.\n"); EA::UnitTest::Report("\n"); EA::UnitTest::Report("%-43s%26s%26s%13s%13s\n", "Test", gEnvironment.msSTLName1.c_str(), gEnvironment.msSTLName2.c_str(), "Ratio", "Difference?"); EA::UnitTest::Report("---------------------------------------------------------------------------------------------------------------------\n"); eastl::string sTestTypeLast; eastl::string sTestTypeTemp; for(ResultSet::iterator it = gResultSet.begin(); it != gResultSet.end(); ++it) { const Result& result = *it; eastl_size_t n = result.msName.find('/'); if(n == eastl::string::npos) n = result.msName.length(); sTestTypeTemp.assign(result.msName, 0, n); if(sTestTypeTemp != sTestTypeLast) // If it looks like we are changing to a new test type... add an empty line to help readability. 
{ if(it != gResultSet.begin()) EA::UnitTest::Report("\n"); sTestTypeLast = sTestTypeTemp; } PrintResultLine(result); } // We will print out a final line that has the sum of the rows printed above. Result resultSum; resultSum.msName = "sum"; for(ResultSet::iterator its = gResultSet.begin(); its != gResultSet.end(); ++its) { const Result& resultTemp = *its; EASTL_ASSERT(resultTemp.mUnits == EA::StdC::Stopwatch::kUnitsCPUCycles); // Our ConvertStopwatchUnits call below assumes that every measured time is CPUCycles. resultSum.mTime1 += resultTemp.mTime1; resultSum.mTime2 += resultTemp.mTime2; } // We do this convert as a final step instead of the loop in order to avoid loss of precision. resultSum.mTime1NS = ConvertStopwatchUnits(EA::StdC::Stopwatch::kUnitsCPUCycles, resultSum.mTime1, EA::StdC::Stopwatch::kUnitsNanoseconds); resultSum.mTime2NS = ConvertStopwatchUnits(EA::StdC::Stopwatch::kUnitsCPUCycles, resultSum.mTime2, EA::StdC::Stopwatch::kUnitsNanoseconds); EA::UnitTest::Report("\n"); PrintResultLine(resultSum); EA::UnitTest::Report("\n"); EA::UnitTest::Report("****************************************************************************************\n"); EA::UnitTest::Report("\n"); // Clear the results gResultSet.clear(); gEnvironment.clear(); } } // namespace Benchmark ================================================ FILE: benchmark/source/EASTLBenchmark.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #ifndef EASTLBENCHMARK_H #define EASTLBENCHMARK_H // Intrinsic control // // Our benchmark results are being skewed by inconsistent decisions by the // VC++ compiler to use intrinsic functions. Additionally, many of our // benchmarks work on large blocks of elements, whereas intrinsics often // are an improvement only over small blocks of elements. 
As a result, // enabling of intrinsics is often resulting in poor benchmark results for // code that gets an intrinsic enabled for it, even though it will often // happen in real code to be the opposite case. The disabling of intrinsics // here often results in EASTL performance being lower than it would be in // real-world situations. // #include #ifdef _MSC_VER #pragma function(strlen, strcmp, strcpy, strcat, memcpy, memcmp, memset) #endif #include #include #include #include #include #include void BenchmarkSort(); void BenchmarkList(); void BenchmarkString(); void BenchmarkVector(); void BenchmarkDeque(); void BenchmarkSet(); void BenchmarkMap(); void BenchmarkHash(); void BenchmarkAlgorithm(); void BenchmarkHeap(); void BenchmarkBitset(); void BenchmarkTupleVector(); namespace Benchmark { // Environment // // The environment for this benchmark test. // struct Environment { eastl::string8 msPlatform; // Name of test platform (e.g. "Windows") eastl::string8 msSTLName1; // Name of competitor #1 (e.g. "EASTL"). eastl::string8 msSTLName2; // Name of competitor #2 (e.g. "MS STL"). void clear() { msPlatform.set_capacity(0); msSTLName1.set_capacity(0); msSTLName2.set_capacity(0); } }; Environment& GetEnvironment(); // Result // // An individual benchmark result. // struct Result { eastl::string8 msName; // Test name (e.g. "vector/insert"). int mUnits; // Timing units (e.g. EA::StdC::Stopwatch::kUnitsSeconds). int64_t mTime1; // Time of competitor #1. uint64_t mTime1NS; // Nanoseconds. int64_t mTime2; // Time of competitor #2. int64_t mTime2NS; // Nanoseconds. eastl::string8 msNotes; // Any comments to attach to this result. 
Result() : msName(), mUnits(EA::StdC::Stopwatch::kUnitsCPUCycles), mTime1(0), mTime1NS(0), mTime2(0), mTime2NS(0), msNotes() { } }; inline bool operator<(const Result& r1, const Result& r2) { return r1.msName < r2.msName; } typedef eastl::set ResultSet; ResultSet& GetResultSet(); // Scratch sprintf buffer const int kScratchBufferSize = 1024; extern char gScratchBuffer[kScratchBufferSize]; // Utility functions // void DoNothing(...); void AddResult(const char* pName, int units, int64_t nTime1, int64_t nTime2, const char* pNotes = NULL); void PrintResults(); void WriteTime(int64_t timeNS, eastl::string& sTime); } // namespace Benchmark /////////////////////////////////////////////////////////////////////////////// /// LargePOD /// /// Implements a structure which is essentially a largish POD. Useful for testing /// containers and algorithms for their ability to efficiently work with PODs. /// This class isn't strictly a POD by the definition of the C++ standard, /// but it suffices for our interests. /// struct LargeObject { int32_t mData[2048]; }; struct LargePOD { LargeObject mLargeObject1; LargeObject mLargeObject2; const char* mpName1; const char* mpName2; explicit LargePOD(int32_t x = 0) // A true POD doesn't have a non-trivial constructor. { memset(mLargeObject1.mData, 0, sizeof(mLargeObject1.mData)); memset(mLargeObject2.mData, 0, sizeof(mLargeObject2.mData)); mLargeObject1.mData[0] = x; mpName1 = "LargePOD1"; mpName2 = "LargePOD2"; } LargePOD(const LargePOD& largePOD) // A true POD doesn't have a non-trivial copy-constructor. : mLargeObject1(largePOD.mLargeObject1), mLargeObject2(largePOD.mLargeObject2), mpName1(largePOD.mpName1), mpName2(largePOD.mpName2) { } virtual ~LargePOD() { } LargePOD& operator=(const LargePOD& largePOD) // A true POD doesn't have a non-trivial assignment operator. 
{ if(&largePOD != this) { mLargeObject1 = largePOD.mLargeObject1; mLargeObject2 = largePOD.mLargeObject2; mpName1 = largePOD.mpName1; mpName2 = largePOD.mpName2; } return *this; } virtual void DoSomething() // Note that by declaring this virtual, this class is not truly a POD. { // But it acts like a POD for the purposes of EASTL algorithms. mLargeObject1.mData[1]++; } operator int() { return (int)mLargeObject1.mData[0]; } }; //EASTL_DECLARE_POD(LargePOD); //EASTL_DECLARE_TRIVIAL_CONSTRUCTOR(LargePOD); //EASTL_DECLARE_TRIVIAL_COPY(LargePOD); //EASTL_DECLARE_TRIVIAL_ASSIGN(LargePOD); //EASTL_DECLARE_TRIVIAL_DESTRUCTOR(LargePOD); //EASTL_DECLARE_TRIVIAL_RELOCATE(LargePOD); // Operators // We specifically define only == and <, in order to verify that // our containers and algorithms are not mistakenly expecting other // operators for the contained and manipulated classes. inline bool operator==(const LargePOD& t1, const LargePOD& t2) { return (memcmp(&t1.mLargeObject1, &t2.mLargeObject1, sizeof(t1.mLargeObject1)) == 0) && (memcmp(&t1.mLargeObject2, &t2.mLargeObject2, sizeof(t1.mLargeObject2)) == 0) && (strcmp(t1.mpName1, t2.mpName1) == 0) && (strcmp(t1.mpName2, t2.mpName2) == 0); } inline bool operator<(const LargePOD& t1, const LargePOD& t2) { return (memcmp(&t1.mLargeObject1, &t2.mLargeObject1, sizeof(t1.mLargeObject1)) < 0) && (memcmp(&t1.mLargeObject2, &t2.mLargeObject2, sizeof(t1.mLargeObject2)) < 0) && (strcmp(t1.mpName1, t2.mpName1) < 0) && (strcmp(t1.mpName2, t2.mpName2) < 0); } #endif // Header sentry ================================================ FILE: benchmark/source/main.cpp ================================================ /////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
/////////////////////////////////////////////////////////////////////////////// #include "EASTLBenchmark.h" #include "EASTLTest.h" #if !EASTL_OPENSOURCE #include #endif #include #include #include #include #include #include EA_DISABLE_VC_WARNING(4946) #include "EAMain/EAEntryPointMain.inl" #include "EASTLTestAllocator.h" /////////////////////////////////////////////////////////////////////////////// // gpEAGeneralAllocator / gpEAGeneralAllocatorDebug // #if !EASTL_OPENSOURCE namespace EA { namespace Allocator { #ifdef EA_DEBUG extern GeneralAllocatorDebug gGeneralAllocator; extern PPM_API GeneralAllocatorDebug* gpEAGeneralAllocatorDebug; #else extern GeneralAllocator gGeneralAllocator; extern PPM_API GeneralAllocator* gpEAGeneralAllocator; #endif } } #endif /////////////////////////////////////////////////////////////////////////////// // Required by EASTL. // #if !defined(EASTL_EASTDC_VSNPRINTF) || !EASTL_EASTDC_VSNPRINTF int Vsnprintf8(char8_t* pDestination, size_t n, const char8_t* pFormat, va_list arguments) { return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments); } int Vsnprintf16(char16_t* pDestination, size_t n, const char16_t* pFormat, va_list arguments) { return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments); } #if (EASTDC_VERSION_N >= 10600) int Vsnprintf32(char32_t* pDestination, size_t n, const char32_t* pFormat, va_list arguments) { return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments); } #endif #endif /////////////////////////////////////////////////////////////////////////////// // main // int EAMain(int argc, char* argv[]) { bool bWaitAtEnd = false; bool bPrintHelp = false; int nOptionCount = 0; int nErrorCount = 0; EA::EAMain::PlatformStartup(); EA::EAMain::SetVerbosity(2); // Default value. // Set up debug parameters. #ifdef EA_DEBUG // Only enable this temporarily to help find any problems you might find. 
// EA::Allocator::gpEAGeneralAllocatorDebug->SetAutoHeapValidation(EA::Allocator::GeneralAllocator::kHeapValidationLevelBasic, 16); #endif // Parse command line arguments for(int i = 1; i < argc; i++) { if(strstr(argv[i], "-w") == argv[i]) { bWaitAtEnd = true; nOptionCount++; } else if(strstr(argv[i], "-v") == argv[i]) { uint32_t verbosity = EA::StdC::AtoU32(argv[i] + 3); EA::EAMain::SetVerbosity(verbosity); nOptionCount++; } else if(strstr(argv[i], "-l:") == argv[i]) { gEASTL_TestLevel = atoi(argv[i] + 3); if(gEASTL_TestLevel < kEASTL_TestLevelLow) gEASTL_TestLevel = kEASTL_TestLevelLow; else if(gEASTL_TestLevel > kEASTL_TestLevelHigh) gEASTL_TestLevel = kEASTL_TestLevelHigh; nOptionCount++; } else if(strstr(argv[i], "-s:") == argv[i]) { uint32_t seed = (eastl_size_t)atoi(argv[i] + 3); EA::UnitTest::SetRandSeed(seed); nOptionCount++; } else if((strstr(argv[i], "-?") == argv[i]) || (strstr(argv[i], "-h") == argv[i])) { bPrintHelp = true; nOptionCount++; } } // Print user help. if(!bPrintHelp) bPrintHelp = (nOptionCount == 0); if(bPrintHelp) { EASTLTest_Printf("Options\n"); EASTLTest_Printf(" -w Wait at end.\n"); EASTLTest_Printf(" -l:N Test level in range of [1, 10]. 10 means maximum testing.\n"); EASTLTest_Printf(" -s:N Specify a randomization seed. 0 is default and means use clock.\n"); EASTLTest_Printf(" -? Show help.\n"); } // Set up test information Benchmark::Environment& environment = Benchmark::GetEnvironment(); environment.msPlatform = EA_PLATFORM_DESCRIPTION; environment.msSTLName1 = GetStdSTLName(); environment.msSTLName2 = "EASTL"; // Run tests #ifndef EA_DEBUG EA::UnitTest::SetHighThreadPriority(); #endif EA::StdC::Stopwatch stopwatch(EA::StdC::Stopwatch::kUnitsSeconds, true); // Measure seconds, start the counting immediately. 
BenchmarkAlgorithm(); BenchmarkList(); BenchmarkString(); BenchmarkVector(); BenchmarkDeque(); BenchmarkSet(); BenchmarkMap(); BenchmarkHash(); BenchmarkHeap(); BenchmarkBitset(); BenchmarkSort(); BenchmarkTupleVector(); stopwatch.Stop(); #ifndef EA_DEBUG EA::UnitTest::SetNormalThreadPriority(); #endif Benchmark::PrintResults(); eastl::string sClockTime; Benchmark::WriteTime(stopwatch.GetElapsedTime(), sClockTime); EASTLTest_Printf("Time to complete all tests: %s.\n", sClockTime.c_str()); // Done if(bWaitAtEnd) { EASTLTest_Printf("\nPress any key to exit.\n"); getchar(); // Wait for the user and shutdown } EA::EAMain::PlatformShutdown(nErrorCount); return 0; } ================================================ FILE: doc/Benchmarks.md ================================================ # EASTL Benchmarks ## Introduction This document provides a number of benchmark results of EASTL. Where possible, these benchmarks are implemented as comparisons with equivalent functionality found in other libraries such as compiler STL libraries or other well-known libraries. These comparison benchmarks concentrate on highlighting the differences between implementations rather than the similarities. In many mundane cases -- such as accessing a vector element via operator [] -- virtually all vector/array implementations you are likely to run into will have identical performance. It's also important to note that the platform you run on can make a significant difference in the results. On a modern 3+GHz Windows PC many operations are fast due to large memory caches, intelligent branch prediction, and parallel instruction execution. However, on embedded or console systems none of these may be the case. While EASTL generally outperforms std STL, there are some benchmarks here in which EASTL is slower than std STL. There are three primary explanations of this: 1. EASTL is making some kind of speed, memory, or design tradeoff that results in the given speed difference. 
In many such cases, EASTL goes slower on one benchmark in order to go faster on another benchmark deemed more important. This explanation constitutes about 60% of the cases.
2. Compiler optimizations and resulting code generation are coincidentally favoring one kind of implementation over another, often when they are visually virtually identical. This explanation constitutes about 30% of the cases.
3. EASTL is simply not yet as optimized as it could be. This explanation constitutes about 10% of the cases (as of this writing there are about three such functions throughout EASTL).

## Benchmarks

Below is a table of links to detailed benchmark results derived from the Benchmark test present in the EASTL package. The detailed results are present below the table. Additional platforms will be added as results become available for those platforms. Debug benchmarks are present because (lack of) debug performance can be significant for highly templated libraries. EASTL has specific optimizations to enhance debug performance relative to other standard libraries; in some cases it is 10x or more faster than alternatives (though there are exceptions where EASTL is slower). Feel free to submit results for additional compilers/platforms.

| Platform | Compiler | STL type | Build | Results |
|------|------|------|------|------|
| Win32 | VC++ 7.1 | Microsoft (Dinkumware) | Debug | [Detail](#win32vc71msdebug) |
| Win32 | VC++ 7.1 | Microsoft (Dinkumware) | Release | [Detail](#win32vc71msrelease) |
| Win32 | VC++ 7.1 | STLPort | Debug | [Detail](#win32vc71stlportdebug) |
| Win32 | VC++ 7.1 | STLPort | Release | [Detail](#win32vc71stlportrelease) |

### Win32.VC71.MS.Debug

```
EASTL version: 0.96.00
Platform: Windows on X86
Compiler: Microsoft Visual C++ compiler, version 1310
Allocator: PPMalloc::GeneralAllocatorDebug. Thread safety enabled.
Build: Debug. Inlining disabled. STL debug features disabled.

Values are times to complete tests; smaller values are better.
Alarm indicates a greater than 10% difference.
Test VC++ EASTL Ratio Alarm ---------------------------------------------------------------------------------------- algorithm/adj_find/vector 33061345 6497757 5.09 * algorithm/copy/vector 5844906 4876076 1.20 * algorithm/copy/vector 1634346 166065 9.84 * algorithm/copy_backward/vector 4515974 4638892 0.97 algorithm/copy_backward/vector 1821168 121746 14.96 * algorithm/count/vector 17048884 2720766 6.27 * algorithm/equal_range/vector 1111147812 448756888 2.48 * algorithm/fill/bool[] 1728722 91936 18.80 * algorithm/fill/char[]/'d' 1299200 33745 38.50 * algorithm/fill/vector/'d' 10205092 33796 100.00 * algorithm/fill/vector/0 10200748 33805 100.00 * algorithm/fill/vector 10416538 1399687 7.44 * algorithm/fill/vector 10221837 1307700 7.82 * algorithm/fill_n/bool[] 1399033 34196 40.91 * algorithm/fill_n/char[] 1299225 33754 38.49 * algorithm/fill_n/vector 5961637 1371900 4.35 * algorithm/find_end/string/end 16569373 2657372 6.24 * algorithm/find_end/string/middle 16558638 20242410 0.82 * algorithm/find_end/string/none 16811207 40480468 0.42 * algorithm/lex_cmp/schar[] 1749674 194429 9.00 * algorithm/lex_cmp/vector 32824195 5253587 6.25 * algorithm/lex_cmp/vector 29852034 202658 100.00 * algorithm/lower_bound/vector 798624462 350027935 2.28 * algorithm/min_element/vector 21675298 5314676 4.08 * algorithm/rand_shuffle/vector 84236190 43677506 1.93 * algorithm/reverse/list 3007292 2105799 1.43 * algorithm/reverse/vector 2974618 2124796 1.40 * algorithm/search/string 16228158 3594268 4.52 * algorithm/search_n/string 16926985 1522096 11.12 * algorithm/unique/vector 54206243 9988002 5.43 * algorithm/unique/vector 26940079 1741991 15.47 * algorithm/unique/vector 47621344 5213127 9.13 * algorithm/upper_bound/vector 372381295 137901552 2.70 * bitset<1500>/>>=/1 90196544 92539832 0.97 bitset<1500>/count 50753832 53742117 0.94 bitset<1500>/flip 86935875 85121117 1.02 bitset<1500>/reset 78153837 79922611 0.98 bitset<1500>/set() 79214968 79360658 1.00 bitset<1500>/set(i) 11300589 
12199651 0.93 bitset<1500>/test 11282679 13186450 0.86 * bitset<15>/>>=/1 10500577 6000559 1.75 * bitset<15>/count 4000356 6399753 0.63 * bitset<15>/flip 7268877 5647944 1.29 * bitset<15>/reset 8564235 5800163 1.48 * bitset<15>/set() 9935523 5914012 1.68 * bitset<15>/set(i) 11199703 12503637 0.90 * bitset<15>/test 10600623 12899592 0.82 * bitset<35>/>>=/1 13076052 6599834 1.98 * bitset<35>/count 4800384 11500330 0.42 * bitset<35>/flip 7915439 5816313 1.36 * bitset<35>/reset 9400049 5803180 1.62 * bitset<35>/set() 10701152 5840316 1.83 * bitset<35>/set(i) 11342936 12271128 0.92 bitset<35>/test 10670799 13099682 0.81 * bitset<75>/>>=/1 14198834 17151088 0.83 * bitset<75>/count 5795530 8576373 0.68 * bitset<75>/flip 8516703 8922995 0.95 bitset<75>/reset 9999970 8526095 1.17 * bitset<75>/set() 11124877 9009686 1.23 * bitset<75>/set(i) 11300563 12531618 0.90 * bitset<75>/test 11031913 13100523 0.84 * deque/erase 743801706 335646802 2.22 * deque/insert 742331809 341912866 2.17 * deque/iteration 29097030 16315827 1.78 * deque/operator[] 49859598 24026313 2.08 * deque/push_back 424807033 34497608 12.31 * deque/push_front 402313373 38006322 10.59 * deque/sort 725101017 581796551 1.25 * hash_map/clear 559462 961019 0.58 * hash_map/count 53377807 8091448 6.60 * hash_map/erase pos 613573 858084 0.72 * hash_map/erase range 5488748 461134 11.90 * hash_map/erase val 35760096 16379858 2.18 * hash_map/find 43490335 10324823 4.21 * hash_map/find_as/char* 49343818 8617139 5.73 * hash_map/insert 107420281 168690439 0.64 * hash_map/iteration 2456356 1255153 1.96 * hash_map/operator[] 47209502 12581624 3.75 * hash_map/clear 533172 546449 0.98 hash_map/count 28667432 2899997 9.89 * hash_map/erase pos 683239 538289 1.27 * hash_map/erase range 9632676 253037 38.07 * hash_map/erase val 25466026 7752188 3.29 * hash_map/find 20048253 4678502 4.29 * hash_map/insert 71085798 37686187 1.89 * hash_map/iteration 1460318 1338317 1.09 hash_map/operator[] 23226692 7888748 2.94 * heap 
(uint32_t[])/make_heap 5399966 6961305 0.78 * heap (uint32_t[])/pop_heap 108060534 103511318 1.04 heap (uint32_t[])/push_heap 22595661 16640688 1.36 * heap (uint32_t[])/sort_heap 93559424 83076731 1.13 * heap (vector)/make_heap 91770743 21724870 4.22 * heap (vector)/pop_heap 1175599317 284007398 4.14 * heap (vector)/push_heap 207804541 45918046 4.53 * heap (vector)/sort_heap 970394145 208321477 4.66 * list/ctor(it) 805539509 760938607 1.06 list/ctor(n) 80959236 75106995 1.08 list/erase 1052543704 1044976137 1.01 list/find 97785267 75970884 1.29 * list/insert 873895175 807051107 1.08 list/push_back 812797710 780742425 1.04 list/remove 1850600714 1436980599 1.29 * list/reverse 180270465 80466636 2.24 * list/size/1 440148 599642 0.73 * list/size/10 439433 1329817 0.33 * EASTL intentionally implements list::size as O(n). list/size/100 439595 11030060 0.04 * EASTL intentionally implements list::size as O(n). list/splice 177106094 69383027 2.55 * map/clear 508283 470807 1.08 map/count 43145354 14280357 3.02 * map/equal_range 38594004 16520447 2.34 * map/erase/key 33948082 16123175 2.11 * map/erase/pos 578332 455201 1.27 * MS uses a code bloating implementation of erase. map/erase/range 387345 284538 1.36 * map/find 22897224 12766100 1.79 * map/insert 61665800 47286928 1.30 * map/iteration 1977202 745391 2.65 * map/lower_bound 19892941 12260928 1.62 * map/operator[] 24199084 15429634 1.57 * map/upper_bound 19842409 12064441 1.64 * set/clear 1027625 1000901 1.03 set/count 39730182 13329565 2.98 * set/equal_range 34681649 14768827 2.35 * set/erase range 841458 602030 1.40 * set/erase/pos 1380485 1084303 1.27 * MS uses a code bloating implementation of erase. 
set/erase/val 31617425 13344023 2.37 * set/find 19582428 10788864 1.82 * set/insert 61434014 48232086 1.27 * set/iteration 1512057 667820 2.26 * set/lower_bound 18394885 10402785 1.77 * set/upper_bound 17189083 10554425 1.63 * sort/q_sort/TestObject[] 87088799 15037988 5.79 * sort/q_sort/TestObject[]/sorted 21502892 3284299 6.55 * sort/q_sort/vector 87962047 15004677 5.86 * sort/q_sort/vector/sorted 21396523 3341163 6.40 * sort/q_sort/vector 80334589 10429161 7.70 * sort/q_sort/vector/sorted 22133295 3230553 6.85 * sort/q_sort/vector 72195388 5940302 12.15 * sort/q_sort/vector/sorted 19635171 995495 19.72 * string/compare 523013373 534722089 0.98 string/erase/pos,n 3446597 3439492 1.00 string/find/p,pos,n 383873158 441902786 0.87 * string/find_first_not_of/p,pos,n 174157 134131 1.30 * string/find_first_of/p,pos,n 11715423 8520944 1.37 * string/find_last_of/p,pos,n 1871556 1226457 1.53 * string/insert/pos,p 3624877 3357058 1.08 string/iteration 6766787933 581916665 11.63 * string/operator[] 4820827 2335579 2.06 * string/push_back 59812962 6757466 8.85 * string/replace/pos,n,p,n 4371279 4459713 0.98 string/reserve 2307530 1919386 1.20 * string/rfind/p,pos,n 734826 372615 1.97 * string/size 41608 28866 1.44 * string/swap 1033932 1490994 0.69 * string/compare 63086797 64194771 0.98 string/erase/pos,n 2045687 1960270 1.04 string/find/p,pos,n 123872549 471364764 0.26 * string/find_first_not_of/p,pos,n 140013 130271 1.07 string/find_first_of/p,pos,n 8051906 8749994 0.92 string/find_last_of/p,pos,n 1318835 1230715 1.07 string/insert/pos,p 1770610 1724234 1.03 string/iteration 28112136 2544475 11.05 * string/operator[] 4810525 2255841 2.13 * string/push_back 54869634 6127447 8.95 * string/replace/pos,n,p,n 2737578 2847900 0.96 string/reserve 1123395 394902 2.84 * string/rfind/p,pos,n 737299 368518 2.00 * string/size 42245 26801 1.58 * string/swap 1036142 1491028 0.69 * vector/erase 56417135 55770251 1.01 vector/insert 56617761 56100468 1.01 vector/iteration 10413895 1291269 
8.06 * vector/operator[] 23507193 3479390 6.76 * vector/push_back 34687939 13806627 2.51 * vector/sort 256886550 84669657 3.03 * ``` ### Win32.VC71.MS.Release ``` EASTL version: 0.96.00 Platform: Windows on X86 Compiler: Microsoft Visual C++ compiler, version 1310 Allocator: PPMalloc::GeneralAllocator. Thread safety enabled. Build: Full optimization. Inlining enabled. Values are times to complete tests; smaller values are better. Alarm indicates a greater than 10% difference. Test VC++ EASTL Ratio Alarm ---------------------------------------------------------------------------------------- algorithm/adj_find/vector 2783546 2750660 1.01 algorithm/copy/vector 6474025 4972738 1.30 * algorithm/copy/vector 157267 173162 0.91 algorithm/copy_backward/vector 4836406 4374780 1.11 * algorithm/copy_backward/vector 104780 120912 0.87 * algorithm/count/vector 1368440 1368696 1.00 algorithm/equal_range/vector 114199387 102783938 1.11 * algorithm/fill/bool[] 253215 27353 9.26 * algorithm/fill/char[]/'d' 253164 27404 9.24 * algorithm/fill/vector/'d' 253105 27362 9.25 * algorithm/fill/vector/0 253275 27353 9.26 * algorithm/fill/vector 397001 394323 1.01 algorithm/fill/vector 547196 642362 0.85 * algorithm/fill_n/bool[] 229177 27361 8.38 * algorithm/fill_n/char[] 228845 27404 8.35 * algorithm/fill_n/vector 565233 1376822 0.41 * algorithm/find_end/string/end 2107116 82356 25.59 * algorithm/find_end/string/middle 2111672 664283 3.18 * algorithm/find_end/string/none 2110423 1519596 1.39 * algorithm/lex_cmp/schar[] 741021 176162 4.21 * algorithm/lex_cmp/vector 2610494 2642183 0.99 algorithm/lex_cmp/vector 697595 167866 4.16 * algorithm/lower_bound/vector 62462233 58146664 1.07 algorithm/min_element/vector 4350385 2671227 1.63 * algorithm/rand_shuffle/vector 10868261 11300818 0.96 algorithm/reverse/list 483718 470024 1.03 algorithm/reverse/vector 476739 484322 0.98 algorithm/search/string 2560387 1259496 2.03 * algorithm/search_n/string 2770991 458524 6.04 * algorithm/unique/vector 
4194520 4658910 0.90 * algorithm/unique/vector 538730 787924 0.68 * algorithm/unique/vector 3169829 2575636 1.23 * algorithm/upper_bound/vector 27495562 25321593 1.09 bitset<1500>/>>=/1 33464228 33469719 1.00 bitset<1500>/count 18736116 18814903 1.00 bitset<1500>/flip 19299309 18605438 1.04 bitset<1500>/reset 22200487 15262847 1.45 * bitset<1500>/set() 14418193 17557319 0.82 * bitset<1500>/set(i) 1599250 1599199 1.00 bitset<1500>/test 1599241 1599233 1.00 bitset<15>/>>=/1 2199222 2264442 0.97 bitset<15>/count 1399406 1399193 1.00 bitset<15>/flip 1266712 1199197 1.06 bitset<15>/reset 1399364 1399109 1.00 bitset<15>/set() 1199197 999201 1.20 * bitset<15>/set(i) 1599258 1462952 1.09 bitset<15>/test 1599275 1599224 1.00 bitset<35>/>>=/1 2599266 1933376 1.34 * bitset<35>/count 2599240 2592559 1.00 bitset<35>/flip 1693124 1199188 1.41 * bitset<35>/reset 1399406 999201 1.40 * bitset<35>/set() 1599403 1199205 1.33 * bitset<35>/set(i) 1599241 1599190 1.00 bitset<35>/test 1599250 1599232 1.00 bitset<75>/>>=/1 4199332 4199213 1.00 bitset<75>/count 2999497 2199341 1.36 * bitset<75>/flip 2399499 1830178 1.31 * bitset<75>/reset 2199468 1199197 1.83 * bitset<75>/set() 1999387 1199851 1.67 * bitset<75>/set(i) 1599266 1599198 1.00 bitset<75>/test 1599241 1662651 0.96 deque/erase 90444165 37113253 2.44 * deque/insert 93299349 36175167 2.58 * deque/iteration 2756414 2122076 1.30 * deque/operator[] 5117969 4632075 1.10 deque/push_back 30300757 3060357 9.90 * deque/push_front 25498529 2808392 9.08 * deque/sort 142283047 111292464 1.28 * hash_map/clear 146769 389699 0.38 * hash_map/count 13059434 3460324 3.77 * hash_map/erase pos 184246 331925 0.56 * hash_map/erase range 382432 167237 2.29 * hash_map/erase val 6187898 3302114 1.87 * hash_map/find 11289369 3459024 3.26 * hash_map/find_as/char* 13559192 3662387 3.70 * hash_map/insert 17514012 14095176 1.24 * hash_map/iteration 801014 218450 3.67 * hash_map/operator[] 11457065 3690385 3.10 * hash_map/clear 141865 265379 0.53 * 
hash_map/count 1766045 703613 2.51 * hash_map/erase pos 172337 218458 0.79 * hash_map/erase range 537846 102340 5.26 * hash_map/erase val 2220132 1441787 1.54 * hash_map/find 1612994 1043953 1.55 * hash_map/insert 7141547 4348056 1.64 * hash_map/iteration 199512 169328 1.18 * hash_map/operator[] 1831733 1519707 1.21 * heap (uint32_t[])/make_heap 3366247 1949093 1.73 * heap (uint32_t[])/pop_heap 57280514 53779440 1.07 heap (uint32_t[])/push_heap 9700217 7582935 1.28 * heap (uint32_t[])/sort_heap 47227751 46131948 1.02 heap (vector)/make_heap 11458442 11510819 1.00 heap (vector)/pop_heap 122897267 119061132 1.03 heap (vector)/push_heap 21688481 21176220 1.02 heap (vector)/sort_heap 90867380 88869523 1.02 list/ctor(it) 74591104 69845817 1.07 list/ctor(n) 6243998 5838582 1.07 list/erase 299509298 206013676 1.45 * list/find 40927185 14514243 2.82 * list/insert 71277251 47234534 1.51 * list/push_back 73780527 44116725 1.67 * list/remove 786197776 326434612 2.41 * list/reverse 49283128 25029678 1.97 * list/size/1 159741 139400 1.15 * list/size/10 159324 346579 0.46 * EASTL intentionally implements list::size as O(n). list/size/100 159188 97235419 0.00 * EASTL intentionally implements list::size as O(n). list/splice 63548584 19322931 3.29 * map/clear 167408 170501 0.98 map/count 10213685 4748346 2.15 * map/equal_range 9515053 5677558 1.68 * map/erase/key 6646260 4302300 1.54 * map/erase/pos 297135 327938 0.91 MS uses a code bloating implementation of erase. map/erase/range 148614 163702 0.91 map/find 5637531 4767055 1.18 * map/insert 9591128 9030349 1.06 map/iteration 323595 325261 0.99 map/lower_bound 5398239 4784089 1.13 * map/operator[] 5631250 5141166 1.10 map/upper_bound 5436336 4762431 1.14 * set/clear 155983 156026 1.00 set/count 9635965 4392146 2.19 * set/equal_range 8504157 5247832 1.62 * set/erase range 140488 119408 1.18 * set/erase/pos 260678 286697 0.91 MS uses a code bloating implementation of erase. 
set/erase/val 6008225 4012825 1.50 * set/find 5145432 4381945 1.17 * set/insert 8087129 8697251 0.93 set/iteration 271507 304538 0.89 * set/lower_bound 4666228 4404250 1.06 set/upper_bound 4623600 4402974 1.05 sort/q_sort/TestObject[] 9596169 5578652 1.72 * sort/q_sort/TestObject[]/sorted 602463 1016132 0.59 * sort/q_sort/vector 9674828 5430199 1.78 * sort/q_sort/vector/sorted 606908 1111647 0.55 * sort/q_sort/vector 6284194 3423452 1.84 * sort/q_sort/vector/sorted 711629 569364 1.25 * sort/q_sort/vector 5453379 2916146 1.87 * sort/q_sort/vector/sorted 537047 419144 1.28 * string/compare 435083295 251985824 1.73 * string/erase/pos,n 3454842 3451858 1.00 string/find/p,pos,n 401954723 165298157 2.43 * string/find_first_not_of/p,pos,n 131452 65374 2.01 * string/find_first_of/p,pos,n 11657444 4144515 2.81 * string/find_last_of/p,pos,n 1604248 567571 2.83 * string/insert/pos,p 3398734 3355460 1.01 string/iteration 218856504 218771844 1.00 string/operator[] 714161 240023 2.98 * string/push_back 34968235 2444897 14.30 * string/replace/pos,n,p,n 4226693 4198498 1.01 string/reserve 1901765 390805 4.87 * string/rfind/p,pos,n 195483 150985 1.29 * string/size 11169 11245 0.99 string/swap 1459280 419807 3.48 * string/compare 63071275 77209580 0.82 * string/erase/pos,n 2008652 1944494 1.03 string/find/p,pos,n 123201023 167536164 0.74 * string/find_first_not_of/p,pos,n 93372 67864 1.38 * string/find_first_of/p,pos,n 7542492 3375758 2.23 * string/find_last_of/p,pos,n 933972 583576 1.60 * string/insert/pos,p 1737213 1750847 0.99 string/iteration 893834 899130 0.99 string/operator[] 817879 313437 2.61 * string/push_back 20857734 2004410 10.41 * string/replace/pos,n,p,n 2578696 2607655 0.99 string/reserve 915127 85289 10.73 * string/rfind/p,pos,n 196103 148894 1.32 * string/size 11619 11220 1.04 string/swap 1461056 419874 3.48 * vector/erase 55235116 55284587 1.00 vector/insert 55166046 55142755 1.00 vector/iteration 553954 509719 1.09 vector/operator[] 1284239 798516 1.61 * 
vector/push_back 5399549 3867959 1.40 * vector/sort 43636314 42619952 1.02 ``` ### Win32.VC71.STLPort.Debug ``` EASTL version: 0.96.00 Platform: Windows on X86 Compiler: Microsoft Visual C++ compiler, version 1310 Allocator: PPMalloc::GeneralAllocatorDebug. Thread safety enabled. Build: Debug. Inlining disabled. STL debug features disabled. Values are times to complete tests; smaller values are better. Alarm indicates a greater than 10% difference. Test STLPort EASTL Ratio Alarm ---------------------------------------------------------------------------------------- algorithm/adj_find/vector 5661170 5689517 1.00 algorithm/copy/vector 5573815 5124428 1.09 algorithm/copy/vector 148273 125782 1.18 * algorithm/copy_backward/vector 5429791 4834510 1.12 * algorithm/copy_backward/vector 156765 163038 0.96 algorithm/count/vector 2730922 2730072 1.00 algorithm/equal_range/vector 639366489 452896251 1.41 * algorithm/fill/bool[] 1299326 27361 47.49 * algorithm/fill/char[]/'d' 27378 27361 1.00 algorithm/fill/vector/'d' 34459 27361 1.26 * algorithm/fill/vector/0 1299224 27361 47.48 * algorithm/fill/vector 1400647 1400145 1.00 algorithm/fill/vector 1308779 1309085 1.00 algorithm/fill_n/bool[] 1299156 27352 47.50 * algorithm/fill_n/char[] 1299258 27369 47.47 * algorithm/fill_n/vector 1451162 1313632 1.10 algorithm/find_end/string/end 13089999 2526412 5.18 * algorithm/find_end/string/middle 12627412 20190101 0.63 * algorithm/find_end/string/none 12704185 40728803 0.31 * algorithm/lex_cmp/schar[] 1749844 195806 8.94 * algorithm/lex_cmp/vector 5060968 4799882 1.05 algorithm/lex_cmp/vector 1668354 189490 8.80 * algorithm/lower_bound/vector 450240945 353437573 1.27 * algorithm/min_element/vector 5861744 5326371 1.10 algorithm/rand_shuffle/vector 40780449 45780090 0.89 * algorithm/reverse/list 2657678 2130627 1.25 * algorithm/reverse/vector 2666424 2124889 1.25 * algorithm/search/string 3110379 3613460 0.86 * algorithm/search_n/string 3061665 1521261 2.01 * algorithm/unique/vector 
12423684 9485439 1.31 * algorithm/unique/vector 3718699 1726596 2.15 * algorithm/unique/vector 6205110 4591631 1.35 * algorithm/upper_bound/vector 185391094 139336317 1.33 * bitset<1500>/>>=/1 120666960 92449816 1.31 * STLPort is broken, neglects wraparound check. bitset<1500>/count 201709793 52874726 3.81 * bitset<1500>/flip 87360297 81737071 1.07 bitset<1500>/reset 23950178 77390323 0.31 * bitset<1500>/set() 84608107 76912011 1.10 bitset<1500>/set(i) 18023620 12229604 1.47 * bitset<1500>/test 18006553 13276396 1.36 * bitset<15>/>>=/1 11935904 6012695 1.99 * STLPort is broken, neglects wraparound check. bitset<15>/count 9368581 6022742 1.56 * bitset<15>/flip 11600706 6533635 1.78 * bitset<15>/reset 5830957 5874690 0.99 bitset<15>/set() 11695328 5701621 2.05 * bitset<15>/set(i) 16363205 12570216 1.30 * bitset<15>/test 16743172 13201452 1.27 * bitset<35>/>>=/1 22950918 6774457 3.39 * STLPort is broken, neglects wraparound check. bitset<35>/count 12655309 11736256 1.08 bitset<35>/flip 13738575 5800042 2.37 * bitset<35>/reset 15561434 5800510 2.68 * bitset<35>/set() 13564283 5600709 2.42 * bitset<35>/set(i) 18519689 12199973 1.52 * bitset<35>/test 18000569 13103566 1.37 * bitset<75>/>>=/1 25579525 16669664 1.53 * STLPort is broken, neglects wraparound check. 
bitset<75>/count 18740698 8480492 2.21 * bitset<75>/flip 13555630 8300335 1.63 * bitset<75>/reset 15200133 8200000 1.85 * bitset<75>/set() 14408112 8001959 1.80 * bitset<75>/set(i) 18137741 12374257 1.47 * bitset<75>/test 18422135 13100038 1.41 * deque/erase 651933790 326443043 2.00 * deque/insert 659786183 333304660 1.98 * deque/iteration 23734592 16173706 1.47 * deque/operator[] 59126816 23911774 2.47 * deque/push_back 58056988 31859266 1.82 * deque/push_front 57780891 31743199 1.82 * deque/sort 818414195 596568113 1.37 * hash_map/clear 3422133 2204517 1.55 * hash_map/count 9869545 8624924 1.14 * hash_map/erase pos 3256350 2069299 1.57 * hash_map/erase range 3230203 1151392 2.81 * hash_map/erase val 16860362 15939778 1.06 hash_map/find 10286971 9920910 1.04 hash_map/find_as/char* 118136025 9458468 12.49 * hash_map/insert 188948336 174490082 1.08 hash_map/iteration 4037049 2021036 2.00 * hash_map/operator[] 11472127 12887699 0.89 * hash_map/clear 2522264 1331848 1.89 * hash_map/count 3210739 2897063 1.11 * hash_map/erase pos 1862281 1304783 1.43 * hash_map/erase range 698079 579606 1.20 * hash_map/erase val 8806722 7041298 1.25 * hash_map/find 3604875 4709645 0.77 * hash_map/insert 40785711 40376342 1.01 hash_map/iteration 3064088 1508834 2.03 * hash_map/operator[] 6053742 8176906 0.74 * heap (uint32_t[])/make_heap 5799813 5738596 1.01 heap (uint32_t[])/pop_heap 113775168 102076134 1.11 * heap (uint32_t[])/push_heap 21649151 16854845 1.28 * heap (uint32_t[])/sort_heap 97535213 83290735 1.17 * heap (vector)/make_heap 22215557 22277063 1.00 heap (vector)/pop_heap 275392171 277340039 0.99 heap (vector)/push_heap 51479442 47342577 1.09 heap (vector)/sort_heap 214474736 218497540 0.98 list/ctor(it) 767753795 753421427 1.02 list/ctor(n) 74185322 73386245 1.01 list/erase 1021003824 1033873589 0.99 list/find 77666072 74917622 1.04 list/insert 788071150 774188737 1.02 list/push_back 760490154 737327348 1.03 list/remove 1682511938 1434771006 1.17 * list/reverse 87237327 
80394623 1.09 list/size/1 3828111 599530 6.39 * list/size/10 9600605 1329535 7.22 * EASTL intentionally implements list::size as O(n). list/size/100 62952334 15022551 4.19 * EASTL intentionally implements list::size as O(n). list/splice 96536412 60804817 1.59 * map/clear 1142127 1099066 1.04 map/count 19659726 14647548 1.34 * map/equal_range 36680687 18219086 2.01 * map/erase/key 28892154 16037774 1.80 * map/erase/pos 1209643 1185495 1.02 map/erase/range 715402 670539 1.07 map/find 21020992 13429575 1.57 * map/insert 59530871 51120640 1.16 * map/iteration 972825 1191946 0.82 * map/lower_bound 18852651 12495034 1.51 * map/operator[] 22889573 16676736 1.37 * map/upper_bound 18603584 12406922 1.50 * set/clear 919555 882988 1.04 set/count 17561110 12461084 1.41 * set/equal_range 31522488 15230282 2.07 * set/erase range 687582 564765 1.22 * set/erase/pos 1044352 1045355 1.00 set/erase/val 25525304 12940774 1.97 * set/find 17140751 10704866 1.60 * set/insert 56035051 45555664 1.23 * set/iteration 682669 640831 1.07 set/lower_bound 16339932 10475740 1.56 * set/upper_bound 17779424 10652599 1.67 * sort/q_sort/TestObject[] 17000866 14823515 1.15 * sort/q_sort/TestObject[]/sorted 6658559 3263328 2.04 * sort/q_sort/vector 17476629 14953285 1.17 * sort/q_sort/vector/sorted 6667034 3327435 2.00 * sort/q_sort/vector 15391357 10820848 1.42 * sort/q_sort/vector/sorted 6617122 3232949 2.05 * sort/q_sort/vector 8343906 6014846 1.39 * sort/q_sort/vector/sorted 3039430 1003127 3.03 * string/compare 1489709846 532664000 2.80 * string/erase/pos,n 3528690 3439864 1.03 string/find/p,pos,n 2521448321 443752189 5.68 * string/find_first_not_of/p,pos,n 661206 137419 4.81 * string/find_first_of/p,pos,n 54746434 8521335 6.42 * string/find_last_of/p,pos,n 10607778 1212414 8.75 * string/insert/pos,p 3445016 3360126 1.03 string/iteration 580955636 579452556 1.00 string/operator[] 2206353 1987809 1.11 * string/push_back 22421368 6007808 3.73 * string/replace/pos,n,p,n 5138454 4464786 1.15 * 
string/reserve 4922413418 335622 100.00 * string/rfind/p,pos,n 1440308 380578 3.78 * string/size 25355 25398 1.00 string/swap 2122704 1490823 1.42 * string/compare 77222134 77443134 1.00 string/erase/pos,n 1965344 1956521 1.00 string/find/p,pos,n 2468091951 474205522 5.20 * string/find_first_not_of/p,pos,n 660960 130211 5.08 * string/find_first_of/p,pos,n 55020899 9240171 5.95 * string/find_last_of/p,pos,n 10576210 1239053 8.54 * string/insert/pos,p 1822756 1750880 1.04 string/iteration 2617889 2540148 1.03 string/operator[] 2254794 2256443 1.00 string/push_back 12463022 5210321 2.39 * string/replace/pos,n,p,n 3744862 2855260 1.31 * string/reserve 1372046888 218815 100.00 * string/rfind/p,pos,n 1446232 366902 3.94 * string/size 26859 25431 1.06 string/swap 2123350 1490509 1.42 * vector/erase 55164013 56417449 0.98 vector/insert 55872973 56432664 0.99 vector/iteration 1329102 1324623 1.00 vector/operator[] 5264738 3136746 1.68 * vector/push_back 14903245 13171175 1.13 * vector/sort 88429095 88542171 1.00 ``` ### Win32.VC71.STLPort.Release ``` EASTL version: 0.96.00 Platform: Windows on X86 Compiler: Microsoft Visual C++ compiler, version 1310 Allocator: PPMalloc::GeneralAllocator. Thread safety enabled. Build: Full optimization. Inlining enabled. Values are times to complete tests; smaller values are better. Alarm indicates a greater than 10% difference. 
Test STLPort EASTL Ratio Alarm ---------------------------------------------------------------------------------------- algorithm/adj_find/vector 2741046 2731441 1.00 algorithm/copy/vector 6065923 5085142 1.19 * algorithm/copy/vector 158304 165555 0.96 algorithm/copy_backward/vector 4710258 4896476 0.96 algorithm/copy_backward/vector 146030 142630 1.02 algorithm/count/vector 1395921 1406334 0.99 algorithm/equal_range/vector 211692764 118969493 1.78 * algorithm/fill/bool[] 366078 33737 10.85 * algorithm/fill/char[]/'d' 33736 33771 1.00 algorithm/fill/vector/'d' 28466 33720 0.84 * algorithm/fill/vector/0 366086 33728 10.85 * algorithm/fill/vector 466250 401591 1.16 * algorithm/fill/vector 521603 693481 0.75 * algorithm/fill_n/bool[] 599709 33762 17.76 * algorithm/fill_n/char[] 599573 33711 17.79 * algorithm/fill_n/vector 434971 1374084 0.32 * algorithm/find_end/string/end 1494742 85349 17.51 * algorithm/find_end/string/middle 1480700 687208 2.15 * algorithm/find_end/string/none 1540540 1546431 1.00 algorithm/lex_cmp/schar[] 921638 178797 5.15 * algorithm/lex_cmp/vector 2623559 2643551 0.99 algorithm/lex_cmp/vector 960899 183608 5.23 * algorithm/lower_bound/vector 60630534 56531528 1.07 algorithm/min_element/vector 4209022 2768527 1.52 * algorithm/rand_shuffle/vector 13762010 15969052 0.86 * algorithm/reverse/list 673387 731825 0.92 algorithm/reverse/vector 634576 754511 0.84 * algorithm/search/string 1262599 1387608 0.91 algorithm/search_n/string 1166242 458592 2.54 * algorithm/unique/vector 4912193 5336317 0.92 algorithm/unique/vector 809387 809081 1.00 algorithm/unique/vector 4371814 2414255 1.81 * algorithm/upper_bound/vector 31899081 29555596 1.08 bitset<1500>/>>=/1 63308136 40553560 1.56 * STLPort is broken, neglects wraparound check. 
bitset<1500>/count 62523178 22799473 2.74 * bitset<1500>/flip 20302845 19919232 1.02 bitset<1500>/reset 18892015 15403148 1.23 * bitset<1500>/set() 15803302 17322192 0.91 bitset<1500>/set(i) 2799271 2999310 0.93 bitset<1500>/test 2999293 2799262 1.07 bitset<15>/>>=/1 1199239 3199256 0.37 * STLPort is broken, neglects wraparound check. bitset<15>/count 3599461 2199231 1.64 * bitset<15>/flip 1199231 1199188 1.00 bitset<15>/reset 1199188 1199180 1.00 bitset<15>/set() 1199214 1199180 1.00 bitset<15>/set(i) 2599257 1399262 1.86 * bitset<15>/test 2599274 2599283 1.00 bitset<35>/>>=/1 6643974 4599239 1.44 * STLPort is broken, neglects wraparound check. bitset<35>/count 5151331 5399438 0.95 bitset<35>/flip 1999404 1199273 1.67 * bitset<35>/reset 9805285 1399313 7.01 * bitset<35>/set() 2799279 1199248 2.33 * bitset<35>/set(i) 2799246 1599241 1.75 * bitset<35>/test 2999234 2999251 1.00 bitset<75>/>>=/1 7002045 6999333 1.00 STLPort is broken, neglects wraparound check. bitset<75>/count 5999351 3002259 2.00 * bitset<75>/flip 3599334 3599163 1.00 bitset<75>/reset 9799344 3399218 2.88 * bitset<75>/set() 3599232 3599062 1.00 bitset<75>/set(i) 2799228 1599284 1.75 * bitset<75>/test 2999250 2799339 1.07 deque/erase 127108651 115258113 1.10 deque/insert 137727889 116552332 1.18 * deque/iteration 7144182 6009899 1.19 * deque/operator[] 34241222 20535039 1.67 * deque/push_back 6585800 3932126 1.67 * deque/push_front 6805865 3993513 1.70 * deque/sort 395352323 348778188 1.13 * hash_map/clear 426640 447015 0.95 hash_map/count 4359344 3883089 1.12 * hash_map/erase pos 584392 458142 1.28 * hash_map/erase range 221034 196078 1.13 * hash_map/erase val 3539867 3790813 0.93 hash_map/find 3966831 3811910 1.04 hash_map/find_as/char* 11591612 4243710 2.73 * hash_map/insert 16763887 16719194 1.00 hash_map/iteration 909968 478609 1.90 * hash_map/operator[] 4360041 4108313 1.06 hash_map/clear 302634 283722 1.07 hash_map/count 916487 907426 1.01 hash_map/erase pos 388042 321385 1.21 * hash_map/erase 
range 122680 116280 1.06 hash_map/erase val 1710931 1729529 0.99 hash_map/find 1089462 1346527 0.81 * hash_map/insert 4560310 5072350 0.90 * hash_map/iteration 960117 495354 1.94 * hash_map/operator[] 1872830 1890595 0.99 heap (uint32_t[])/make_heap 3528418 3327257 1.06 heap (uint32_t[])/pop_heap 63243859 61011853 1.04 heap (uint32_t[])/push_heap 11602424 10045869 1.15 * heap (uint32_t[])/sort_heap 52965362 48744729 1.09 heap (vector)/make_heap 13191456 13089711 1.01 heap (vector)/pop_heap 148555656 144787742 1.03 heap (vector)/push_heap 28696689 26618830 1.08 heap (vector)/sort_heap 112473989 114018643 0.99 list/ctor(it) 80186731 74006287 1.08 list/ctor(n) 6232311 6128007 1.02 list/erase 344556374 212877808 1.62 * list/find 39859075 14591347 2.73 * list/insert 86935153 56138233 1.55 * list/push_back 79569180 46700641 1.70 * list/remove 785786758 324201016 2.42 * list/reverse 45248186 24852759 1.82 * list/size/1 219844 219496 1.00 list/size/10 519563 519579 1.00 EASTL intentionally implements list::size as O(n). list/size/100 4567194 101230266 0.05 * EASTL intentionally implements list::size as O(n). 
list/splice 68321087 23601687 2.89 * map/clear 168011 180540 0.93 map/count 4830439 5139287 0.94 map/equal_range 8700090 6158531 1.41 * map/erase/key 6696776 4617038 1.45 * map/erase/pos 309273 333183 0.93 map/erase/range 137419 136068 1.01 map/find 4773498 4931352 0.97 map/insert 9651877 9311699 1.04 map/iteration 372946 416364 0.90 * map/lower_bound 4784234 4915797 0.97 map/operator[] 5040254 5183147 0.97 map/upper_bound 4724292 4915984 0.96 set/clear 165300 173289 0.95 set/count 4958654 4885086 1.02 set/equal_range 8434134 5698681 1.48 * set/erase range 145554 133960 1.09 set/erase/pos 299914 324760 0.92 set/erase/val 6506155 4335034 1.50 * set/find 4866879 4556043 1.07 set/insert 8340523 8957257 0.93 set/iteration 294465 343442 0.86 * set/lower_bound 4548095 4756498 0.96 set/upper_bound 4559196 4521498 1.01 sort/q_sort/TestObject[] 7316766 7013894 1.04 sort/q_sort/TestObject[]/sorted 1668439 1332885 1.25 * sort/q_sort/vector 7331530 7017260 1.04 sort/q_sort/vector/sorted 1601629 1247120 1.28 * sort/q_sort/vector 7071643 7067869 1.00 sort/q_sort/vector/sorted 2136390 1703799 1.25 * sort/q_sort/vector 3292891 2943627 1.12 * sort/q_sort/vector/sorted 653693 473612 1.38 * string/compare 356579259 432760228 0.82 * string/erase/pos,n 3430422 3428645 1.00 string/find/p,pos,n 229263402 225830975 1.02 string/find_first_not_of/p,pos,n 187391 81404 2.30 * string/find_first_of/p,pos,n 4411831 4413532 1.00 string/find_last_of/p,pos,n 731655 726155 1.01 string/insert/pos,p 3408628 3319726 1.03 string/iteration 309993861 310333547 1.00 string/operator[] 580839 579904 1.00 string/push_back 3983338 2975553 1.34 * string/replace/pos,n,p,n 4361095 4211504 1.04 string/reserve 935141729 247010 100.00 * string/rfind/p,pos,n 248956 223397 1.11 * string/size 13311 13107 1.02 string/swap 519129 579445 0.90 * string/compare 76695559 76828015 1.00 string/erase/pos,n 1951566 1947282 1.00 string/find/p,pos,n 185878944 185605039 1.00 string/find_first_not_of/p,pos,n 196877 81600 2.41 * 
string/find_first_of/p,pos,n 4147685 4145356 1.00 string/find_last_of/p,pos,n 605897 598222 1.01 string/insert/pos,p 1781592 1768264 1.01 string/iteration 921502 921272 1.00 string/operator[] 361250 359873 1.00 string/push_back 3363288 2530493 1.33 * string/replace/pos,n,p,n 2682600 2633130 1.02 string/reserve 672517501 78387 100.00 * string/rfind/p,pos,n 226202 200013 1.13 * string/size 11280 11109 1.02 string/swap 519393 559759 0.93 vector/erase 55184856 55192217 1.00 vector/insert 56764267 55682726 1.02 vector/iteration 423122 424039 1.00 vector/operator[] 1189397 860991 1.38 * vector/push_back 5626609 4027317 1.40 * vector/sort 49227036 49231362 1.00 ``` ---------------------------------------------- End of document ================================================ FILE: doc/BestPractices.md ================================================ # EASTL Best Practices In this document we discuss best practices for using EASTL. The primary emphasis is on performance with a secondary emphasis on correctness and maintainability. Some best practices apply only to some situations, and these will be pointed out as we go along. In order to be easily digestible, we present these practices as a list of items in the tone of the Effective C++ series of books. ## Summary The descriptions here are intentionally terse; this is to make them easier to visually scan. 1. [Consider intrusive containers.](#consider-intrusive-containers) 2. [Consider fixed-size containers.](#consider-fixed-size-containers) 3. [Consider custom allocators.](#consider-custom-allocators) 4. [Consider hash tables instead of maps.](#consider-hash-tables-instead-of-maps) 5. [Consider a vector_map (a.k.a. sorted vector) for unchanging data.](#consider-a-vector_map-aka-sorted-vector-for-unchanging-data) 6. [Consider slist instead of list.](#consider-slist-instead-of-list) 7. [Avoid redundant end() and size() in loops.](#avoid-redundant-end-and-size-in-loops) 8. 
[Iterate containers instead of using operator\[\].](#iterate-containers-instead-of-using-operator) 9. [Learn to use the string class appropriately.](#learn-to-use-the-string-class-appropriately) 10. [Cache list size if you want size() to be O(1).](#cache-list-size-if-you-want-listsize-to-be-o1) 11. [Use empty() instead of size() when possible.](#use-empty-instead-of-size-when-possible) 12. [Know your container efficiencies.](#know-your-container-efficiencies) 13. [Use vector::reserve.](#use-vectorreserve) 14. [Use vector::set_capacity to trim memory usage.](#use-vectorset_capacity-to-trim-memory-usage) 15. [Use swap() instead of a manually implemented version.](#use-swap-instead-of-a-manually-implemented-version) 16. [Consider storing pointers instead of objects.](#consider-storing-pointers-instead-of-objects) 17. [Consider smart pointers instead of raw pointers.](#consider-smart-pointers-instead-of-raw-pointers) 18. [Use iterator pre-increment instead of post-increment.](#use-iterator-pre-increment-instead-of-post-increment) 19. [Make temporary references so the code can be traced/debugged.](#make-temporary-references-so-the-code-can-be-traceddebugged) 20. [Consider bitvector or bitset instead of vector\.](#consider-bitvector-or-bitset-instead-of-vector) 21. [Vectors can be treated as contiguous memory.](#vectors-can-be-treated-as-contiguous-memory) 22. [Search hash_map\ using heterogeneous lookup.](#search-hash_mapstring-using-heterogeneous-lookup) 23. [Take advantage of type_traits.](#take-advantage-of-type_traits) 24. [Name containers to track memory usage.](#name-containers-to-track-memory-usage) 25. [Learn the algorithms.](#learn-the-algorithms) 26. [Pass and return containers by reference instead of value.](#pass-and-return-containers-by-reference-instead-of-value) 27. [Consider using reset_lose_memory() for fast container teardown.](#consider-using-reset_lose_memory-for-fast-container-teardown) 28. 
[Consider using fixed_substring instead of copying strings.](#consider-using-fixed_substring-instead-of-copying-strings) 29. [Consider using vector::push_back(void).](#consider-using-vectorpush_backvoid) ## Detail ### Consider intrusive containers. Intrusive containers (such as intrusive_list) differ from regular containers (such as list) in that they use the stored objects to manage the linked list instead of using nodes allocated from a memory heap. The result is better usage of memory. Additionally intrusive_list objects can be removed from their list without knowing what list they belong to. To make an intrusive_list of Widgets, you have Widget inherit from intrusive_list_node or simply have mpPrev/mpNext member variables. To create an intrusive_list container, you can use the following code: ```cpp class Widget : public intrusive_list_node { }; intrusive_list widgetList; widgetList.push_back(someWidget); ``` ### Consider fixed-size containers. Fixed-size containers (such as fixed_list) are variations of regular containers (such as list) in that they allocate from a fixed block of local memory instead of allocating from a generic heap. The result is better usage of memory due to reduced fragmentation, better cache behavior, and faster allocation/deallocation. The presence of fixed-size containers negates the most common complaint that people have about STL: that it fragments the heap or "allocates all over the place." EASTL fixed containers include: * fixed_list * fixed_slist * fixed_vector * fixed_string * fixed_map * fixed_multimap * fixed_set * fixed_multiset * fixed_hash_map * fixed_hash_multimap * fixed_hash_set * fixed_hash_multiset To create a fixed_set, you can use the following code: ```cpp fixed_set intSet; // Create a set capable of holding 25 elements. intSet.insert(37); ``` ### Consider custom allocators. 
While EASTL provides fixed-size containers in order to control container memory usage, EASTL lets you assign a custom allocator to any container. This lets you define your own memory pool. EASTL has a more flexible and powerful mechanism of doing this than standard STL, as EASTL understands object alignment requirements, allows for debug naming, allows for sharing allocators across containers, and allows dynamic allocator assignment. To create a list container that uses your custom allocator and uses block naming, you can use the following code: ```cpp list intList(pSomeAllocator, "graphics/intList"); intList.push_back(37); ``` ### Consider hash tables instead of maps. Hash containers (such as hash_map) provide the same interface as associative containers (such as map) but have faster lookup and use less memory. The primary disadvantage relative to associative containers is that hash containers are not sorted. To make a hash_map (dictionary) of integers to strings, you can use the following code: ```cpp hash_map stringTable; stringTable[37] = "hello"; ``` ### Consider a vector_map (a.k.a. sorted vector) for unchanging data. You can improve speed, memory usage, and cache behavior by using a vector_map instead of a map (or vector_set instead of set, etc.). The primary disadvantage of vector_map is that insertion and removal of elements are O(n) instead of O(log(n)). However, if your associative container is not going to be changing much or at all, you can benefit from using a vector_map. Consider calling reserve on the vector_map in order to set the desired capacity up front. To make a vector_set, you can use the following code: ```cpp vector_set intSet(16); // Create a vector_set with an initial capacity of 16. intSet.insert(37); ``` Note that you can use containers other than vector to implement vector_set. Here's how you do it with deque: ```cpp vector_set, EASTLAllocatorType, deque > intSet; intSet.insert(37); ``` ### Consider slist instead of list. 
An slist is a singly-linked list; it is much like a list except that it can only be traversed in a forward direction and not a backward direction. The benefit is that each node is 4 bytes instead of 8 bytes. This is a small improvement, but if you don't need reverse iteration then it can be an improvement. There's also intrusive_slist as an option. To make an slist, you can use the following code: ```cpp slist intSlist; intSlist.push_front(37); ``` ### Avoid redundant end() and size() in loops. Instead of writing code like this: ```cpp for(deque::iterator it = d.begin(); it != d.end(); ++it) ... ``` write code like this: ```cpp for(deque::iterator it = d.begin(), itEnd = d.end(); it != itEnd; ++it) ... ``` The latter avoids a function call and return of an object (which in deque's case happens to be more than just a pointer). The above only works when the container is unchanged or for containers that have a constant end value. By "constant end value" we mean containers which can be modified but end always remains the same. | Constant begin | Non-constant begin | Constant end | Non-constant end | |------|------|------|------| | array1 | string
vector
deque
intrusive_list
intrusive_slist
vector_map
vector_multimap
vector_set
vector_multiset
bit_vector
hash_map
hash_multimap
hash_set
hash_multiset
intrusive_hash_map
intrusive_hash_multimap
intrusive_hash_set
intrusive_hash_multiset | array
list
slist
intrusive_list
intrusive_slist
map
multimap
set
multiset
hash_map2
hash_multimap2
hash_set2
hash_multiset2
intrusive_hash_map
intrusive_hash_multimap
intrusive_hash_set
intrusive_hash_multiset | string
vector
deque
vector_map
vector_multimap
vector_set
vector_multiset
bit_vector | * 1 Arrays can be neither resized nor reallocated. * 2 Constant end if the hashtable can't/won't re-hash. Non-constant if it can re-hash. ### Iterate containers instead of using operator[]. It's faster to iterate random access containers via iterators than via operator[], though operator[] usage may look simpler. Instead of doing this: ```cpp for(unsigned i = 0, iEnd = intVector.size(); i != iEnd; ++i) intVector[i] = 37; ``` you can execute more efficiently by doing this: ```cpp for(vector::iterator it = intVector.begin(), itEnd = intVector.end(); it != itEnd; ++it) *it = 37; ``` ### Learn to use the string class appropriately. Oddly enough, the most mis-used STL container is easily the string class. The tales of string abuse could rival the 1001 Arabian Nights. Most of the abuses involve doing things in a harder way than need be. In examining the historical mis-uses of string, it is clear that many of the problems stem from the user thinking in terms of C-style string operations instead of object-oriented strings. This explains why statements such as strlen(s.c_str()) are so common, whereas the user could just use s.length() instead and be both clearer and more efficient. Here we provide a table of actual collected examples of things done and how they could have been done instead. | What was written | What could have been written | |------|------| | `s = s.Left(i) + '+' + s.Right(s.length() - i - 1);` | `s[i] = '+';` | | `string s(""); // This is the most commonly found misuse.` | `string s;` | | `s = "";` | `s.clear();` | | `s.c_str()[0] = 'u';` | `s[0] = 'u';` | | `len = strlen(s.c_str());` | `len = s.length();` | | `s = string("u");` | `s = "u";` | | `puts(s + string("u"));` | `puts(s + "u");` | | `string s(" ");`
`puts(s.c_str());` | `puts(" ");` | | `s.sprintf("u");` | `s = "u";` | | `char array[32];`
`sprintf(array, "%d", 10);`
`s = string(array);` | `s.sprintf("%d", 10);` | The chances are that if you want to do something with a string, there is a very basic way to do it. You don't want your code to appear in a future version of the above table. ### Cache list size if you want list::size() to be O(1). EASTL's list, slist, intrusive_list, and intrusive_slist containers have a size() implementation which is O(n). That is, these containers don't keep a count (cache) of the current list size and when you call the size() function they iterate the list. This is by design and the reasoning behind it has been deeply debated and considered (and is discussed in the FAQ and the list header file). In summary, list doesn't cache its size because the only function that would benefit is the size function while many others would be negatively impacted and the memory footprint would be negatively impacted, yet list::size is not a very frequently called function in well-designed code. At the same time, nothing prevents the user from caching the size himself, though admittedly it adds some tedium and risk to the code writing process. Here's an example of caching the list size manually: ```cpp list intList; size_t n = 0; intList.push_back(37); ++n; intList.pop_front(); --n; ``` ### Use empty() instead of size() when possible. All conventional containers have both an empty function and a size function. For all containers empty() executes with O(1) (constant time) efficiency. However, this is not so for size(), as some containers need to calculate the size and others need to do pointer subtraction (which may involve integer division) to find the size. ### Know your container efficiencies. The above two practices lead us to this practice, which is a generalization of the above. We present a table of basic information for the conventional EASTL containers. The values are described at the bottom. 
| Container | empty() efficiency | size() efficiency | operator[] efficiency | insert() efficiency | erase() efficiency | find() efficiency | sort efficiency | |------|------|------|------|------|------|------|------| | slist | 1 | O(n) | - | O(1) | O(1) | O(n) | O(n+) | | list | 1 | n | - | 1 | 1 | n | n log(n) | | intrusive_slist | 1 | n | - | 1 | 1 | 1 | n+ | | intrusive_list | 1 | n | - | 1 | 1 | 1 | n log(n) | | array | 1 | 1 | 1 | - | - | n | n log(n) | | vector | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | n | n log(n) | | vector_set | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | vector_multiset | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | vector_map | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | vector_multimap | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | deque | 1 | 1a | 1 | 1 at begin or end, else n / 2 | 1 at begin or end, else n / 2 | n | n log(n) | | bit_vector | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | n | n log(n) | | string, cow_string | 1 | 1a | 1 | 1 at end, else n | 1 at end, else n | n | n log(n) | | set | 1 | 1 | - | log(n) | log(n) | log(n) | 1 | | multiset | 1 | 1 | - | log(n) | log(n) | log(n) | 1 | | map | 1 | 1 | log(n) | log(n) | log(n) | log(n) | 1 | | multimap | 1 | 1 | - | log(n) | log(n) | log(n) | 1 | | hash_set | 1 | 1 | - | 1 | 1 | 1 | - | | hash_multiset | 1 | 1 | - | 1 | 1 | 1 | - | | hash_map | 1 | 1 | - | 1 | 1 | 1 | - | | hash_multimap | 1 | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_set | 1 | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_multiset | 1 | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_map | 1 | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_multimap | 1 | 1 | - | 1 | 1 | 1 | - | Notes: * \- means that the operation does not exist. * 1 means amortized constant time. Also known as O(1) * n means time proportional to the container size. 
Also known as O(n) * log(n) means time proportional to the natural logarithm of the container size. Also known as O(log(n)) * n log(n) means time proportional to log(n) times the size of the container. Also known as O(n log(n)) * n+ means that the time is at least n, and possibly higher. * Inserting at the end of a vector may cause the vector to be resized; resizing a vector is O(n). However, the amortized time complexity for vector insertions at the end is constant. * Sort assumes the usage of the best possible sort for a large container of random data. Some sort algorithms (e.g. quick_sort) require random access iterators and so the sorting of some containers requires a different sort algorithm. We do not include bucket or radix sorts, as they are always O(n). * a vector, deque, string size is O(1) but involves pointer subtraction and thus integer division and so is not as efficient as containers that store the size directly. ### Use vector::reserve. You can prevent vectors (and strings) from reallocating as you add items by specifying up front how many items you will be requiring. You can do this in the constructor or by calling the reserve function at any time. The capacity function returns the amount of space which is currently reserved. Here's how you could specify reserved capacity in a vector: ```cpp vector v(37); // Reserve space to hold up to 37 items. or vector v; // This empty construction causes no memory to be allocated or reserved. v.reserve(37); ``` The EASTL vector (and string) implementation looks like this: ```cpp template class vector { T* mpBegin; // Beginning of used element memory. T* mpEnd; // End of used element memory. T* mpCapacity; // End of storage capacity. Is >= mpEnd } ``` Another approach to being efficient with vector memory usage is to use fixed_vector. ### Use vector::set_capacity to trim memory usage. A commonly asked question about vectors and strings is, "How do I reduce the capacity of a vector?" 
The conventional solution for std STL is to use the somewhat non-obvious trick of using vector(v).swap(v). EASTL provides the same functionality via a member function called set_capacity() which is present in both the vector and string classes. An example of reducing a vector is the following: ```cpp vector v; ... v.set_capacity(); ``` An example of resizing to zero and completely freeing the memory of a vector is the following: ```cpp vector v; ... v.set_capacity(0); ``` ### Use swap() instead of a manually implemented version. The generic swap algorithm provides a basic version for any kind of object. However, each EASTL container provides a specialization of swap which is optimized for that container. For example, the list container implements swap by simply swapping the internal member pointers and not by moving individual elements. ### Consider storing pointers instead of objects. There are times when storing pointers to objects is more efficient or useful than storing objects directly in containers. It can be more efficient to store pointers when the objects are big and the container may need to construct, copy, and destruct objects during sorting or resizing. Moving pointers is usually faster than moving objects. It can be useful to store pointers instead of objects when somebody else owns the objects or the objects are in another container. It might be useful for a Widget to be in a list and in a hash table at the same time. ### Consider smart pointers instead of raw pointers. If you take the above recommendation and store objects as pointers instead of as objects, you may want to consider storing them as smart pointers instead of as regular pointers. This is particularly useful for when you want to delete the object when it is removed from the container. Smart pointers will automatically delete the pointed-to object when the smart pointer is destroyed. 
Otherwise, you will have to be careful about how you work with the list so that you don't generate memory leaks. Smart pointers implement a shared reference count on the stored pointer, and so any operation you do on a smart pointer container will do the right thing. Any pointer can be stored in a smart pointer, and custom new/delete mechanisms can work with smart pointers. The primary smart pointer is shared_ptr. Here is an example of creating and using a shared_ptr: ```cpp typedef shared_ptr WPtr; list wList; wList.push_back(WPtr(new Widget)); // The user may have operator new/delete overrides. wList.pop_back(); // Implicitly deletes the Widget. ``` Here is an example of creating and using a shared_ptr that uses a custom allocation and deallocation mechanism: ```cpp typedef shared_ptr WPtr; // WidgetDelete is a custom destroyer. list wList; wList.push_back(WPtr(WidgetCreate(Widget))); // WidgetCreate is a custom allocator. wList.pop_back(); // Implicitly calls WidgetDelete. ``` ### Use iterator pre-increment instead of post-increment. Pre-increment (e.g. ++x) of iterators is better than post-increment (x++) when the latter is not specifically needed. It is common to find code that uses post-incrementing when it could instead use pre-incrementing; presumably this is due to post-increment looking a little better visually. The problem is that the latter constructs a temporary object before doing the increment. With built-in types such as pointers and integers, the compiler will recognize that the object is a trivial built-in type and that the temporary is not needed, but the compiler cannot do this for other types, even if the compiler sees that the temporary is not used; this is because the constructor may have important side effects and the compiler would be broken if it didn't construct the temporary object. EASTL iterators are usually not trivial types and so it's best not to hope the compiler will do the best thing. 
Thus you should always play it safe and use pre-increment of iterators whenever post-increment is not required. Here is an example of using iterator pre-increment; for loops like this should always use pre-increment: ```cpp for(set::iterator it(intSet.begin()), itEnd(intSet.end()); it != itEnd; ++it) *it = 37; ``` ### Make temporary references so the code can be traced/debugged. Users want to be able to inspect or modify variables which are referenced by iterators. While EASTL containers and iterators are designed to make this easier than other STL implementations, it makes things very easy if the code explicitly declares a reference to the iterated element. In addition to making the variable easier to debug, it also makes code easier to read and makes the debug (and possibly release) version of the application run more efficiently. Instead of doing this: ```cpp for(list::iterator it = wl.begin(), itEnd = wl.end(); it != itEnd; ++it) { (*it).x = 37; (*it).y = 38; (*it).z = 39; } ``` Consider doing this: ```cpp for(list::iterator it = wl.begin(), itEnd = wl.end(); it != itEnd; ++it) { Widget& w = *it; // The user can easily inspect or modify w here. w.x = 37; w.y = 38; w.z = 39; } ``` ### Consider bitvector or bitset instead of vector. In EASTL, a vector of bool is exactly that. It intentionally does not attempt to make a specialization which implements a packed bit array. The bitvector class is specifically designed for this purpose. There are arguments either way, but if vector were allowed to be something other than an array of bool, it would go against user expectations and prevent users from making a true array of bool. There's a mechanism for specifically getting the bit packing, and it is bitvector. Additionally there is bitset, which is not a conventional iterateable container but instead acts like bit flags. bitset may better suit your needs than bitvector if you need to do flag/bit operations instead of array operations. 
bitset does have an operator[], though. ### Vectors can be treated as contiguous memory. EASTL vectors (and strings) guarantee that elements are present in a linear contiguous array. This means that you can use a vector as you would a C-style array by using the vector data() member function or by using &v[0]. To use a vector as a pointer to an array, you can use the following code: ```cpp struct Widget { uint32_t x; uint32_t y; }; vector v; quick_sort((uint64_t*)v.data(), (uint64_t*)(v.data() + v.size())); ``` ### Search hash_map\<string\> using heterogeneous lookup When using a hash map (or any associative container) of string objects that you want to search for by string literals (e.g. "hello") or char pointers, if you search for a string via the `find` function, your string literal will necessarily be converted to a temporary string object, which is inefficient because string allocates. You can avoid creating these temporary objects by declaring the container type with [transparent comparison types](https://en.cppreference.com/w/cpp/utility/functional#Transparent_function_objects) (eg. [`equal_to`](https://en.cppreference.com/w/cpp/utility/functional/less_void)). eg. ```cpp eastl::hash_map> hashMap; ... auto it = hashMap.find("hello"); // No string object created. Uses heterogeneous lookup, which calls transparent_string_hash::operator()(const string&) and equal_to::operator()(const string&, const char*). ``` EASTL containers support this optimization for additional member functions that take a `key_type` parameter, ie. heterogeneous lookup, insertion and erasure. EASTL also has a member function called `find_as` which lets you search a hash table by something other than the container comparison type. Note that the comparison semantics must be equivalent to the container's comparison object, otherwise the behaviour is undefined. To use `find_as`, you can use the following code: ```cpp hash_map hashMap; ... 
auto it = hashMap.find_as("hello", /* a hash object */, /* an equality comparison object */); ``` Using transparent comparison types should be preferred due to the support for additional member functions and additional safety. ### Take advantage of type_traits. EASTL includes a fairly serious type traits library that is on par with the one found in Boost but offers some additional performance-enhancing help as well. The type_traits library provides information about class *types*, as opposed to class instances. For example, the is_integral type trait tells if a type is one of int, short, long, char, uint64_t, etc. There are three primary uses of type traits: * Allowing for optimized operations on some data types. * Allowing for different logic pathways based on data types. * Allowing for compile-time assertions about data type expectations. Here is an example of using type traits to tell if a value is a floating point value or not: ```cpp template DoSomething(T t) { assert(is_floating_point::value); } ``` Here is an example of declaring a class that is trivially copyable and using it in a vector. ```cpp class Widget { ... }; // Anything conforming to the trivially copyable rules: https://en.cppreference.com/w/cpp/language/classes#Trivially_copyable_class vector wVector{ ... some elements ... }; wVector.erase(wVector.begin()); // This operation will be optimized to memcpy the elements into place. ``` ### Name containers to track memory usage. All EASTL containers which allocate memory have a built-in function called set_name and have a constructor argument that lets you specify the container name. This name is used in memory tracking and allows for the categorization and measurement of memory usage. You merely need to supply a name for your containers to use and it does the rest. 
Here is an example of creating a list and naming it "collision list": `list collisionList(allocator("collision list"));` or ```cpp list collisionList; collisionList.get_allocator().set_name("collision list"); ``` Note that EASTL containers do not copy the name contents but merely copy the name pointer. This is done for simplicity and efficiency. A user can get around this limitation by creating a persistently present string table. Additionally, the user can get around this by declaring static but non-const strings and modifying them at runtime. ### Learn the algorithms. EASTL algorithms provide a variety of optimized implementations of fundamental algorithms. Many of the EASTL algorithms are the same as the STL algorithm set, though EASTL adds additional algorithms and additional optimizations not found in STL implementations such as Microsoft's. The copy algorithm, for example, will memcpy data types that have the has_trivial_relocate type trait instead of doing an element-by-element copy. The classifications we use here are not exactly the same as found in the C++ standard; they have been modified to be a little more intuitive. Not all the functions listed here may be yet available in EASTL as you read this. If you want some function then send a request to the maintainer. Detailed documentation for each algorithm is found in algorithm.h or the otherwise corresponding header file for the algorithm. 
**Search** * find, find_if * find_end * find_first_of * adjacent_find * binary_search * search, search_n * lower_bound * upper_bound * equal_range **Sort** * is_sorted * quick_sort * insertion_sort * shell_sort * heap_sort * merge_sort, merge_sort_buffer * merge * inplace_merge * partial_sort * stable_sort * partial_sort_copy **Modifying** * fill, fill_n * generate, generate_n * random_shuffle * swap * iter_swap * swap_ranges * remove, remove_if * remove_copy, remove_copy_if * replace, replace_if * replace_copy, replace_copy_if * reverse * reverse_copy * rotate * rotate_copy * partition * stable_partition * transform * next_permutation * prev_permutation * unique * unique_copy **Non-Modifying** * for_each * copy * copy_backward * count, count_if * equal * mismatch * min * max * min_element * max_element * lexicographical_compare * nth_element **Heap** * is_heap * make_heap * push_heap * pop_heap * change_heap * sort_heap * remove_heap **Set** * includes * set_difference * set_symmetric_difference * set_intersection * set_union ### Pass and return containers by reference instead of value. If you aren't paying attention you might accidentally write code like this: ```cpp void DoSomething(list widgetList) { ... } ``` The problem with the above is that widgetList is passed by value and not by reference. Thus a copy of the container is made and passed instead of a reference of the container being passed. This may seem obvious to some but this happens periodically and the compiler gives no warning and the code will often execute properly, but inefficiently. Of course there are some occasions where you really do want to pass values instead of references. ### Consider using reset_lose_memory() for fast container teardown. EASTL containers have a reset_lose_memory function which unilaterally resets the container to a newly constructed state. The contents of the container are forgotten; no destructors are called and no memory is freed. 
This is a risky but powerful function for the purpose of implementing very fast temporary containers. There are numerous cases in high performance programming when you want to create a temporary container out of a scratch buffer area, use the container, and then just "vaporize" it, as it would be a waste of time to go through the trouble of clearing the container and destroying and freeing the objects. Such functionality is often used with hash tables or maps and with a stack allocator (a.k.a. linear allocator). Here's an example of usage of the reset function and a PPMalloc-like StackAllocator: ```cpp pStackAllocator->push_bookmark(); hash_set, StackAllocator> wSet(pStackAllocator); wSet.reset_lose_memory(); pStackAllocator->pop_bookmark(); ``` ### Consider using fixed_substring instead of copying strings. EASTL provides a fixed_substring class which uses a reference to a character segment instead of allocating its own string memory. This can be a more efficient way to work with strings under some circumstances. Here's an example of usage of fixed_substring: ```cpp basic_string str("hello world"); fixed_substring sub(str, 6, 5); // sub == "world" fixed_substring can refer to any character array and not just one that derives from a string object. ``` ### Consider using vector::push_back(void). EASTL provides an alternative way to insert elements into containers that avoids copy construction and/or the creation of temporaries. Consider the following code: ```cpp vector widgetArray; widgetArray.push_back(Widget()); ``` The standard vector push_back function requires you to supply an object to copy from. This incurs the cost of the creation of a temporary and for some types of classes or situations this cost may be undesirable. It additionally requires that your contained class support copy-construction whereas you may not be able to support copy construction. 
As an alternative, EASTL provides a push_back(void) function which requires nothing to copy from but instead constructs the object in place in the container. So you can do this: ```cpp vector widgetArray; widgetArray.push_back(); widgetArray.back().x = 0; // Example of how to reference the new object. ``` Other containers with such copy-less functions include: ```cpp vector::push_back() deque::push_back() deque::push_front() list::push_back() list::push_front() slist::push_front() map::insert(const key_type& key) multimap::insert(const key_type& key) hash_map::insert(const key_type& key) hash_multimap::insert(const key_type& key) ``` Note that the map functions above allow you to insert a default value specified by key alone and not a value_type like with the other map insert functions. ---------------------------------------------- End of document ================================================ FILE: doc/Bonus/tuple_vector_readme.md ================================================ ## Introduction to tuple_vector `tuple_vector` is a data container that is designed to abstract and simplify the handling of a "structure of arrays" layout of data in memory. In particular, it mimics the interface of `vector`, including functionality to do inserts, erases, push_backs, and random-access. It also provides a `RandomAccessIterator` and corresponding functionality, making it compatible with most STL (and STL-esque) algorithms such as ranged-for loops, `find_if`, `remove_if`, or `sort`. When used or applied properly, this container can improve performance of some algorithms through cache-coherent data accesses or allowing for sensible SIMD programming, while keeping the structure of a single container, to permit a developer to continue to use existing algorithms in STL and the like. 
## Review of "Structure of arrays" data layouts When trying to improve the performance of some code, it can sometimes be desirable to transform how some data is stored in memory to be laid out not as an "array of structures", but as a "structure of arrays". That is, instead of storing a series of objects as a single contiguous chunk of memory, one or more data members are instead stored as separate chunks of memory that are handled and accessed in parallel to each other. This can be beneficial in two primary respects: 1) To improve the cache coherency of the data accesses, e.g. by utilizing more data that is loaded per cache line loaded from memory, and thereby reducing the amount of time waiting on memory accesses from off-CPU memory. This presentation from Mike Acton touches on this, among other things: https://www.youtube.com/watch?v=rX0ItVEVjHc 2) To allow the data to be more easily loaded and utilized by SIMD kernels, by being able to load memory directly into a SIMD register. This is touched on in this presentation from Andreas Fredriksson for writing code with SIMD intrinsics: http://www.gdcvault.com/play/1022249/SIMD-at-Insomniac-Games-How ...and as well in this guide for writing performant ISPC kernels: https://ispc.github.io/perfguide.html ## How TupleVecImpl works `tuple_vector` inherits from `TupleVecImpl`, which provides the bulk of the functionality for those data containers. It manages the memory allocated, marshals data members to each array of memory, generates the necessary iterators, and so on. When a `tuple_vector` is declared, it is alongside a list of types, or "tuple elements", indicating what data to store in the container, similar to how `tuple` operates. `TupleVecImpl` uses this list of tuple elements to then inherit from a series of `TupleVecLeaf` structures, which each have their own pointer to an array of their corresponding type in memory. 
When dereferencing the container, either to fetch a tuple of references or just fetching pointers to the memory, it is these pointers that are utilized or fetched. While each `TupleVecLeaf` contains a pointer to its own block of memory, they are not individual memory allocations. When `TupleVecImpl` needs to grow its capacity, it calculates the total size needed for a single allocation, taking into account the number of objects for the container, the size of each tuple element's type, and the alignment requirements for each type. Pointers into the allocation for each tuple element are also determined at the same time, which are passed to each `TupleVecLeaf`. From there, many of the interactions with `TupleVecImpl`, to modify or access members of the container, then reference each `TupleVecLeaf`'s data pointer in series, using parameter packs to repeat each operation for each parent `TupleVecLeaf`. ## How tuple_vector's iterator works `TupleVecImpl` provides a definition to an iterator type, `TupleVecIter`. As mentioned above, `TupleVecIter` provides all of the functionality to operate as a `RandomAccessIterator`. When it is dereferenced, it provides a tuple of references, similar to `at()` or `operator[]` on `TupleVecImpl`, as opposed to a reference of some other type. As well, a customization of `move_iterator` for `TupleVecIter` is provided, which will return a tuple of rvalue-references. The way that `TupleVecIter` operates internally is to track an index into the container, as well as a copy of all of the `TupleVecImpl`'s `TupleVecLeaf` pointers at the time of the iterator's construction. As a result, modifying the iterator involves just changing the index, and dereferencing the iterator into the tuple of references involves dereferencing each pointer with an offset specified by that index. Of the various ways of handling the multitude of references, this tended to provide the best code-generation. 
For example, having a tuple of pointers that are collectively modified with each iterator modification resulted in the compiler not being able to accurately determine which pointers were relevant to the final output of some function, creating many redundant operations. Similarly, having the iterator refer to the source `TupleVecImpl` for the series of pointers often resulted in extra, unnecessary, data hops to the `TupleVecImpl` to repeatedly fetch data that was not practically mutable, but theoretically mutable. While this solution is the heaviest in terms of storage, the resulting assembly tends to be competitive with traditional structure-of-arrays setups. ## How to work with tuple_vector, and where to use it Put simply, `tuple_vector` can be used as a replacement for `vector`. For example, instead of declaring a structure and vector as: ``` struct Entity { bool active; float lifetime; Vec3 position; } vector entityVec; ``` ...the `tuple_vector` equivalent of this can be defined as: ``` tuple_vector entityVec; ``` In terms of how `tuple_vector` is modified and accessed, it has a similar featureset as `vector`, except where `vector` would accept or return a single value, it instead accepts or returns a tuple of values or unstructured series of equivalent arguments. For example, the following functions can be used to access the data, either by fetching a tuple of references to a series of specific values, or the data pointers to the tuple elements: ``` tuple operator[](size_type) tuple at(size_type) tuple iterator::operator*() tuple move_iterator::operator*() tuple data() // extract the Ith tuple element pointer from the tuple_vector template T* get() // e.g. bool* get<0>(), float* get<1>(), and Vec3* get<2>() // extract the tuple element pointer of type T from the tuple_vector // note that this function can only be used if there is one instance // of type T in the tuple_vector's elements template T* get() // e.g. 
bool* get(), float* get(), and Vec3* get() ``` And `push_back(...)` has the following overloads, accepting either values or tuples as needed. ``` tuple push_back() push_back(const bool&, const float&, const Vec3&) push_back(tuple) push_back(bool&&, float&&, Vec3&&) push_back(tuple) ``` ...and so on, and so forth, for others like the constructor, `insert(...)`, `emplace(...)`, `emplace_back(...)`, `assign(...)`, and `resize(...)`. As well, note that the tuple types that are accepted or returned for `tuple_vector` have typedefs available in the case of not wanting to use automatic type deduction: ``` typedef eastl::tuple value_tuple; typedef eastl::tuple reference_tuple; typedef eastl::tuple const_reference_tuple; typedef eastl::tuple ptr_tuple; typedef eastl::tuple const_ptr_tuple; typedef eastl::tuple rvalue_tuple; ``` With this, and the fact that the iterator type satisfies the `RandomAccessIterator` requirements, it is possible to use `tuple_vector` in most ways and manners that `vector` was previously used, with few structural differences. However, even if not using it strictly as a replacement for `vector`, it is still useful as a tool for simplifying management of a traditional structure of arrays. That is, it is possible to use `tuple_vector` to just perform a single large memory allocation instead of a series of smaller memory allocations, by sizing the `tuple_vector` as needed, fetching the necessary pointers with `data()` or `get<...>()`, and carrying on normally. One example where this can be utilized is with ISPC integration. 
Given the following ISPC function definition: export void simple(uniform float vin[], uniform float vfactors[], uniform float vout[], uniform int size); ...which generates the following function prototype for C/C++ usage: extern void simple(float* vin, float* vfactors, float* vout, int32_t size); ...this can be utilized with some raw float arrays: ``` float* vin = new float[NumElements]; float* vfactors = new float[NumElements]; float* vout = new float[NumElements]; // Initialize input buffer for (int i = 0; i < NumElements; ++i) { vin[i] = (float)i; vfactors[i] = (float)i / 2.0f; } // Call simple() function from simple.ispc file simple(vin, vfactors, vout, NumElements); delete vin; delete vfactors; delete vout; ``` or, with `tuple_vector`: ``` tuple_vector simpleData(NumElements); float* vin = simpleData.get<0>(); float* vfactors = simpleData.get<1>(); float* vout = simpleData.get<2>(); // Initialize input buffer for (int i = 0; i < NumElements; ++i) { vin[i] = (float)i; vfactors[i] = (float)i / 2.0f; } // Call simple() function from simple.ispc file simple(vin, vfactors, vout, NumElements); ``` `simpleData` here only has a single memory allocation during its construction, instead of the three in the first example, and also automatically releases the memory when it falls out of scope. It is possible to also skip a memory allocation entirely, in some circumstances. EASTL provides "fixed" counterparts of many data containers which allows for a data container to have an inlined buffer of memory. For example, `eastl::vector` has the following counterpart: eastl::fixed_vector This buffer allows for enough space to hold a `nodeCount` number of `T` objects, skipping any memory allocation at all, until the requested size becomes greater than `nodeCount` - assuming `enableOverflow` is True. 
There is a similar counterpart to `eastl::tuple_vector` available as well: eastl::fixed_tuple_vector This does the similar legwork in creating an inlined buffer, and all of the functionality of `tuple_vector` otherwise is supported. Note the slight difference in declaration, though: `nodeCount` and `enableOverflow` are defined first, and `enableOverflow` is not a default parameter. This change arises out of restrictions surrounding variadic templates, in that they must be declared last, and cannot be mixed with default template parameters. Lastly, `eastl::vector` and other EASTL data containers support custom Memory Allocator types, through their template parameters. For example, `eastl::vector`'s full declaration is actually: eastl::vector However, because such a default template parameter cannot be used with variadic templates, a separate type for `tuple_vector` is required for such a definition: eastl::tuple_vector_alloc Note that `tuple_vector` uses EASTLAllocatorType as the allocator. ## Performance comparisons/discussion A small benchmark suite for `tuple_vector` is included when running the EASTLBenchmarks project. It provides the following output on a Core i7 3770k (Skylake) at 3.5GHz, with DDR3-1600 memory. The `tuple_vector` benchmark cases compare total execution time of similar algorithms run against `eastl::tuple_vector` and `std::vector`, such as erasing or inserting elements, iterating through the array to find a specific element, sum all of the elements together via operator[] access, or just running `eastl::sort` on the data containers. 
More information about the EASTLBenchmarks suite can be found in EASTL/doc/EASTL Benchmarks.html Benchmark | STD execution time | EASTL execution time | Ratio --------- | -------- | ---------- | ----- `tuple_vector/erase ` | 1.7 ms | 1.7 ms | 1.00 `tuple_vector/erase ` | 104.6 ms | 106.3 ms | 0.98 `tuple_vector/reallocate ` | 1.3 ms | 1.7 ms | 0.77 - | | | `tuple_vector/erase ` | 3.4 ms | 3.5 ms | 0.98 `tuple_vector/insert ` | 3.4 ms | 3.4 ms | 0.99 `tuple_vector/iteration ` | 56.3 us | 81.4 us | 0.69 - `tuple_vector/operator[] ` | 67.4 us | 61.8 us | 1.09 `tuple_vector/push_back ` | 1.3 ms | 818.3 us | 1.53 + `tuple_vector/sort ` | 5.8 ms | 7.3 ms | 0.80 | | | `tuple_vector/erase ` | 34.7 ms | 32.9 ms | 1.05 `tuple_vector/insert ` | 41.0 ms | 32.6 ms | 1.26 `tuple_vector/iteration ` | 247.1 us | 80.5 us | 3.07 + `tuple_vector/operator[]` | 695.7 us | 81.1 us | 8.58 + `tuple_vector/push_back ` | 10.0 ms | 6.0 ms | 1.67 + `tuple_vector/sort ` | 8.2 ms | 10.1 ms | 0.81 | | | `vector/erase ` | 1.3 ms | 1.2 ms | 1.05 `vector/erase ` | 104.4 ms | 109.4 ms | 0.95 `vector/reallocate ` | 1.5 ms | 1.5 ms | 0.95 | | | `vector/erase ` | 4.3 ms | 3.6 ms | 1.20 `vector/insert ` | 4.8 ms | 4.8 ms | 1.01 `vector/iteration ` | 71.5 us | 77.3 us | 0.92 `vector/operator[] ` | 90.7 us | 87.2 us | 1.04 `vector/push_back ` | 1.6 ms | 1.2 ms | 1.38 + `vector/sort ` | 7.7 ms | 8.2 ms | 0.93 First off, `tuple_vector`'s performance versus `std::vector` is comparable, as expected, as the `tuple_vector`'s management for one type becomes very similar to just a regular vector. The major notable exception is the iteration case, which runs `eastl::find_if`. This performance difference is a consequence of the iterator design, and how it works with indices, not a direct pointer, so the code generation suffers slightly in this compute-bound scenario. 
This is worth noting as a demonstration of a case where falling back to pointer-based iteration by fetching the `begin` and `end` pointers of that tuple element may be preferable, instead of using the iterator constructs. The set of `tuple_vector` tests are more interesting. This is a comparison between a single `std::vector` with a structure containing a `uint64` and 56 bytes of padding, and a `tuple_vector` with two elements: one for `uint64` and one for 56 bytes of padding. The erase, insert, push_back, and sort cases all perform at a similar relative rate as they did in the `tuple_vector` tests - demonstrating that operations that have to touch all of the elements do not have a significant change in performance. However, iteration and operator[] are very different, because those only access the `uint64` member of both `vector` and `tuple_vector` to run some operation. The iteration test now runs 3x faster whereas before it ran 0.7x as fast, and operator[] runs 8.5x faster, instead of 1.1x. This demonstrates some of the utility of `tuple_vector`, in that these algorithms end up being limited by the CPU's compute capabilities, as opposed to being limited by how fast they can load memory in from DRAM. In a series of other tests, generally speaking, `tuple_vector` tends to perform on par with manual management of multiple arrays in many algorithms and operations, often even generating the same code. It should be noted that significant degrees of inlining and optimization are required to get the most out of `tuple_vector`. Compared to accessing a series of arrays or vectors, `tuple_vector` does perform a multitude of extra trivial function calls internally in order to manage the various elements, or interact with `eastl::tuple` through its interface, so running in debug configurations can run significantly slower in some cases, e.g. sometimes running at 0.2x the speed compared to vector. 
## The problem of referencing tuple elements This will be experienced shortly after using `tuple_vector` in most capacities, but it should be noted that the most significant drawback is that there is no way to **symbolically** reference each tuple element of the `tuple_vector` - much in the same way as `tuple`. For example, if translating a struct such as... ``` struct Entity { float x, y, z; float lifetime; }; ``` ...to `tuple_vector`, it will exist as: ``` tuple_vector entityVec; ``` ...and can only be accessed in a manner like `entityVec.get<3>()` to refer to the `lifetime` member. With existing tools, the only good alternatives are to encapsulate each float as a separate struct to give it unique typenames... ``` struct entityX { float val; }; struct entityY { float val; }; struct entityZ { float val; }; struct entityLifetime { float val; }; tuple_vector entityVec; ``` ...and then access each tuple element by typename like `entityVec.get()`; or, creating an enumerated value to replace the indices... ``` enum EntityTypeEnum { entityX = 0, entityY = 1, entityZ = 2, entityLifetime = 3 }; tuple_vector entityVec; ``` ...and then access each tuple element by the enumerated value: `entityVec.get()`. Either way, there is a fairly significant maintenance and readability issue around this. This is arguably more severe than with `tuple` on its own because that is generally not intended for structures with long lifetime. Ideally, if the language could be mutated to accommodate such a thing, it would be good to have some combination of typenames and symbolic names in the declaration, e.g. something like ``` tuple_vector entityVec; ``` and be able to reference the tuple elements not just by typename or index, but through their corresponding symbol, like `entityVec.get()`. Or, it may be interesting if the necessary `get` functions could be even automatically generated through a reflection system, e.g. `entityVec.get_lifetime()`. All of this remains a pipe dream for now. 
================================================ FILE: doc/CMake/EASTL_Project_Integration.md ================================================ ## Using EASTL in your own projects This page describes the steps needed to use EASTL in your own projects ## Setting up your project ### Using CMake Add to your CMakeLists.txt: ```cmake set(EASTL_ROOT_DIR C:/EASTL) include_directories (${EASTL_ROOT_DIR}/include) include_directories (${EASTL_ROOT_DIR}/test/packages/EAAssert/include) include_directories (${EASTL_ROOT_DIR}/test/packages/EABase/include/Common) include_directories (${EASTL_ROOT_DIR}/test/packages/EAMain/include) include_directories (${EASTL_ROOT_DIR}/test/packages/EAStdC/include) include_directories (${EASTL_ROOT_DIR}/test/packages/EATest/include) include_directories (${EASTL_ROOT_DIR}/test/packages/EAThread/include) set(EASTL_LIBRARY debug ${EASTL_ROOT_DIR}/build/Debug/EASTL.lib optimized ${EASTL_ROOT_DIR}/build/Release/EASTL.lib) add_custom_target(NatVis SOURCES ${EASTL_ROOT_DIR}/doc/EASTL.natvis) ``` And then add the library into the linker ``` target_link_libraries(... ${EASTL_LIBRARY}) ``` ### Using Visual Studio Using Visual Studio projects directly, you will need to do the following steps: - Add the include paths - Add the library path - Add the library dependency - Add natvis (optional) > Note that in the examples below ${EASTL_ROOT_DIR} is the folder in which you stored EASTL. You could create an environment variable for this. 
#### Add the include paths Add the following paths to your C/C++ -> General -> Additional include directories: ``` ${EASTL_ROOT_DIR}/include ${EASTL_ROOT_DIR}/test/packages/EAAssert/include ${EASTL_ROOT_DIR}/test/packages/EABase/include/Common ${EASTL_ROOT_DIR}/test/packages/EAMain/include ${EASTL_ROOT_DIR}/test/packages/EAStdC/include ${EASTL_ROOT_DIR}/test/packages/EATest/include ${EASTL_ROOT_DIR}/test/packages/EAThread/include ``` #### Add the library path Add the following library path to your Linker -> General -> Additional Library Directories: ``` ${EASTL_ROOT_DIR}/build/$(Configuration) ``` #### Add the library dependency Either add the following library to your Linker -> Input -> Additional Dependencies ``` EASTL.lib ``` Or in code use the following: ``` #pragma comment(lib, "EASTL.lib") ``` #### Add natvis (optional) > Adding the natvis file to your project allows the debugger to use custom visualizers for the eastl data types. This greatly enhances the debugging experience. Add the natvis file anywhere in your solution: ``` Right-click your project: Add -> Existing item and then add the following file: ${EASTL_ROOT_DIR}/doc/EASTL.natvis ``` ## Setting up your code ### Overloading operator new[] EASTL requires you to have an overload for the operator new[], here is an example that just forwards to global new[]: ```c void* __cdecl operator new[](size_t size, const char* name, int flags, unsigned debugFlags, const char* file, int line) { return new uint8_t[size]; } ``` ================================================ FILE: doc/Design.md ================================================ # EASTL Design ## Introduction EASTL (EA Standard Template Library) is designed to be a template library which encompasses and extends the functionality of standard C++ STL while improving it in various ways useful to game development. Much of EASTL's design is identical to standard STL, as the large majority of the STL is well-designed for many uses. 
The primary areas where EASTL deviates from standard STL implementations are essentially the following: * EASTL has a simplified and more flexible custom allocation scheme. * EASTL has significantly easier to read code. * EASTL has extension containers and algorithms. * EASTL has optimizations designed for game development. Of the above items, the only one which is an incompatible difference with STL is the case of memory allocation. The method for defining a custom allocator for EASTL is slightly different than that of standard STL, though they are 90% similar. The 10% difference, however, is what makes EASTL generally easier and more powerful to work with than standard STL. Containers without custom allocators act identically between EASTL and standard STL. ## Motivations Our motivations for making EASTL drive the design of EASTL. As identified in the EASTL RFC (Request for Comment), the primary reasons for implementing a custom version of the STL are: * Some STL implementations (especially Microsoft STL) have inferior performance characteristics that make them unsuitable for game development. EASTL is faster than all existing STL implementations. * The STL is sometimes hard to debug, as most STL implementations use cryptic variable names and unusual data structures. * STL allocators are sometimes painful to work with, as they have many requirements and cannot be modified once bound to a container. * The STL includes excess functionality that can lead to larger code than desirable. It's not very easy to tell programmers they shouldn't use that functionality. * The STL is implemented with very deep function calls. This results in unacceptable performance in non-optimized builds and sometimes in optimized builds as well. * The STL doesn't support alignment of contained objects. * STL containers won't let you insert an entry into a container without supplying an entry to copy from. This can be inefficient. * Useful STL extensions (e.g. 
slist, hash_map, shared_ptr) found in existing STL implementations such as STLPort are not portable because they don't exist in other versions of STL or aren't consistent between STL versions. * The STL lacks useful extensions that game programmers find useful (e.g. intrusive_list) but which could be best optimized in a portable STL environment. * The STL has specifications that limit our ability to use it efficiently. For example, STL vectors are not guaranteed to use contiguous memory and so cannot be safely used as an array. * The STL puts an emphasis on correctness before performance, whereas sometimes you can get significant performance gains by making things less academically pure. * STL containers have private implementations that don't allow you to work with their data in a portable way, yet sometimes this is an important thing to be able to do (e.g. node pools). * All existing versions of STL allocate memory in empty versions of at least some of their containers. This is not ideal and prevents optimizations such as container memory resets that can greatly increase performance in some situations. * The STL is slow to compile, as most modern STL implementations are very large. * There are legal issues that make it hard for us to freely use portable STL implementations such as STLPort. * We have no say in the design and implementation of the STL and so are unable to change it to work for our needs. ## Prime Directives The implementation of EASTL is guided foremost by the following directives which are listed in order of importance. 1. Efficiency (speed and memory usage) 2. Correctness 3. Portability 4. Readability Note that unlike commercial STL implementations which must put correctness above all, we put a higher value on efficiency. As a result, some functionality may have some usage limitation that is not present in other similar systems but which allows for more efficient operation, especially on the platforms of significance to us. 
Portability is significant, but not critical. Yes, EASTL must compile and run on all platforms that we will ship games for. But we don't take that to mean under all compilers that could be conceivably used for such platforms. For example, Microsoft VC6 can be used to compile Windows programs, but VC6's C++ support is too weak for EASTL and so you simply cannot use EASTL under VC6. Readability is something that EASTL achieves better than many other templated libraries, particularly Microsoft STL and STLPort. We make every attempt to make EASTL code clean and sensible. Sometimes our need to provide optimizations (particularly related to type_traits and iterator types) results in less simple code, but efficiency happens to be our prime directive and so it overrides all other considerations. ## Thread Safety It's not simple enough to simply say that EASTL is thread-safe or thread-unsafe. However, we can say that with respect to thread safety EASTL does the right thing. Individual EASTL containers are not thread-safe. That is, access to an instance of a container from multiple threads at the same time is unsafe if any of those accesses are modifying operations. A given container can be read from multiple threads simultaneously as well as any other standalone data structure. If a user wants to be able to have modifying access to an instance of a container from multiple threads, it is up to the user to ensure that proper thread synchronization occurs. This usually means using a mutex. EASTL classes other than containers are the same as containers with respect to thread safety. EASTL functions (e.g. algorithms) are inherently thread-safe as they have no instance data and operate entirely on the stack. As of this writing, no EASTL function allocates memory and thus doesn't bring thread safety issues via that means. The user may well need to be concerned about thread safety with respect to memory allocation. 
If the user modifies containers from multiple threads, then allocators are going to be accessed from multiple threads. If an allocator is shared across multiple container instances (of the same type of container or not), then mutexes (as discussed above) the user uses to protect access to individual instances will not suffice to provide thread safety for allocators used across multiple instances. The conventional solution here is to use a mutex within the allocator if it is expected to be used by multiple threads. EASTL uses neither static nor global variables and thus there are no inter-instance dependencies that would make thread safety difficult for the user to implement. ## Container Design All EASTL containers follow a set of consistent conventions. Here we define the prototypical container which has the minimal functionality that all (non-adapter) containers must have. Some containers (e.g. stack) are explicitly adapter containers and thus wrap or inherit the properties of the wrapped container in a way that is implementation specific. 
```cpp template class container { public: typedef container this_type; typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef ptrdiff_t difference_type; typedef impl_defined size_type; typedef impl-defined iterator; typedef impl-defined const_iterator; typedef reverse_iterator reverse_iterator; typedef reverse_iterator reverse_const_iterator; typedef Allocator allocator_type; public: container(const allocator_type& allocator = allocator_type()); container(const this_type& x); this_type& operator=(this_type& x); void swap(this_type& x); void reset(); allocator_type& get_allocator(); void set_allocator(allocator_type& allocator); iterator begin(); const_iterator begin() const; iterator end(); const_iterator end() const; bool validate() const; int validate_iterator(const_iterator i) const; protected: allocator_type mAllocator; }; template bool operator==(const container& a, const container& b); template bool operator!=(const container& a, const container& b); ``` Notes: * Swapped containers do not swap their allocators. * Newly constructed empty containers do no memory allocation. Some STL and other container libraries allocate an initial node from the class memory allocator. EASTL containers by design never do this. If a container needs an initial node, that node should be made part of the container itself or be a static empty node object. * Empty containers (new or otherwise) contain no constructed objects, including those that might be in an 'end' node. Similarly, no user object (e.g. of type T) should be constructed unless required by the design and unless documented in the container/algorithm contract.  * The reset function is a special extension function which unilaterally resets the container to an empty state without freeing the memory of the contained objects. This is useful for very quickly tearing down a container built into scratch memory. 
No memory is allocated by reset, and the container has no allocated memory after the reset is executed. * The validate and validate_iterator functions provide explicit container and iterator validation. EASTL provides an option to do implicit automatic iterator and container validation, but full validation (which can be potentially extensive) has too much of a performance cost to execute implicitly, even in a debug build. So EASTL provides these explicit functions which can be called by the user at the appropriate time and in optimized builds as well as debug builds. ## Allocator Design The most significant difference between EASTL and standard C++ STL is that standard STL containers are templated on an allocator class with the interface defined in std::allocator. std::allocator is defined in the C++ standard as this: ```cpp // Standard C++ allocator template class allocator { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef T value_type; template struct rebind { typedef allocator other; }; allocator() throw(); allocator(const allocator&) throw(); template allocator(const allocator&) throw(); ~allocator() throw(); pointer address(reference x) const; const_pointer address(const_reference x) const; pointer allocate(size_type, typename allocator::const_pointer hint = 0); void deallocate(pointer p, size_type n); size_type max_size() const throw(); void construct(pointer p, const T& val); void destroy(pointer p); }; ``` Each STL container needs to have an allocator templated on container type T associated with it. The problem with this is that allocators for containers are defined at the class level and not the instance level. This makes it painful to define custom allocators for containers and adds to code bloat. Also, it turns out that the containers don't actually use allocator but instead use allocator\::rebind\::other. 
Lastly, you cannot access this allocator after the container is constructed. There are some good academic reasons why the C++ standard works this way, but it results in a lot of unnecessary pain and makes concepts like memory tracking much harder to implement. What EASTL does is use a more familiar memory allocation pattern whereby there is only one allocator class interface and it is used by all containers. Additionally EASTL containers let you access their allocators and query them, name them, change them, etc. EASTL has chosen to make allocators not be copied between containers during container swap and assign operations. This means that if container A swaps its contents with container B, both containers retain their original allocators. Similarly, assigning container A to container B causes container B to retain its original allocator. Containers that are equivalent should report so via operator==; EASTL will do a smart swap if allocators are equal, and a brute-force swap otherwise. ```cpp // EASTL allocator class allocator { public:     allocator(const char* pName = NULL);     void* allocate(size_t n, int flags = 0);     void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0);     void  deallocate(void* p, size_t n);     const char* get_name() const;     void        set_name(const char* pName); }; allocator* GetDefaultAllocator(); ``` ## Fixed Size Container Design EASTL supplies a set of fixed-size containers that the user can use, though the user can also implement their own versions. So in addition to class list there is class fixed_list. The fixed_list class implements a linked list via a fixed-size pool of contiguous memory which has no space overhead (unlike with a regular heap), doesn't cause fragmentation, and allocates very quickly. EASTL implements fixed containers via subclasses of regular containers which set the regular container's allocator to point to themselves. 
Thus the implementation for fixed_list is very tiny and consists of little more than constructor and allocator functions. This design has some advantages but has one small disadvantage. The primary advantages are that code bloat is reduced and that the implementation is simple and the user can easily extend it. The primary disadvantage is that the parent list class ends up with a pointer to itself and thus has 4 bytes that could arguably be saved if the system was designed differently. That different design would be to make the list class have a policy template parameter which specifies that it is a fixed pool container. EASTL chose not to follow the policy design because it would complicate the implementation, make it harder for the user to extend the container, and would potentially waste more memory due to code bloat than it would save due to the 4 byte savings it achieves in container instances. ## Algorithm Design EASTL algorithms very much follow the philosophy of standard C++ algorithms, as this philosophy is sound and efficient. One of the primary aspects of algorithms is that they work on iterators and not containers. You will note for example that the find algorithm takes a first and last iterator as arguments and not a container. This has two primary benefits: it allows the user to specify a subrange of the container to search within and it allows the user to apply the find algorithm to sequences that aren't containers (e.g. a C array). EASTL algorithms are optimized at least as well as the best STL algorithms found in commercial libraries and are significantly optimized over the algorithms that come with the first-party STLs that come with compilers. Most significantly, EASTL algorithms take advantage of type traits of contained classes and take advantage of iterator types to optimize code generation. 
For example, if you resize an array of integers (or other "pod" type), EASTL will detect that this can be done with a memcpy instead of a slow object-by-object move as would Microsoft STL. The optimizations found in EASTL algorithms and the supporting code in EASTL type traits consist of some fairly tricky advanced C++ and while it is fairly easy to read, it requires a C++ expert (language lawyer, really) to implement confidently. The result of this is that it takes more effort to develop and maintain EASTL than it would to maintain a simpler library. However, the performance advantages have been deemed worth the tradeoff. ## Smart Pointer Design EASTL implements the following smart pointer types: * shared_ptr * shared_array * weak_ptr * intrusive_ptr * scoped_ptr * scoped_array * linked_ptr * linked_array All but linked_ptr/linked_array are well-known smart pointers from the Boost library. The behaviour of these smart pointers is very similar to those from Boost with two exceptions: * EASTL smart pointers allow you to assign an allocator to them. * EASTL shared_ptr implements deletion via a templated parameter instead of a dynamically allocated virtual member object interface. With respect to assigning an allocator, this gives EASTL more control over memory allocation and tracking, as Boost smart pointers unilaterally use global operator new to allocate memory from the global heap. With respect to shared_ptr deletion, EASTL's current design of using a templated parameter is questionable, but does have some reason. The advantage is that EASTL avoids a heap allocation, avoids virtual function calls, and avoids templated class proliferation. The disadvantage is that EASTL shared_ptr containers which hold void pointers can't call the destructors of their contained objects unless the user manually specifies a custom deleter template parameter. This is a case whereby EASTL is more efficient but less safe. We can revisit this topic in the future if it becomes an issue. 
## list::size is O(n) As of this writing, EASTL has three linked list classes: list, slist, and intrusive_list. In each of these classes, the size of the list is not cached in a member size variable. The result of this is that getting the size of a list is not a fast operation, as it requires traversing the list and counting the nodes. We could make the list::size function be fast by having a member mSize variable which tracks the size as we insert and delete items. There are reasons for having such functionality and reasons for not having such functionality. We currently choose to not have a member mSize variable as it would add four bytes to the class, add a tiny amount of processing to functions such as insert and erase, and would only serve to improve the size function, but no others. In the case of intrusive_list, it would do additional harm. The alternative argument is that the C++ standard states that std::list::size should be an O(1) operation (i.e. have a member size variable), that many C++ standard library list implementations do so, that the size is but an integer which is quick to update, and that many users expect to have a fast size function. In the final analysis, we are developing a library for game development and performance is paramount, so we choose to not cache the list size. The user can always implement a size cache himself. ## basic_string doesn't use copy-on-write The primary benefit of CoW is that it allows for the sharing of string data between two string objects. Thus if you say this: ```cpp string a("hello"); string b(a); ``` the "hello" will be shared between a and b. If you then say this: ```cpp a = "world"; ``` then `a` will release its reference to "hello" and leave b with the only reference to it. Normally this functionality is accomplished via reference counting and with atomic operations or mutexes. The C++ standard does not say anything about basic_string and CoW. 
However, for a basic_string implementation to be standards-conforming, a number of issues arise which dictate some things about how one would have to implement a CoW string. The discussion of these issues will not be rehashed here, as you can read the references below for better detail than can be provided in the space we have here. However, we can say that the C++ standard is sensible and that anything we try to do here to allow for an efficient CoW implementation would result in a generally unacceptable string interface. The disadvantages of CoW strings are: * A reference count needs to exist with the string, which increases string memory usage. * With thread safety, atomic operations and mutex locks are expensive, especially on weaker memory systems such as console gaming platforms. * All non-const string accessor functions need to do a sharing check, and the first such check needs to detach the string. Similarly, all string assignments need to do a sharing check as well. If you access the string before doing an assignment, the assignment doesn't result in a shared string, because the string has already been detached. * String sharing doesn't happen the large majority of the time. In some cases, the total sum of the reference count memory can exceed any memory savings gained by the strings that share representations.  The addition of a cow_string class is under consideration for EASTL. There are conceivably some systems which have string usage patterns which would benefit from CoW sharing. Such functionality is best saved for a separate string implementation so that the other string uses aren't penalized. 
This is a good starting HTML reference on the topic: > [http://www.gotw.ca/publications/optimizations.htm](http://www.gotw.ca/publications/optimizations.htm) Here is a well-known Usenet discussion on the topic: > [http://groups-beta.google.com/group/comp.lang.c++.moderated/browse_thread/thread/3dc6af5198d0bf7/886c8642cb06e03d](http://groups-beta.google.com/group/comp.lang.c++.moderated/browse_thread/thread/3dc6af5198d0bf7/886c8642cb06e03d) ---------------------------------------------- End of document ================================================ FILE: doc/EASTL.natvis ================================================ ({(void*)mPair.mFirst} = {*mPair.mFirst}) ({nullptr}) (void*)mPair.mFirst *mPair.mFirst ({(void*)mpValue} = {*mpValue}) ({nullptr}) (void*)mpValue *mpValue mpRefCount->mRefCount mpRefCount->mWeakRefCount {((mpRefCount && mpRefCount->mRefCount) ? mpValue : nullptr)} mpRefCount && mpRefCount->mRefCount ? mpValue : nullptr [{$T2}] {{}} [{$T2}] {{ {*mValue} }} [{$T2}] {{ {*mValue}, {*(mValue+1)} }} [{$T2}] {{ {*mValue}, {*(mValue+1)}, {*(mValue+2)} }} [{$T2}] {{ {*mValue}, {*(mValue+1)}, {*(mValue+2)}, {*(mValue+3)} }} [{$T2}] {{ {*mValue}, {*(mValue+1)}, {*(mValue+2)}, {*(mValue+3)}, {*(mValue+4)} }} [{$T2}] {{ {*mValue}, {*(mValue+1)}, {*(mValue+2)}, {*(mValue+3)}, {*(mValue+4)}, {*(mValue+5)} }} [{$T2}] {{ {*mValue}, {*(mValue+1)}, {*(mValue+2)}, {*(mValue+3)}, {*(mValue+4)}, {*(mValue+5)}, ... 
}} $T2 $T2 mValue "{mPair.mFirst.heap.mpBegin,sb}" "{mPair.mFirst.sso.mData,sb}" mPair.mFirst.heap.mnSize (mPair.mFirst.heap.mnCapacity & ~kHeapMask) mPair.mFirst.heap.mpBegin,sb (SSOLayout::SSO_CAPACITY - mPair.mFirst.sso.mRemainingSizeField.mnRemainingSize) SSOLayout::SSO_CAPACITY mPair.mFirst.sso.mData,sb !!(mPair.mFirst.sso.mRemainingSizeField.mnRemainingSize & kSSOMask) {mPair.mFirst.heap.mpBegin,su} {mPair.mFirst.sso.mData,su} mPair.mFirst.heap.mnSize (mPair.mFirst.heap.mnCapacity & ~kHeapMask) mPair.mFirst.heap.mpBegin,su (SSOLayout::SSO_CAPACITY - mPair.mFirst.sso.mRemainingSizeField.mnRemainingSize) SSOLayout::SSO_CAPACITY mPair.mFirst.sso.mData,su !!(mPair.mFirst.sso.mRemainingSizeField.mnRemainingSize & kSSOMask) ({first}, {second}) first second [{mStorage.mnSize}] {{}} [{mStorage.mnSize}] {{ {*mStorage.mpData} }} [{mStorage.mnSize}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)} }} [{mStorage.mnSize}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)} }} [{mStorage.mnSize}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)} }} [{mStorage.mnSize}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)}, {*(mStorage.mpData+4)} }} [{mStorage.mnSize}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)}, {*(mStorage.mpData+4)}, {*(mStorage.mpData+5)} }} [{mStorage.mnSize}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)}, {*(mStorage.mpData+4)}, {*(mStorage.mpData+5)}, ... 
}} DynamicSize mStorage.mnSize mStorage.mnSize mStorage.mpData [{$T2}] {{}} [{$T2}] {{ {*mStorage.mpData} }} [{$T2}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)} }} [{$T2}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)} }} [{$T2}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)} }} [{$T2}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)}, {*(mStorage.mpData+4)} }} [{$T2}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)}, {*(mStorage.mpData+4)}, {*(mStorage.mpData+5)} }} [{$T2}] {{ {*mStorage.mpData}, {*(mStorage.mpData+1)}, {*(mStorage.mpData+2)}, {*(mStorage.mpData+3)}, {*(mStorage.mpData+4)}, {*(mStorage.mpData+5)}, ... }} ConstantSize $T2 $T2 mStorage.mpData [{mpEnd - mpBegin}] {{}} [{mpEnd - mpBegin}] {{ {*mpBegin} }} [{mpEnd - mpBegin}] {{ {*mpBegin}, {*(mpBegin+1)} }} [{mpEnd - mpBegin}] {{ {*mpBegin}, {*(mpBegin+1)}, {*(mpBegin+2)} }} [{mpEnd - mpBegin}] {{ {*mpBegin}, {*(mpBegin+1)}, {*(mpBegin+2)}, {*(mpBegin+3)} }} [{mpEnd - mpBegin}] {{ {*mpBegin}, {*(mpBegin+1)}, {*(mpBegin+2)}, {*(mpBegin+3)}, {*(mpBegin+4)} }} [{mpEnd - mpBegin}] {{ {*mpBegin}, {*(mpBegin+1)}, {*(mpBegin+2)}, {*(mpBegin+3)}, {*(mpBegin+4)}, {*(mpBegin+5)} }} [{mpEnd - mpBegin}] {{ {*mpBegin}, {*(mpBegin+1)}, {*(mpBegin+2)}, {*(mpBegin+3)}, {*(mpBegin+4)}, {*(mpBegin+5)}, ... }} mpEnd - mpBegin mCapacityAllocator.mFirst - mpBegin mpEnd - mpBegin mpBegin [0] {{}} [1] {{ {*mItBegin.mpCurrent} }} [{(mItEnd.mpCurrentArrayPtr - mItBegin.mpCurrentArrayPtr) * $T3 + (mItEnd.mpCurrent-mItEnd.mpBegin) - (mItBegin.mpCurrent-mItBegin.mpBegin)}] {{ {*mItBegin.mpCurrent}, ... 
}} (mItEnd.mpCurrentArrayPtr - mItBegin.mpCurrentArrayPtr) * $T3 + (mItEnd.mpCurrent-mItEnd.mpBegin) - (mItBegin.mpCurrent-mItBegin.mpBegin) (mItEnd.mpCurrentArrayPtr - mItBegin.mpCurrentArrayPtr) * $T3 + (mItEnd.mpCurrent-mItEnd.mpBegin) - (mItBegin.mpCurrent-mItBegin.mpBegin) mItBegin.mpCurrentArrayPtr[(mItBegin.mpCurrent-mItBegin.mpBegin + $i) / $T3][(mItBegin.mpCurrent-mItBegin.mpBegin + $i) % $T3] {*mpCurrent} *mpCurrent *(*(mpCurrentArrayPtr-1) + (mpEnd-mpBegin) - 1) *(mpCurrent-1) **(mpCurrentArrayPtr+1) *(mpCurrent+1) mpCurrent == mpBegin mpCurrent+1 == mpEnd {c} c [0] {{}} [1] {{ {((eastl::ListNode<$T1>*)mNodeAllocator.mFirst.mpNext)->mValue} }} [2] {{ {((eastl::ListNode<$T1>*)mNodeAllocator.mFirst.mpNext)->mValue}, {((eastl::ListNode<$T1>*)mNodeAllocator.mFirst.mpNext->mpNext)->mValue} }} [?] {{ {((eastl::ListNode<$T1>*)mNodeAllocator.mFirst.mpNext)->mValue}, {((eastl::ListNode<$T1>*)mNodeAllocator.mFirst.mpNext->mpNext)->mValue}, ... }} Content of lists will repeat indefinitely. Keep that in mind! mNodeAllocator.mFirst.mpNext mpNext ((eastl::ListNode<$T1>*)this)->mValue {mValue} mValue *(eastl::ListNode<$T1>*)mpNext *(eastl::ListNode<$T1>*)mpPrev Content of lists will repeat indefinitely. Keep that in mind! The rest of the list follows: (eastl::ListNode<$T1>*)mpNext->mpNext (eastl::ListNode<$T1>*)mpNext mValue {*mpNode} *(eastl::ListNode<$T1>*)mpNode [0] {{}} [1] {{ {((eastl::SListNode<$T1>*)mNode.mpNext)->mValue} }} [2] {{ {((eastl::SListNode<$T1>*)mNode.mpNext)->mValue}, {((eastl::SListNode<$T1>*)mNode.mpNext->mpNext)->mValue} }} [?] {{ {((eastl::SListNode<$T1>*)mNode.mpNext)->mValue}, {((eastl::SListNode<$T1>*)mNode.mpNext->mpNext)->mValue}, ... }} mNode.mpNext mpNext ((eastl::SListNode<$T1>*)this)->mValue {mValue} mValue *(eastl::SListNode<$T1>*)mpNext The rest of the list follows: mpNext == nullptr ? 
nullptr : (eastl::SListNode<$T1>*)mpNext->mpNext (eastl::SListNode<$T1>*)mpNext mValue {*mpNode} *(eastl::SListNode<$T1>*)mpNode [0] {{}} [1] {{ {mAnchor.mpNext} }} [?] {{ {mAnchor.mpNext}, ... }} Content of intrusive lists will repeat indefinitely. Keep that in mind! mAnchor.mpNext mpNext *this [0] {{}} [1] {{ {mAnchor.mpNext} }} [?] {{ {mAnchor.mpNext}, ... }} Content of intrusive lists will repeat indefinitely. Keep that in mind! mAnchor.mpNext mpNext *(($T1*)this) {*($T1*)mpNode} *($T1*)mpNode [0] {{}} [1] {{ {((eastl::rbtree_node<$T1>*)mAnchor.mpNodeLeft)->mValue} }} [{mnSize}] {{ {((eastl::rbtree_node<$T1>*)mAnchor.mpNodeLeft)->mValue}, ... }} mnSize mnSize mAnchor.mpNodeParent mpNodeLeft mpNodeRight ((eastl::rbtree_node<$T1>*)this)->mValue [0] {{}} [1] {{ {((eastl::rbtree_node<$T2>*)mAnchor.mpNodeLeft)->mValue} }} [{mnSize}] {{ {((eastl::rbtree_node<$T2>*)mAnchor.mpNodeLeft)->mValue}, ... }} mnSize mnSize mAnchor.mpNodeParent mpNodeLeft mpNodeRight ((eastl::rbtree_node<$T2>*)this)->mValue {mValue} mValue It is possible to expand parents that do not exist. *(eastl::rbtree_node<$T1>*)mpNodeParent *(eastl::rbtree_node<$T1>*)mpNodeLeft *(eastl::rbtree_node<$T1>*)mpNodeRight {*(eastl::rbtree_node<$T1>*)mpNode} *(eastl::rbtree_node<$T1>*)mpNode [{mnElementCount}] {{}} [{mnElementCount}] {{ ... }} mnBucketCount mpBucketArray entry->mValue entry = entry->mpNext bucketIndex++ entry = mpBucketArray[bucketIndex] [{mnElementCount}] {{}} [{mnElementCount}] {{ ... }} entry->mValue.second entry = entry->mpNext bucketIndex++ entry = mpBucketArray[bucketIndex] {mValue}, {*mpNext} {mValue} this mpNext mValue {mpNode->mValue} mpNode->mValue {*(mIterator-1)} mIterator-1 {{count = {kSize}}} kSize kSize bBitValue = ((mWord[iWord] >> iBitInWord) % 2) != 0 ? 
true : false bBitValue iBitInWord++ iWord++ iBitInWord = 0 {c} c {mpBegin,[mnCount]} mpBegin,[mnCount] ({mFirst}, {mSecond}) ({mSecond}) ({mFirst}) (empty) (empty) ({mFirst}, {mSecond}) nullopt {value()} value() {mError} (Error) {mValue} mError mValue mHasValue void {mError} (Error) mError mHasValue {$T1} to {$T2}} {mRep} nanoseconds {mRep} microseconds {mRep} milliseconds {mRep} seconds {mRep} minutes {mRep} hours {mRep} duration with ratio = [{$T2} : {$T3}] empty {mInvokeFuncPtr} {*val} empty {m_storage.external_storage} {mAtomic} mAtomic {mFlag.mAtomic} [valueless_by_exception] {{ index=0, value={($T1*)mBuffer.mCharData}} {{ index=1, value={($T2*)mBuffer.mCharData}} {{ index=2, value={($T3*)mBuffer.mCharData}} {{ index=3, value={($T4*)mBuffer.mCharData}} {{ index=4, value={($T5*)mBuffer.mCharData}} {{ index=5, value={($T6*)mBuffer.mCharData}} {{ index=6, value={($T7*)mBuffer.mCharData}} {{ index=7, value={($T8*)mBuffer.mCharData}} {{ index=8, value={($T9*)mBuffer.mCharData}} {{ index=9, value={($T10*)mBuffer.mCharData}} {{ index=10, value={($T11*)mBuffer.mCharData}} {{ index=11, value={($T12*)mBuffer.mCharData}} {{ index=12, value={($T13*)mBuffer.mCharData}} {{ index=13, value={($T14*)mBuffer.mCharData}} {{ index=14, value={($T15*)mBuffer.mCharData}} {{ index=15, value={($T16*)mBuffer.mCharData}} {{ index=16, value={($T17*)mBuffer.mCharData}} {{ index=17, value={($T18*)mBuffer.mCharData}} {{ index=18, value={($T19*)mBuffer.mCharData}} {{ index=19, value={($T20*)mBuffer.mCharData}} {{ index=20, value={($T21*)mBuffer.mCharData}} {{ index=21, value={($T22*)mBuffer.mCharData}} {{ index=22, value={($T23*)mBuffer.mCharData}} {{ index=23, value={($T24*)mBuffer.mCharData}} {{ index=24, value={($T25*)mBuffer.mCharData}} {{ index=25, value={($T26*)mBuffer.mCharData}} {{ index=26, value={($T27*)mBuffer.mCharData}} {{ index=27, value={($T28*)mBuffer.mCharData}} {{ index=28, value={($T29*)mBuffer.mCharData}} {{ index=29, value={($T30*)mBuffer.mCharData}} {{ index=30, 
value={($T31*)mBuffer.mCharData}} index() ($T1*)mBuffer.mCharData ($T2*)mBuffer.mCharData ($T3*)mBuffer.mCharData ($T4*)mBuffer.mCharData ($T5*)mBuffer.mCharData ($T6*)mBuffer.mCharData ($T7*)mBuffer.mCharData ($T8*)mBuffer.mCharData ($T9*)mBuffer.mCharData ($T10*)mBuffer.mCharData ($T11*)mBuffer.mCharData ($T12*)mBuffer.mCharData ($T13*)mBuffer.mCharData ($T14*)mBuffer.mCharData ($T15*)mBuffer.mCharData ($T16*)mBuffer.mCharData ($T17*)mBuffer.mCharData ($T18*)mBuffer.mCharData ($T19*)mBuffer.mCharData ($T20*)mBuffer.mCharData ($T21*)mBuffer.mCharData ($T22*)mBuffer.mCharData ($T23*)mBuffer.mCharData ($T24*)mBuffer.mCharData ($T25*)mBuffer.mCharData ($T26*)mBuffer.mCharData ($T27*)mBuffer.mCharData ($T28*)mBuffer.mCharData ($T29*)mBuffer.mCharData ($T30*)mBuffer.mCharData ($T31*)mBuffer.mCharData ({*this,view(noparens)}) {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue 
(*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<4,$T5,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<4,$T5,0>*)&mImpl)).mValue {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<4,$T5,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<5,$T6,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<4,$T5,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<5,$T6,0>*)&mImpl)).mValue {(*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<4,$T5,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<5,$T6,0>*)&mImpl)).mValue}, {(*((eastl::Internal::TupleLeaf<6,$T7,0>*)&mImpl)).mValue} ({*this,view(noparens)}) (*((eastl::Internal::TupleLeaf<0,$T1,0>*)&mImpl)).mValue 
(*((eastl::Internal::TupleLeaf<1,$T2,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<2,$T3,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<3,$T4,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<4,$T5,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<5,$T6,0>*)&mImpl)).mValue (*((eastl::Internal::TupleLeaf<6,$T7,0>*)&mImpl)).mValue (nothing) {static_cast<flag_type>(m_mask),en} m_mask,bb (flag_type)(1 << 0),en (flag_type)(1 << 1),en (flag_type)(1 << 2),en (flag_type)(1 << 3),en (flag_type)(1 << 4),en (flag_type)(1 << 5),en (flag_type)(1 << 6),en (flag_type)(1 << 7),en (flag_type)(1 << 8),en (flag_type)(1 << 9),en (flag_type)(1 << 10),en (flag_type)(1 << 11),en (flag_type)(1 << 12),en (flag_type)(1 << 13),en (flag_type)(1 << 14),en (flag_type)(1 << 15),en (flag_type)(1 << 16),en (flag_type)(1 << 17),en (flag_type)(1 << 18),en (flag_type)(1 << 19),en (flag_type)(1 << 20),en (flag_type)(1 << 21),en (flag_type)(1 << 22),en (flag_type)(1 << 23),en (flag_type)(1 << 24),en (flag_type)(1 << 25),en (flag_type)(1 << 26),en (flag_type)(1 << 27),en (flag_type)(1 << 28),en (flag_type)(1 << 29),en (flag_type)(1 << 30),en (flag_type)(1 << 31),en {static_cast<flag_type>(__log2(m_mask)),en} (multiple values) (nothing) m_mask,bb (flag_type)(0),en (flag_type)(1),en (flag_type)(2),en (flag_type)(3),en (flag_type)(4),en (flag_type)(5),en (flag_type)(6),en (flag_type)(7),en (flag_type)(8),en (flag_type)(9),en (flag_type)(10),en (flag_type)(11),en (flag_type)(12),en (flag_type)(13),en (flag_type)(14),en (flag_type)(15),en (flag_type)(16),en (flag_type)(17),en (flag_type)(18),en (flag_type)(19),en (flag_type)(20),en (flag_type)(21),en (flag_type)(22),en (flag_type)(23),en (flag_type)(24),en (flag_type)(25),en (flag_type)(26),en (flag_type)(27),en (flag_type)(28),en (flag_type)(29),en (flag_type)(30),en (flag_type)(31),en ================================================ FILE: doc/FAQ.md ================================================ # EASTL FAQ We provide a FAQ (frequently asked 
questions) list here for a number of commonly asked questions about EASTL and STL in general. Feel free to suggest new FAQ additions based on your own experience. ## Information 1. [What is EASTL?](#info1-what-is-eastl) 2. [What uses are EASTL suitable for?](#info2-what-uses-are-eastl-suitable-for) 3. [How does EASTL differ from standard C++ STL?](#info3-how-does-eastl-differ-from-standard-c-stl) 4. [Is EASTL thread-safe?](#info4-is-eastl-thread-safe) 5. [What platforms/compilers does EASTL support?](#info5-what-platformscompilers-does-eastl-support) 6. [Why is there EASTL when there is the STL?](#info6-why-is-there-eastl-when-there-is-the-stl) 7. [Can I mix EASTL with standard C++ STL?](#info7-can-i-mix-eastl-with-standard-c-stl) 8. [Where can I learn more about STL and EASTL?](#info8-where-can-i-learn-more-about-stl-and-eastl) 9. [What is the legal status of EASTL?](#info9-what-is-the-legal-status-of-eastl) 10. [Does EASTL deal with compiler exception handling settings?](#info10-does-eastl-deal-with-compiler-exception-handling-settings) 11. [What C++ language features does EASTL use (e.g. virtual functions)?](#info11-what-c-language-features-does-eastl-use-eg-virtual-functions) 12. [What compiler warning levels does EASTL support?](#info12-what-compiler-warning-levels-does-eastl-support) 13. [Is EASTL compatible with Lint?](#info13-is-eastl-compatible-with-lint) 14. [What compiler settings do I need to compile EASTL?](#info14-what-compiler-settings-do-i-need-to-compile-eastl) 15. [How hard is it to incorporate EASTL into my project?](#info15-how-hard-is-it-to-incorporate-eastl-into-my-project) 16. [Should I use EASTL instead of std STL or instead of my custom library?](#info16-should-i-use-eastl-instead-of-std-stl-or-instead-of-my-custom-library) 17. [I think I've found a bug. What do I do?](#info17-i-think-ive-found-a-bug-what-do-i-do) 18. [Can EASTL be used by third party EA developers?](#info18-can-eastl-be-used-by-third-party-ea-developers) ## Performance 1. 
[How efficient is EASTL compared to standard C++ STL implementations?](#perf1-how-efficient-is-eastl-compared-to-standard-c-stl-implementations) 2. [How efficient is EASTL in general?](#perf2-how-efficient-is-eastl-in-general) 3. [Strings don't appear to use the "copy-on-write" optimization. Why not?](#perf3-strings-dont-appear-to-use-the-copy-on-write-cow-optimization-why-not) 4. [Does EASTL cause code bloat, given that it uses templates?](#perf4-does-eastl-cause-code-bloat-given-that-it-uses-templates) 5. [Don't STL and EASTL containers fragment memory?](#perf5-dont-stl-and-eastl-containers-fragment-memory) 6. [I don't see container optimizations for equivalent scalar types such as pointer types. Why?](#perf6-i-dont-see-container-optimizations-for-equivalent-scalar-types-such-as-pointer-types-why) 7. [I've seen some STL's provide a default quick "node allocator" as the default allocator. Why doesn't EASTL do this?](#perf7-ive-seen-some-stls-provide-a-default-quick-node-allocator-as-the-default-allocator-why-doesnt-eastl-do-this) 8. [Templates sometimes seem to take a long time to compile. Why do I do about that?](#perf8-templates-sometimes-seem-to-take-a-long-time-to-compile-why-do-i-do-about-that) 9. [How do I assign a custom allocator to an EASTL container?](#cont8-how-do-i-assign-a-custom-allocator-to-an-eastl-container) 10. [How well does EASTL inline?](#perf10-how-well-does-eastl-inline) 11. [How do I control function inlining?](#perf11-how-do-i-control-function-inlining) 12. [C++ / EASTL seems to bloat my .obj files much more than C does.](#perf12-c--eastl-seems-to-bloat-my-obj-files-much-more-than-c-does) 13. [What are the best compiler settings for EASTL?](#perf13-what-are-the-best-compiler-settings-for-eastl) ## Problems 1. [I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong?](#prob1-im-getting-screwy-behavior-in-sorting-algorithms-or-sorted-containers-whats-wrong) 2. [I am getting compiler warnings (e.g. 
C4244, C4242 or C4267) that make no sense. Why?](#prob2-i-am-getting-compiler-warnings-eg-c4244-c4242-or-c4267-that-make-no-sense-why) 3. [I am getting compiler warning C4530, which complains about exception handling and "unwind semantics." What gives?](#prob3-i-am-getting-compiler-warning-c4530-which-complains-about-exception-handling-and-unwind-semantics-what-gives) 4. [Why are tree-based containers hard to read with a debugger?](#prob4-why-are-tree-based-eastl-containers-hard-to-read-with-a-debugger) 5. [The EASTL source code is sometimes rather complicated looking. Why is that?](#prob5-the-eastl-source-code-is-sometimes-rather-complicated-looking-why-is-that) 6. [When I get compilation errors, they are very long and complicated looking. What do I do?](#prob6-when-i-get-compilation-errors-they-are-very-long-and-complicated-looking-what-do-i-do) 7. [Templates sometimes seem to take a long time to compile. Why do I do about that?](#prob7-templates-sometimes-seem-to-take-a-long-time-to-compile-why-do-i-do-about-that) 8. [I get the compiler error: "template instantiation depth exceeds maximum of 17. use -ftemplate-depth-NN to increase the maximum"](#prob8-i-get-the-compiler-error-template-instantiation-depth-exceeds-maximum-of-17-use--ftemplate-depth-nn-to-increase-the-maximum) 9. [I'm getting errors about min and max while compiling.](#prob9-im-getting-errors-about-min-and-max-while-compiling) 10. [C++ / EASTL seems to bloat my .obj files much more than C does.](#prob10-c--eastl-seems-to-bloat-my-obj-files-much-more-than-c-does) 11. [I'm getting compiler errors regarding operator new being previously defined.](#prob11-im-getting-compiler-errors-regarding-placement-operator-new-being-previously-defined) 12. [I'm getting errors related to wchar_t string functions such as wcslen().](#prob12-im-getting-errors-related-to-wchar_t-string--functions-such-as-wcslen) 13. [I'm getting compiler warning C4619: there is no warning number Cxxxx (e.g. 
C4217).](#prob13-im-getting-compiler-warning-c4619-there-is-no-warning-number-cxxxx-eg-c4217) 14. [My stack-based fixed_vector is not respecting the object alignment requirements.](#prob14-my-stack-based-fixed_vector-is-not-respecting-the-object-alignment-requirements) 15. [I am getting compiler errors when using GCC under XCode (Macintosh/iphone).](#prob15-i-am-getting-compiler-errors-when-using-gcc-under-xcode-macintoshiphone) 16. [I am getting linker errors about Vsnprintf8 or Vsnprintf16.](#prob16-i-am-getting-linker-errors-about-vsnprintf8-or-vsnprintf16) 17. [I am getting compiler errors about UINT64_C or UINT32_C.](#prob17-i-am-getting-compiler-errors-about-uint64_c-or-uint32_c) 18. [I am getting a crash with a global EASTL container.](#prob18-i-am-getting-a-crash-with-a-global-eastl-container) 19. [Why doesn't EASTL support passing NULL to functions with pointer arguments?](#prob19-why-doesnt-eastl-support-passing-null-string-functions) ## Debug 1. [How do I get VC++ mouse-overs to view templated data?](#debug1-how-do-i-set-the-vc-debugger-to-display-eastl-container-data-with-tooltips) 2. [How do I view containers if the visualizer/tooltip support is not present?](#debug2-how-do-i-view-containers-if-the-visualizertooltip-support-is-not-present) 3. [The EASTL source code is sometimes rather complicated looking. Why is that?](#debug3-the-eastl-source-code-is-sometimes-rather-complicated-looking-why-is-that) 4. [When I get compilation errors, they are very long and complicated looking. What do I do?](#debug4-when-i-get-compilation-errors-they-are-very-long-and-complicated-looking-what-do-i-do) 5. [How do I measure hash table balancing?](#debug5-how-do-i-measure-hash-table-balancing) ## Containers 1. [Why do some containers have "fixed" versions (e.g. fixed_list) but others(e.g. deque) don't have fixed versions?](#cont1-why-do-some-containers-have-fixed-versions-eg-fixed_list-but-otherseg-deque-dont-have-fixed-versions) 2. 
[Can I mix EASTL with standard C++ STL?](#cont2-can-i-mix-eastl-with-standard-c-stl) 3. [Why are there so many containers?](#cont3-why-are-there-so-many-containers) 4. [Don't STL and EASTL containers fragment memory?](#cont4-dont-stl-and-eastl-containers-fragment-memory) 5. [I don't see container optimizations for equivalent scalar types such as pointer types. Why?](#cont5-i-dont-see-container-optimizations-for-equivalent-scalar-types-such-as-pointer-types-why) 6. [What about alternative container and algorithm implementations (e.g. treaps, skip lists, avl trees)?](#cont6-what-about-alternative-container-and-algorithm-implementations-eg-treaps-skip-lists-avl-trees) 7. [Why are containers hard to read with a debugger?](#cont7-why-are-tree-based-eastl-containers-hard-to-read-with-a-debugger) 8. [How do I assign a custom allocator to an EASTL container?](#cont8-how-do-i-assign-a-custom-allocator-to-an-eastl-container) 9. [How do I set the VC++ debugger to display EASTL container data with tooltips?](#cont9-how-do-i-set-the-vc-debugger-to-display-eastl-container-data-with-tooltips) 10. [How do I use a memory pool with a container?](#cont10-how-do-i-use-a-memory-pool-with-a-container) 11. [How do I write a comparison (operator<()) for a struct that contains two or more members?](#cont11-how-do-i-write-a-comparison-operator-for-a-struct-that-contains-two-or-more-members) 12. [Why doesn't container X have member function Y?](#cont12-why-doesnt-container-x-have-member-function-y) 13. [How do I search a map of strings via a char pointer efficiently? If I use map.find("hello") it creates a temporary string, which is inefficient.](#cont13-how-do-i-search-a-map-of-strings-via-a-char-pointer-efficiently-if-i-use-mapfindhello-it-creates-a-temporary-string-which-is-inefficient) 14. [Why are set and hash_set iterators const (i.e. const_iterator)?](#cont14-why-are-set-and-hash_set-iterators-const-ie-const_iterator) 15. 
[How do I prevent my hash container from re-hashing?](#cont15-how-do-i-prevent-my-hash-container-from-re-hashing) 16. [Which uses less memory, a map or a hash_map?](#cont16-which-uses-less-memory-a-map-or-a-hash_map) 17. [How do I write a custom hash function?](#cont17-how-do-i-write-a-custom-hash-function) 18. [How do I write a custom compare function for a map or set?](#cont18-how-do-i-write-a-custom-compare-function-for-a-map-or-set) 19. [How do I force my vector or string capacity down to the size of the container?](#cont19-how-do-i-force-my-vector-or-string-capacity-down-to-the-size-of-the-container) 20. [How do I iterate a container while (selectively) removing items from it?](#cont20-how-do-i-iterate-a-container-while-selectively-removing-items-from-it) 21. [How do I store a pointer in a container?](#cont21-how-do-i-store-a-pointer-in-a-container) 22. [How do I make a union of two containers? difference? intersection?](#cont22-how-do-i-make-a-union-of-two-containers-difference-intersection) 23. [How do I override the default global allocator?](#cont23-how-do-i-override-the-default-global-allocator) 24. [How do I do trick X with the string container?](#cont24-how-do-i-do-trick-x-with-the-string-container) 25. [How do EASTL smart pointers compare to Boost smart pointers?](#cont25-how-do-eastl-smart-pointers-compare-to-boost-smart-pointers) 26. [How do your forward-declare an EASTL container?](#cont26-how-do-your-forward-declare-an-eastl-container) 27. [How do I make two containers share a memory pool?](#cont27-how-do-i-make-two-containers-share-a-memory-pool) 28. [Can I use a std (STL) allocator with EASTL?](#cont28-can-i-use-a-std-stl-allocator-with-eastl) 29. [What are the requirements of classes stored in containers?](#what-are-the-requirements-of-classes-stored-in-containers) ## Algorithms 1. [I'm getting screwy behavior in sorting algorithms or sorted containers. 
What's wrong?](#algo1-im-getting-screwy-behavior-in-sorting-algorithms-or-sorted-containers-whats-wrong) 2. [How do I write a comparison (operator<()) for a struct that contains two or more members?](#algo2-how-do-i-write-a-comparison-operator-for-a-struct-that-contains-two-or-more-members) 3. [How do I sort something in reverse order?](#algo3-how-do-i-sort-something-in-reverse-order) 4. [I'm getting errors about min and max while compiling.](#algo4-im-getting-errors-about-min-and-max-while-compiling) 5. [Why don't algorithms take a container as an argument instead of iterators? A container would be more convenient.](#algo5-why-dont-algorithms-take-a-container-as-an-argument-instead-of-iterators-a-container-would-be-more-convenient) 6. [Given a container of pointers, how do I find an element by value (instead of by pointer)?](#algo6-given-a-container-of-pointers-how-do-i-find-an-element-by-value-instead-of-by-pointer) 7. [When do stored objects need to support opertor < vs. when do they need to support operator ==?](#algo7-when-do-stored-objects-need-to-support-operator--vs-when-do-they-need-to-support-operator-) 8. [How do I sort via pointers or array indexes instead of objects directly?](#algo8-how-do-i-sort-via-pointers-or-array-indexes-instead-of-objects-directly) ## Iterators 1. [What's the difference between iterator, const iterator, and const_iterator?](#iter1-whats-the-difference-between-iterator-const-iterator-and-const_iterator) 2. [How do I tell from an iterator what type of thing it is iterating?](#iter2-how-do-i-tell-from-an-iterator-what-type-of-thing-it-is-iterating) 3. [How do I iterate a container while (selectively) removing items from it?](#iter3-how-do-i-iterate-a-container-while-selectively-removing-items-from-it) 4. [What is an insert_iterator?](#iter4-what-is-an-insert_iterator) ## Information ### Info.1 What is EASTL? EASTL refers to "EA Standard Template Library." 
It is a C++ template library that is analogous to the template facilities of the C++ standard library, which are often referred to as the STL. EASTL consists of the following systems: * Containers * Iterators * Algorithms * Utilities * Smart pointers * Type traits Of these, the last two (smart pointers and type traits) do not have analogs in standard C++. With respect to the other items, EASTL provides extensions and optimizations over the equivalents in standard C++ STL. EASTL is a professional-level implementation which outperforms commercial implementations (where functionality overlaps) and is significantly easier to read and debug. ### Info.2 What uses are EASTL suitable for? EASTL is suitable for any place where templated containers and algorithms would be appropriate. Thus any C++ tools could use it and many C++ game runtimes could use it, especially 2005+ generation game platforms. EASTL has optimizations that make it more suited to the CPUs and memory systems found on console platforms. Additionally, EASTL has some type-traits and iterator-traits-derived template optimizations that make it generally more efficient than home-brew templated containers. ### Info.3 How does EASTL differ from standard C++ STL? There are three kinds of ways that EASTL differs from standard STL: * EASTL equivalents to STL sometimes differ. * EASTL implementations sometimes differ from STL implementations of the same thing. * EASTL has functionality that doesn't exist in STL. With respect to item #1, the changes are such that they benefit game development and and not the type that could silently hurt you if you were more familiar with STL interfaces. With respect to item #2, where EASTL implementations differ from STL implementations it is almost always due to improvements being made in the EASTL versions or tradeoffs being made which are considered better for game development. 
With respect to item #3, there are a number of facilities that EASTL has that STL doesn't have, such as intrusive_list and slist containers, smart pointers, and type traits. All of these are facilities that assist in making more efficient game code and data. Ways in which EASTL is better than standard STL: * Has higher performance in release builds, sometimes dramatically so. * Has significantly higher performance in debug builds, due to less call overhead. * Has extended per-container functionality, particularly for game development. * Has additional containers that are useful for high performance game development. * Is easier to read, trace, and debug. * Memory allocation is much simpler and more controllable. * Has higher portability, as there is a single implementation for all platforms. * Has support of object alignment, whereas such functionality is not natively supported by STL. * We have control over it, so we can modify it as we like. * Has stricter standards for container design and behavior, particularly as this benefits game development. Ways in which EASTL is worse than standard STL: * Standard STL implementations are currently very reliable and weather-worn, whereas EASTL is less tested. * Standard STL is automatically available with just about every C++ compiler vendor's library. * Standard STL is supported by the compiler vendor and somewhat by the Internet community. #### EASTL coverage of std STL * list * vector * deque * string * set * multiset * map * multimap * bitset * queue * stack * priority_queue * memory * numeric * algorithm (all but inplace_merge, prev_permutation, next_permutation, nth_element, includes, unique_copy) * utility * functional * iterator EASTL additions/amendments to std STL * allocators work in a simpler way. * exception handling can be disabled. * all containers expose/declare their node size, so you can make a node allocator for them. * all containers have reset(), which unilaterally forgets their contents. 
* all containers have validate() and validate_iterator() functions. * all containers understand and respect object alignment requirements. * all containers guarantee no memory allocation upon being newly created as empty. * all containers and their iterators can be viewed in a debugger (no other STL does this, believe it or not). * linear containers guarantee linear memory. * vector has push_back(void). * vector has a data() function. * vector is actually a vector of type bool. * vector and string have set_capacity(). * string has sprintf(), append_sprintf(), trim(), compare_i(), make_lower(), make_upper(). * deque allows you to specify the subarray size. * list has a push_back(void) and push_back(void) function. * hash_map, hash_set, etc. have find_as(). EASTL coverage of TR1 (tr1 refers to proposed additions for the next C++ standard library, ~2008) * array * type_traits (there are about 30 of these) * unordered_set (EASTL calls it hash_set) * unordered_multiset * unordered_map * unordered_multimap * shared_ptr, shared_array, weak_ptr, scoped_ptr, scoped_array, intrusive_ptr EASTL additional functionality (not found elsewhere) * fixed_list * fixed_slist * fixed_vector * fixed_string * fixed_substring * fixed_set * fixed_multiset * fixed_map * fixed_multimap * fixed_hash_set * fixed_hash_multiset * fixed_hash_map * fixed_hash_multimap * vector_set * vector_multiset * vector_map * vector_multimap * intrusive_list * intrusive_slist * intrusive_sdlist * intrusive_hash_set * intrusive_hash_multiset * intrusive_hash_map * intrusive_hash_multimap * slist (STLPort's STL has this) * heap * linked_ptr, linked_array * sparse_matrix (this is not complete as of this writing) * ring_buffer * compressed_pair * call_traits * binary_search_i, change_heap, find_first_not_of, find_last_of, find_last_not_of, identical * comb_sort, bubble_sort, selection_sort, shaker_sort, bucket_sort * equal_to_2, not_equal_to_2, str_equal_to, str_equal_to_i ### Info.4 Is EASTL thread-safe? 
It's not simple enough to simply say that EASTL is thread-safe or thread-unsafe. However, we can say that with respect to thread safety that EASTL does the right thing. Individual EASTL containers are not thread-safe. That is, access to an instance of a container from multiple threads at the same time is unsafe if any of those accesses are modifying operations. A given container can be read from multiple threads simultaneously as well as any other standalone data structure. If a user wants to be able to have modifying access an instance of a container from multiple threads, it is up to the user to ensure that proper thread synchronization occurs. This usually means using a mutex. EASTL classes other than containers are the same as containers with respect to thread safety. EASTL functions (e.g. algorithms) are inherently thread-safe as they have no instance data and operate entirely on the stack. As of this writing, no EASTL function allocates memory and thus doesn't bring thread safety issues via that means. The user may well need to be concerned about thread safety with respect to memory allocation. If the user modifies containers from multiple threads, then allocators are going to be accessed from multiple threads. If an allocator is shared across multiple container instances (of the same type of container or not), then mutexes (as discussed above) the user uses to protect access to individual instances will not suffice to provide thread safety for allocators used across multiple instances. The conventional solution here is to use a mutex within the allocator if it is expected to be used by multiple threads. EASTL uses neither static nor global variables and thus there are no inter-instance dependencies that would make thread safety difficult for the user to implement. ### Info.5 What platforms/compilers does EASTL support? EASTL's support depends entirely on the compiler and not on the platform. 
EASTL works on any C++ compiler that completely conforms to the C++ language standard.
* STL allocators are sometimes painful to work with, as they have many requirements and cannot be modified once bound to a container. * The STL includes excess functionality that can lead to larger code than desirable. It's not very easy to tell programmers they shouldn't use that functionality. * The STL is implemented with very deep function calls. This results is unacceptable performance in non-optimized builds and sometimes in optimized builds as well. * The STL doesn't support alignment of contained objects. * STL containers won't let you insert an entry into a container without supplying an entry to copy from. This can be inefficient. * Useful STL extensions (e.g. slist, hash_map, shared_ptr) found in existing STL implementations such as STLPort are not portable because they don't exist in other versions of STL or aren't consistent between STL versions. * The STL lacks useful extensions that game programmers find useful (e.g. intrusive_list) but which could be best optimized in a portable STL environment. * The STL has specifications that limit our ability to use it efficiently. For example, STL vectors are not guaranteed to use contiguous memory and so cannot be safely used as an array. * The STL puts an emphasis on correctness before performance, whereas sometimes you can get significant performance gains by making things less academcially pure. * STL containers have private implementations that don't allow you to work with their data in a portable way, yet sometimes this is an important thing to be able to do (e.g. node pools). * All existing versions of STL allocate memory in empty versions of at least some of their containers. This is not ideal and prevents optimizations such as container memory resets that can greatly increase performance in some situations. * The STL is slow to compile, as most modern STL implementations are very large. * There are legal issues that make it hard for us to freely use portable STL implementations such as STLPort. 
* We have no say in the design and implementation of the STL and so are unable to change it to work for our needs. * Note that there isn't actually anything in the C++ standard called "STL." STL is a term that merely refers to the templated portion of the C++ standard library. ### Info.7 Can I mix EASTL with standard C++ STL? This is possible to some degree, though the extent depends on the implementation of C++ STL. One of things that makes interoperability is something called iterator categories. Containers and algorithms recognize iterator types via their category and STL iterator categories are not recognized by EASTL and vice versa. Things that you definitely can do: * #include both EASTL and standard STL headers from the same .cpp file. * Use EASTL containers to hold STL containers. * Construct an STL reverse_iterator from an EASTL iterator. * Construct an EASTL reverse_iterator from an STL iterator. Things that you probably will be able to do, though a given std STL implementation may prevent it: * Use STL containers in EASTL algorithms. * Use EASTL containers in STL algorithms. * Construct or assign to an STL container via iterators into an EASTL container. * Construct or assign to an EASTL container via iterators into an STL container. * Things that you would be able to do if the given std STL implementation is bug-free: * Use STL containers to hold EASTL containers. Unfortunately, VC7.x STL has a confirmed bug that prevents this. Similarly, STLPort versions prior to v5 have a similar but. Things that you definitely can't do: * Use an STL allocator directly with an EASTL container (though you can use one indirectly). * Use an EASTL allocator directly with an STL container (though you can use one indirectly). ### Info.8 Where can I learn more about STL and EASTL? EASTL is close enough in philosophy and functionality to standard C++ STL that most of what you read about STL applies to EASTL. 
This is particularly useful with respect to container specifications. It would take a lot of work to document EASTL containers and algorithms in fine detail, whereas most standard STL documentation applies as-is to EASTL. We won't cover the differences here, as that's found in another FAQ entry. That being said, we provide a list of sources for STL documentation that may be useful to you, especially if you are less familiar with the concepts of STL and template programming in general. * The SGI STL web site. Includes a good STL reference. * CodeProject STL introduction. * Scott Meyers Effective STL book. * The Microsoft online STL documentation. Microsoft links go bad every couple months, so try searching for STL at the * Microsoft MSDN site. * The Dinkumware online STL documentation. * The C++ standard, which is fairly readable. You can buy an electronic version for about $18 and in the meantime you can make do with draft revisions of it off the Internet by searching for "c++ draft standard". * STL performance tips, by Pete Isensee * STL algorithms vs. hand-written loops, by Scott Meyers. ### Info.9 What is the legal status of EASTL? EASTL is usable for all uses within Electronic Arts, both for internal usage and for shipping products for all platforms. All source code was written by a single EA engineer. Any externally derived code would be explicitly stated as such and approved by the legal department if such code ever gets introduced. As of EASTL v1.0, the red_black_tree.cpp file contains two functions derived from the original HP STL and have received EA legal approval for usage in any product. ### Info.10 Does EASTL deal with compiler exception handling settings? EASTL has automatic knowledge of the compiler's enabling/disabling of exceptions. If your compiler is set to disable exceptions, EASTL automatically detects so and executes without them. 
Also, you can force-enable or force-disable that setting to override the automatic behavior by #defining EASTL_EXCEPTIONS_ENABLED to 0 or 1. See EASTL's config.h for more information. ### Info.11 What C++ language features does EASTL use (e.g. virtual functions)? EASTL uses the following C++ language features: * Template functions, classes, member functions. * Multiple inheritance. * Namespaces. * Operator overloading. EASTL does not use the following C++ language features: * Virtual functions / interfaces. * RTTI (dynamic_cast). * Global and static variables. There are a couple class static const variables, but they act much like enums. * Volatile declarations * Template export. * Virtual inheritance. EASTL may use the following C++ language features: * Try/catch. This is an option that the user can enable and it defaults to whatever the compiler is set to use. * Floating point math. Hash containers have one floating point calculation, but otherwise floating point is not used. Notes: * EASTL uses rather little of the standard C or C++ library and uses none of the C++ template library (STL) and iostream library. The memcpy family of functions is one example EASTL C++ library usage. * EASTL never uses global new / delete / malloc / free. All allocations are done via user-specified allocators, though a default allocator definition is available. ### Info.12 What compiler warning levels does EASTL support? For VC++ EASTL should compile without warnings on level 4, and should compile without warnings for "warnings disabled by default" except C4242, C4514, C4710, C4786, and C4820. These latter warnings are somewhat draconian and most EA projects have little choice but to leave them disabled. For GCC, EASTL should compile without warnings with -Wall. Extensive testing beyond that hasn't been done. 
However, due to the nature of templated code generation and due to the way compilers compile templates, unforeseen warnings may occur in user code that may or may not be addressable by modifying EASTL. ### Info.13 Is EASTL compatible with Lint? As of EASTL 1.0, minimal lint testing has occurred. Testing with the November 2005 release of Lint (8.00t) demonstrated bugs in Lint that made its analysis not very useful.
Since it is fairly highly warning-free, you won't likely need to modify your compiler warning settings, even if they're pretty strict. EASTL has a few .cpp files which need to be compiled if you want to use the modules associated with those files. You can just compile those files with your regular compiler settings. Alternatively, you can use one of the EASTL project files. In its default configuration, the only thing you need to provide to make EASTL work is to define implementations of the following operator new functions: ```cpp #include void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line); void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line); ``` The flags and debugFlags arguments correspond to PPMalloc/RenderWare GeneralAllocator/GeneralAllocatorDebug Malloc equivalents. ### Info.16 Should I use EASTL instead of std STL or instead of my custom library? There are reasons you may want to use EASTL; there are reasons you may not want to use it. Ditto for std STL or any other library. Here we present a list of reasons (+ and -) for why you might want to use one or another. However, it should be noted that while EASTL contains functionality found in std STL, it has another ~40% of functionality not found in std STL, so EASTL and std STL (and whatever other template library you may have) are not mutually exclusive. **EASTL** * \+ Has higher performance than any commercial STL, especially on console platforms. * \+ Has extended functionality tailored for game development. * \+ Is highly configurable, and we own it so it can be amended at will. Std STL is owned by a third party committee. * \+ Is much easier to read and debug than other similar libraries, especially std STL. * \- Is highly unit tested, but does not have the same level of testing as std STL.
* \- Is more complicated than many users' lite template libraries, and may put off some beginners. * \- EASTL **Std STL** * \+ Is highly portable; your STL code will likely compile and run anywhere. * \+ Works without the need to install or download any package to use it. It just works. * \+ Is highly reliable and supported by the compiler vendor. You can have confidence in it. * \+ Some std STL versions (e.g. STLPort, VC8 STL) have better runtime debug checking than EASTL. * \- Has (sometimes greatly) variable implementations, behavior, and performance between implementations. * \- Is usually hard to read and debug. * \- Doesn't support some of the needs of game development, such as aligned allocations, named allocations, intrusive containers, etc. * \- Is not as efficient as EASTL, especially on console platforms. **Your own library** (please forgive us for implying there may be weaknesses in your libraries) * \+ You have control over it and can make it work however you want. * \+ You can fix bugs in it on the spot and have the fix in your codebase immediately. * \+ Your own library can be highly integrated into your application code or development environment. * \- Many custom libraries don't have the same level of testing as libraries such as std STL or EASTL. * \- Many custom libraries don't have the same breadth or depth as std STL or especially EASTL. * \- Many custom libraries don't have the level of performance tuning that std STL or especially EASTL has. ### Info.17 I think I've found a bug. What do I do? **Verify that you indeed have a bug** There are various levels of bugs that can occur, which include the following: * Compiler warnings generated by EASTL. * Compiler errors generated by EASTL (failure to compile well-formed code). * Runtime misbehavior by EASTL (function does the wrong thing). * Runtime crash or data corruption by EASTL. * Mismatch between EASTL documentation and behavior. 
* Mismatch between EASTL behavior and user's expectations (mis-design). Any of the above items can be the fault of EASTL. However, the first four can also be the fault of the user. Your primary goal in verifying a potential bug is to determine if it is an EASTL bug or a user bug. Template errors can sometimes be hard to diagnose. It's probably best if you first show the problem to somebody you know to make sure you are not missing something obvious. Creating a reproducible case may be useful in helping convince yourself, but as is mentioned below, this is not required in order to report the bug. **Report the bug** The first place to try is the standard EA centralized tech support site. As of this writing (10/2005), that tech site is http://eatech/. Due to the frequent technology churn that seems to occur within Electronic Arts, the bug reporting system in place when you read this may not be the one that was in place when this FAQ entry was written. If the tech site route fails, consider directly contacting the maintainer of the EASTL package. In reporting a bug, it is nice if there is a simple reproducible case that can be presented. However, such a case requires time to create, and so you are welcome to initially simply state what you think the bug is without producing a simple reproducible case. It may be that this is a known bug or it may be possible to diagnose the bug without a reproducible case. If more information is needed then the step of trying to produce a reproducible case may be necessary. ### Info.18 Can EASTL be used by third party EA developers? EASTL and other core technologies authored by EA (and not licensed from other companies) can be used in source and binary form by designated 3rd parties. The primary case where there is an issue is if the library contains platform specific code for a platform that the 3rd party is not licensed for. In that case the platform-specific code would need to be removed.
This doesn’t apply to EASTL, nor many of the other core tech packages. ## Performance ### Perf.1 How efficient is EASTL compared to standard C++ STL implementations? With respect to the functionality that is equivalent between EASTL and standard STL, the short answer to this is that EASTL is at least as efficient as other STL implementations and in a number of aspects is more so. EASTL has functionality such as intrusive_list and linked_ptr that don't exist in standard STL but are explicitly present to provide significant optimizations over standard STL. The medium length answer is that EASTL is significantly more efficient than Dinkumware STL, and Microsoft Windows STL. EASTL is generally more efficient than Metrowerks STL, but Metrowerks has a few tricks up its sleeve which EASTL doesn't currently implement. EASTL is roughly equal in efficiency to STLPort and GCC 3.x+ STL, though EASTL has some optimizations that these do not. The long answer requires a breakdown of the functionality between various versions of the STL. ### Perf.2 How efficient is EASTL in general? This question is related to the question, "How efficient are templates?" If you understand the effects of templates then you can more or less see the answer for EASTL. Templates are more efficient than the alternative when they are used appropriately, but can be less efficient than the alternative when used under circumstances that don't call for them. The strength of templates is that the compiler sees all the code and data types at compile time and can often reduce statements to smaller and faster code than with conventional non-templated code. The weakness of templates is that they sometimes produce more code and can result in what is often called "code bloat". However, it's important to note that unused template functions result in no generated nor linked code, so if you have a templated class with 100 functions but you only use one, only that one function will be compiled.
EASTL is a rather efficient implementation of a template library and pulls many tricks of the trade in terms of squeezing optimal performance out of the compiler. The only way to beat it is to write custom code for the data types you are working with, and even then people are sometimes surprised to find that their hand-implemented algorithm works no better or even worse than the EASTL equivalent. But certainly there are ways to beat templates, especially if you resort to assembly language programming and some kinds of other non-generic tricks. ### Perf.3 Strings don't appear to use the "copy-on-write" (CoW) optimization. Why not? **Short answer** CoW provides a benefit for a small percentage of uses but provides a disadvantage for the large majority of uses. **Long answer** The primary benefit of CoW is that it allows for the sharing of string data between two string objects. Thus if you say this: ```cpp string a("hello"); string b(a); ``` the "hello" will be shared between a and b. If you then say this: ```cpp a = "world"; ``` then *a* will release its reference to "hello" and leave b with the only reference to it. Normally this functionality is accomplished via reference counting and with atomic operations or mutexes. The C++ standard does not say anything about basic_string and CoW. However, for a basic_string implementation to be standards-conforming, a number of issues arise which dictate some things about how one would have to implement a CoW string. The discussion of these issues will not be rehashed here, as you can read the references below for better detail than can be provided in the space we have here. However, we can say that the C++ standard is sensible and that anything we try to do here to allow for an efficient CoW implementation would result in a generally unacceptable string interface. The disadvantages of CoW strings are: * A reference count needs to exist with the string, which increases string memory usage. 
* With thread safety, atomic operations and mutex locks are expensive, especially on weaker memory systems such as console gaming platforms. * All non-const string accessor functions need to do a sharing check and the first such check needs to detach the string. Similarly, all string assignments need to do a sharing check as well. If you access the string before doing an assignment, the assignment doesn't result in a shared string, because the string has already been detached. * String sharing doesn't happen the large majority of the time. In some cases, the total sum of the reference count memory can exceed any memory savings gained by the strings that share representations. The addition of a cow_string class is under consideration for EASTL. There are conceivably some systems which have string usage patterns which would benefit from CoW sharing. Such functionality is best saved for a separate string implementation so that the other string uses aren't penalized. References This is a good starting HTML reference on the topic: http://www.gotw.ca/publications/optimizations.htm Here is a well-known Usenet discussion on the topic: http://groups-beta.google.com/group/comp.lang.c++.moderated/browse_thread/thread/3dc6af5198d0bf7/886c8642cb06e03d ### Perf.4 Does EASTL cause code bloat, given that it uses templates? The reason that templated functions and classes might cause an increase in code size is because each template instantiation theoretically creates a unique piece of code. For example, when you compile this code: ```cpp template <typename T> const T min(const T a, const T b) { return b < a ? b : a; } int i = min(3, 4); double d = min(3.0, 4.0); ``` the compiler treats it as if you wrote this: ```cpp int min(const int a, const int b) { return b < a ? b : a; } double min(const double a, const double b) { return b < a ? b : a; } ``` Imagine this same effect happening with containers such as list and map and you can see how it is that templates can cause code proliferation.
A couple things offset the possibility of code proliferation: inlining and folding. In practice the above 'min' function would be converted to inlined functions by the compiler which occupy only a few CPU instructions. In many of the simplest cases the inlined version actually occupies less code than the code required to push parameters on the stack and execute a function call. And they will execute much faster as well. Code folding (a.k.a. "COMDAT folding", "duplicate stripping", "ICF" / "identical code folding") is a compiler optimization whereby the compiler realizes that two independent functions have compiled to the same code and thus can be reduced to a single function. The Microsoft VC++ compiler (Since VS2005), and GCC (v 4.5+) can do these kinds of optimizations on all platforms. This can result, for example, in all templated containers of pointers (e.g. vector, vector, etc.) to be linked as a single implementation. This folding occurs at a function level and so individual member functions can be folded while other member functions are not. A side effect of this optimization is that you aren't likely to gain much by declaring containers of void* instead of the pointer type actually contained. The above two features reduce the extent of code proliferation, but certainly don't eliminate it. What you need to think about is how much code might be generated vs. what your alternatives are. Containers like vector can often inline completely away, whereas more complicated containers such as map can only partially be inlined. In the case of map, if you need such a container for your Widgets, what alternatives do you have that would be more efficient than instantiating a map? This is up to you to answer. It's important to note that C++ compilers will throw away any templated functions that aren't used, including unused member functions of templated classes.
However, some argue that by having many functions available to the user that users will choose to use that larger function set rather than stick with a more restricted set. Also, don't be confused by syntax bloat vs. code bloat. In looking at templated libraries such as EASTL you will notice that there is sometimes a lot of text in the definition of a template implementation. But the actual underlying code is what you need to be concerned about. There is a good Usenet discussion on this topic at: http://groups.google.com/group/comp.lang.c++.moderated/browse_frm/thread/2b00649a935997f5 ### Perf.5 Don't STL and EASTL containers fragment memory? They only fragment memory if you use them in a way that does so. This is no different from any other type of container used in a dynamic way. There are various solutions to this problem, and EASTL provides additional help as well: * For vectors, use the reserve function (or the equivalent constructor) to set aside a block of memory for the container. The container will not reallocate memory unless you try to grow beyond the capacity you reserve. * EASTL has "fixed" variations of containers which allow you to specify a fixed block of memory which the container uses for its memory. The container will not allocate any memory with these types of containers and all memory will be cache-friendly due to its locality. * You can assign custom allocators to containers instead of using the default global allocator. You would typically use an allocator that has its own private pool of memory. * Where possible, add all a container's elements to it at once up front instead of adding them over time. This avoids memory fragmentation and increases cache coherency. ### Perf.6 I don't see container optimizations for equivalent scalar types such as pointer types. Why? Metrowerks (and no other, as of this writing) STL has some container specializations for type T* which maps them to type void*.
The idea is that a user who declares a list of Widget* and a list of Gadget* will generate only one container: a list of void*. As a result, code generation will be smaller. Often this is done only in optimized builds, as such containers are harder to view in debug builds due to type information being lost. The addition of this optimization is under consideration for EASTL, though it might be noted that optimizing compilers such as VC++ are already capable of recognizing duplicate generated code and folding it automatically as part of link-time code generation (LTCG) (a.k.a. "whole program optimization"). This has been verified with VC++, as the following code and resulting disassembly demonstrate: ```cpp eastl::list intPtrList; eastl::list toPtrList; eastl_size_t n1 = intPtrList.size(); eastl_size_t n2 = toPtrList.size(); 0042D288 lea edx,[esp+14h] 0042D28C call eastl::list::size (414180h) 0042D291 push eax 0042D292 lea edx,[esp+24h] 0042D296 call eastl::list::size (414180h) ``` Note that in the above case the compiler folded the two implementations of size() into a single implementation. ### Perf.7 I've seen some STL's provide a default quick "node allocator" as the default allocator. Why doesn't EASTL do this? **Short answer** This is a bad, misguided idea. **Long answer** These node allocators implement a heap for all of STL with buckets for various sizes of allocations and implemented fixed-size pools for each of these buckets. These pools are attractive at first because they do well in STL comparison benchmarks, especially when thread safety is disabled. Such benchmarks make it impossible to truly compare STL implementations because you have two different allocators in use and in some cases allocator performance can dominate the benchmark. However, the real problem with these node allocators is that they badly fragment and waste memory. 
The technical discussion of this topic is outside the scope of this FAQ, but you can learn more about it by researching memory management on the Internet. Unfortunately, the people who implement STL libraries are generally not experts on the topic of memory management. A better approach, especially for game development, is for the user to decide when fixed-size pools are appropriate and use them via custom allocator assignment to containers. ### Perf.8 Templates sometimes seem to take a long time to compile. What do I do about that? C++ compilers are generally slower than C compilers, and C++ templates are generally slower to compile than regular C++ code. EASTL has some extra functionality (such as type_traits and algorithm specializations) that is not found in most other template libraries and significantly improves performance and usefulness but adds to the amount of code that needs to be compiled. Ironically, we have a case where more source code generates faster and smaller object code. The best solution to the problem is to use pre-compiled headers, which are available on all modern (~2002+) compilers, such as VC6.0+, GCC 3.2+, and Metrowerks 7.0+. In terms of platforms this means all 2002+ platforms. Some users have been speeding up build times by creating project files that put all the source code in one large .cpp file. This has an effect similar to pre-compiled headers. It can go even faster than pre-compiled headers but has downsides in the way of convenience and portability. ### Perf.10 How well does EASTL inline? EASTL is written in such a way as to be easier to inline than typical templated libraries such as STL. How is this so? It is so because EASTL reduces the inlining depth of many functions, particularly the simple ones. In doing so it makes the implementation less "academic" but entirely correct.
An example of this is the vector operator[] function, which is implemented like so with Microsoft STL: ```cpp reference operator[](size_type n) { return *(begin() + n); } ``` EASTL implements the function directly, like so: ```cpp reference operator[](size_type n) { return *(mpBegin + n); } ``` Both implementations are correct, but the EASTL implementation will run faster in debug builds, be easier to debug, and will be more likely to be inlined when the usage of this function is within a hierarchy of other functions being inlined. It is not so simple to say that the Microsoft version will always inline in an optimized build, as it could be part of a chain and cause the max depth to be exceeded. That being said, EASTL appears to inline fairly well under most circumstances, including with GCC, which is the poorest of the compilers in its ability to inline well. ### Perf.11 How do I control function inlining? Inlining is an important topic for templated code, as such code often relies on the compiler being able to do good function inlining for maximum performance. GCC, VC++, and Metrowerks are discussed here. We discuss compilation-level inlining and function-level inlining here, though the latter is likely to be of more use to the user of EASTL, as it can externally control how EASTL is inlined. A related topic is GCC's template expansion depth, discussed elsewhere in this FAQ. We provide descriptions of inlining options here but don't currently have any advice on how to best use these with EASTL. Compilation-Level Inlining -- VC++ VC++ has some basic functionality to control inlining, and the compiler is pretty good at doing aggressive inlining when optimizing on for all platforms. > **#pragma inline_depth( [0... 255] )** > > Controls the number of times inline expansion can occur by controlling the number of times that a series of function calls can be expanded (from 0 to 255 times).
This pragma controls the inlining of functions marked inline and or inlined automatically under the /Ob2 option. The inline_depth pragma controls the number of times a series of function calls can be expanded. For example, if the inline depth is 4, and if A calls B and B then calls C, all three calls will be expanded inline. However, if the closest inline expansion is 2, only A and B are expanded, and C remains as a function call. > **#pragma inline_recursion( [{on | off}] )** > > Controls the inline expansion of direct or mutually recursive function calls. Use this pragma to control functions marked as inline and or functions that the compiler automatically expands under the /Ob2 option. Use of this pragma requires an /Ob compiler option setting of either 1 or 2. The default state for inline_recursion is off. The inline_recursion pragma controls how recursive functions are expanded. If inline_recursion is off, and if an inline function calls itself (either directly or indirectly), the function is expanded only once. If inline_recursion is on, the function is expanded multiple times until it reaches the value set by inline_depth, the default value of 8, or a capacity limit. Compilation-Level Inlining -- GCC GCC has a large set of options to control function inlining. Some options are available only in GCC 3.0 and later and thus not present on older platforms. > **-fno-default-inline** > > Do not make member functions inline by default merely because they are defined inside the class scope (C++ only). Otherwise, when you specify -O, member functions defined inside class scope are compiled inline by default; i.e., you don't need to add 'inline' in front of the member function name. > > **-fno-inline** > > Don't pay attention to the inline keyword. Normally this option is used to keep the compiler from expanding any functions inline. Note that if you are not optimizing, no functions can be expanded inline. 
> > **-finline-functions** > > Integrate all simple functions into their callers. The compiler heuristically decides which functions are simple enough to be worth integrating in this way. If all calls to a given function are integrated, and the function is declared static, then the function is normally not output as assembler code in its own right. Enabled at level -O3. > > **-finline-limit=n** > > By default, GCC limits the size of functions that can be inlined. This flag allows the control of this limit for functions that are explicitly marked as inline (i.e., marked with the inline keyword or defined within the class definition in c++). n is the size of functions that can be inlined in number of pseudo instructions (not counting parameter handling). pseudo-instructions are an internal representation of function size. The default value of n is 600. Increasing this value can result in more inlined code at the cost of compilation time and memory consumption. Decreasing usually makes the compilation faster and less code will be inlined (which presumably means slower programs). This option is particularly useful for programs that use inlining heavily such as those based on recursive templates with C++. > > Inlining is actually controlled by a number of parameters, which may be specified individually by using --param name=value. The -finline-limit=n option sets some of these parameters as follows: > > ``` > max-inline-insns-single > is set to n/2. > max-inline-insns-auto > is set to n/2. > min-inline-insns > is set to 130 or n/4, whichever is smaller. > max-inline-insns-rtl > is set to n. > ``` > > See --param below for a documentation of the individual parameters controlling inlining. > > **-fkeep-inline-functions** > > Emit all inline functions into the object file, even if they are inlined where used. > > **--param name=value** > > In some places, GCC uses various constants to control the amount of optimization that is done. 
For example, GCC will not inline functions that contain more than a certain number of instructions. You can control some of these constants on the command-line using the --param option. > > max-inline-insns-single > Several parameters control the tree inliner used in gcc. This number sets the maximum number of instructions (counted in GCC's internal representation) in a single function that the tree inliner will consider for inlining. This only affects functions declared inline and methods implemented in a class declaration (C++). The default value is 450. > > max-inline-insns-auto > When you use -finline-functions (included in -O3), a lot of functions that would otherwise not be considered for inlining by the compiler will be investigated. To those functions, a different (more restrictive) limit compared to functions declared inline can be applied. The default value is 90. > >large-function-insns > The limit specifying really large functions. For functions larger than this limit after inlining, inlining is constrained by --param large-function-growth. This parameter is useful primarily to avoid extreme compilation time caused by non-linear algorithms used by the backend. This parameter is ignored when -funit-at-a-time is not used. The default value is 2700. > > large-function-growth > Specifies maximal growth of large function caused by inlining in percents. This parameter is ignored when -funit-at-a-time is not used. The default value is 100 which limits large function growth to 2.0 times the original size. > > inline-unit-growth > Specifies maximal overall growth of the compilation unit caused by inlining. This parameter is ignored when -funit-at-a-time is not used. The default value is 50 which limits unit growth to 1.5 times the original size. > > max-inline-insns-recursive > max-inline-insns-recursive-auto > Specifies maximum number of instructions out-of-line copy of self recursive inline function can grow into by performing recursive inlining.
For functions declared inline --param max-inline-insns-recursive is taken into account. For function not declared inline, recursive inlining happens only when -finline-functions (included in -O3) is enabled and --param max-inline-insns-recursive-auto is used. The default value is 450. > > max-inline-recursive-depth > max-inline-recursive-depth-auto > Specifies maximum recursion depth used by the recursive inlining. For functions declared inline --param max-inline-recursive-depth is taken into account. For function not declared inline, recursive inlining happens only when -finline-functions (included in -O3) is enabled and --param max-inline-recursive-depth-auto is used. The default value is 450. > > inline-call-cost > Specify cost of call instruction relative to simple arithmetics operations (having cost of 1). Increasing this cost disqualify inlining of non-leaf functions and at same time increase size of leaf function that is believed to reduce function size by being inlined. In effect it increase amount of inlining for code having large abstraction penalty (many functions that just pass the arguments to other functions) and decrease inlining for code with low abstraction penalty. Default value is 16. > > **-finline-limit=n** > > By default, GCC limits the size of functions that can be inlined. This flag allows the control of this limit for functions that are explicitly marked as inline (i.e., marked with the inline keyword or defined within the class definition in c++). n is the size of functions that can be inlined in number of pseudo instructions (not counting parameter handling). The default value of n is 600. Increasing this value can result in more inlined code at the cost of compilation time and memory consumption. Decreasing usually makes the compilation faster and less code will be inlined (which presumably means slower programs).
This option is particularly useful for programs that use inlining heavily such as those based on recursive templates with C++. Inlining is actually controlled by a number of parameters, which may be specified individually by using --param name=value. The -finline-limit=n option sets some of these parameters as follows: ``` max-inline-insns-single is set to n/2. max-inline-insns-auto is set to n/2. min-inline-insns is set to 130 or n/4, whichever is smaller. max-inline-insns-rtl is set to n. ``` See below for a documentation of the individual parameters controlling inlining. Note: pseudo instruction represents, in this particular context, an abstract measurement of function's size. In no way, it represents a count of assembly instructions and as such its exact meaning might change from one release to another. GCC additionally has the -Winline compiler warning, which emits a warning whenever a function declared as inline was not inlined. Compilation-Level Inlining -- Metrowerks Metrowerks has a number of pragmas (and corresponding compiler settings) to control inlining. These include always_inline, inline_depth, inline_max_size, and inline_max_total_size. > ``` > #pragma always_inline on | off | reset > ``` > > Controls the use of inlined functions. If you enable this pragma, the compiler ignores all inlining limits and attempts to inline all functions where it is legal to do so. This pragma is deprecated. Use the inline_depth pragma instead. > > ``` > #pragma inline_depth(n) > #pragma inline_depth(smart) > ``` > > Controls how many passes are used to expand inline function. Sets the number of passes used to expand inline function calls. The number n is an integer from 0 to 1024 or the smart specifier. It also represents the distance allowed in the call chain from the last function up.
For example, if d is the total depth of a call chain, then functions below (d-n) are inlined if they do not exceed the inline_max_size and inline_max_total_size settings which are discussed directly below. > > ``` > #pragma inline_max_size(n); > #pragma inline_max_total_size(n); > ``` > > The first pragma sets the maximum function size to be considered for inlining; the second sets the maximum size to which a function is allowed to grow after the functions it calls are inlined. Here, n is the number of statements, operands, and operators in the function, which turns out to be roughly twice the number of instructions generated by the function. However, this number can vary from function to function. For the inline_max_size pragma, the default value of n is 256; for the inline_max_total_size pragma, the default value of n is 10000. The smart specifier is the default mode, with four passes where the passes 2-4 are limited to small inline functions. All inlineable functions are expanded if inline_depth is set to 1-1024. Function-Level Inlining -- VC++ > To force inline usage under VC++, you use this: > > ``` > __forceinline void foo(){ ... } > ``` > > It should be noted that __forceinline has no effect if the compiler is set to disable inlining. It merely tells the compiler that when inlining is enabled that it shouldn't use its judgment to decide if the function should be inlined but instead to always inline it. > > To disable inline usage under VC++, you need to use this: > > ``` > #pragma inline_depth(0) // Disable inlining. > void foo() { ... } > #pragma inline_depth() // Restore default. > ``` > > The above is essentially specifying compiler-level inlining control within the code for a specific function. **Function-Level Inlining -- GCC / Metrowerks** > To force inline usage under GCC 3.1+, you use this: > > `inline void foo() __attribute__((always_inline)) { ... }` > > or > > `inline __attribute__((always_inline)) void foo() { ... 
}` > > To disable inline usage under GCC 3+, you use this: > > `void foo() __attribute__((noinline)) { ... }` > > or > > `inline __attribute__((noinline)) void foo() { ... }` EABase has some wrappers for this, such as EA_FORCE_INLINE. ### Perf.12 C++ / EASTL seems to bloat my .obj files much more than C does. There is no need to worry. The way most C++ compilers compile templates, they compile all seen template code into the current .obj module, which results in larger .obj files and duplicated template code in multiple .obj files. However, the linker will (and in fact must) select only a single version of any given function for the application, and these linked functions will usually be located contiguously. Additionally, the debug information for template definitions is usually larger than that for non-templated C++ definitions, which itself is sometimes larger than C definitions due to name decoration. ### Perf.13 What are the best compiler settings for EASTL? We will discuss various aspects of this topic here. As of this writing, more EASTL research on this topic has been done on Microsoft compiler platforms (e.g. Win32) than GCC platforms. Thus currently this discussion focuses on VC++ optimization. Some of the concepts are applicable to GCC, though. EASTL has been successfully compiled and tested (the EASTL unit test) on our major development platforms with the highest optimization settings enabled, including GCC's infamous -O3 level. **Optimization Topics** * Function inlining. * Optimization for speed vs. optimization for size. * Link-time code generation (LTCG). * Profile-guided optimization (PGO). **Function inlining** EASTL is a template library and inlining is important for optimal speed. Compilers have various options for enabling inlining and those options are discussed in this FAQ in detail. Most users will want to enable some form of inlining when compiling EASTL and other templated libraries. 
Users that are most concerned about the compiler's inlining increasing code size may want to try the 'inline only functions marked as inline' compiler option. Here is a table of normalized results from the benchmark project (Win32 platform): | | Inlining Disabled | Inline only 'inline' | Inline any | |------|------|------|------| | **Application size** | 100K | 86K | 86K | | **Execution time** | 100 | 75 | 75 | The above execution times are highly simplified versions of the actual benchmark data but convey a sense of the general average behaviour that can be expected. In practice, simple functions such as vector::operator[] will execute much faster with inlining enabled but complex functions such as map::insert may execute no faster with inlining enabled. **Optimization for Speed / Size** Optimization for speed results in the compiler inlining more code than it would otherwise. This results in the inlined code executing faster than if it was not inlined. As mentioned above, basic function inlining can result in smaller code as well as faster code, but after a certain point highly inlined code becomes greater in size than less inlined code and the performance advantages of inlining start to lessen. The EASTL Benchmark project is a medium sized application that is about 80% templated and thus acts as a decent measure of the practical tradeoff between speed and size. Here is a table of normalized results from the benchmark project (Windows platform): | | Size | Speed | Speed + LTCG | Speed + LTCG + PGO | |------|------|------|------|------| | **Application size** | 80K | 100K | 98K | 98K | | **Execution time** | 100 | 90 | 83 | 75 | What the above table is saying is that if you are willing to have your EASTL code be 20% larger, it will be 10% faster. Note that it doesn't mean that your app will be 20% larger, only the templated code in it like EASTL will be 20% larger. 
**Link-time code generation (LTCG)** LTCG is a mechanism whereby the compiler compiles the application as if it was all in one big .cpp file instead of separate .cpp files that don't see each other. Enabling LTCG optimizations is done by simply setting some compiler and linker settings and results in slower link times. The benchmark results are presented above and for the EASTL Benchmark project show some worthwhile improvement. **Profile-guided optimization (PGO)** PGO is a mechanism whereby the compiler uses profiling information from one or more runs to optimize the compilation and linking of an application. Enabling PGO optimizations is done by setting some linker settings and doing some test runs of the application, then linking the app with the test run results. Doing PGO optimizations is a somewhat time-consuming task but the benchmark results above demonstrate that for the EASTL Benchmark project PGO is worth the effort. ## Problems ### Prob.1 I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong? It may be possible that you are seeing floating point roundoff problems. Many STL algorithms require object comparisons to act consistently. However, floating point values sometimes compare differently between uses because in one situation a value might be in 32 bit form in system memory, whereas in another situation that value might be in an FPU register with a different precision. These are difficult problems to track down and aren't the fault of EASTL or whatever similar library you might be using. There are various solutions to the problem, but the important thing is to find a way to force the comparisons to be consistent. The code below was an example of this happening, whereby the object pA->mPos was stored in system memory while pB->mPos was stored in a register and comparisons were inconsistent and a crash ensued. 
```cpp class SortByDistance : public binary_function<WorldTreeObject*, WorldTreeObject*, bool> { private: Vector3 mOrigin; public: SortByDistance(Vector3 origin) { mOrigin = origin; } bool operator()(WorldTreeObject* pA, WorldTreeObject* pB) const { return (((WorldObject*)pA)->mPos - mOrigin).GetLength() < (((WorldObject*)pB)->mPos - mOrigin).GetLength(); } }; ``` Another thing to watch out for is the following mistake: ```cpp struct ValuePair { uint32_t a; uint32_t b; }; // Improve speed by casting the struct to uint64_t bool operator<(const ValuePair& vp1, const ValuePair& vp2) { return *(uint64_t*)&vp1 < *(uint64_t*)&vp2; } ``` The problem is that the ValuePair struct has 32 bit alignment but the comparison assumes 64 bit alignment. The code above has been observed to crash on the PowerPC 64-based machines. The resolution is to declare ValuePair as having 64 bit alignment. ### Prob.2 I am getting compiler warnings (e.g. C4244, C4242 or C4267) that make no sense. Why? One cause of this occurs with VC++ when you have code compiled with the /Wp64 (detect 64 bit portability issues) option. This causes pointer types to have a hidden flag called __w64 attached to them by the compiler. So 'ptrdiff_t' is actually known by the compiler as '__w64 int', while 'int' is known by the compiler as simply 'int'. A problem occurs here when you use templates. For example, let's say we have this templated function ```cpp template <typename T> T min(const T a, const T b) { return b < a ? b : a; } ``` If you compile this code: ```cpp ptrdiff_t a = min(ptrdiff_t(0), ptrdiff_t(1)); int b = min((int)0, (int)1); ``` You will get the following warning for the second line, which is somewhat nonsensical: `warning C4244: 'initializing' : conversion from 'const ptrdiff_t' to 'int', possible loss of data` This could probably be considered a VC++ bug, but in the meantime you have little choice but to ignore the warning or disable it. ### Prob.3 I am getting compiler warning C4530, which complains about exception handling and "unwind semantics." 
What gives? VC++ has a compiler option (/EHsc) that allows you to enable/disable exception handling stack unwinding but still enable try/catch. This is useful because it can save a lot in the way of code generation for your application. Disabling stack unwinding will decrease the size of your executable on at least the Win32 platform by 10-12%. If you have stack unwinding disabled, but you have try/catch statements, VC++ will generate the following warning: `warning C4530: C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc` As of EASTL v1.0, this warning has been disabled within EASTL for EASTL code. However, non-EASTL code such as std STL code may still cause this warning to be triggered. In this case there is not much you can do about this other than to disable the warning. ### Prob.4 Why are tree-based EASTL containers hard to read with a debugger? **Short answer** Maximum performance and design mandates. **Long answer** You may notice that when you have a tree-based container (e.g. set, map) in the debugger that it isn't automatically able to recognize the tree nodes as containing instances of your contained object. You can get the debugger to do what you want with casting statements in the debug watch window, but this is not an ideal solution. The reason this is happening is that node-based containers always use an anonymous node type as the base class for container nodes. This is primarily done for performance, as it allows the node manipulation code to exist as a single non-templated library of functions and it saves memory because containers will have one or two base nodes as container 'anchors' and you don't want to allocate a node of the size of the user data when you can just use a base node. See list.h for an example of this and some additional in-code documentation on this. Additionally, EASTL has the design mandate that an empty container constructs no user objects. 
This is both for performance reasons and because doing so would skew the user's tracking of object counts and might possibly break some expectation the user has about object lifetimes. Currently this debug issue exists only with tree-based containers. Other node-based containers such as list and slist use a trick to get around this problem in debug builds. See [Debug.2](#debug2-how-do-i-view-containers-if-the-visualizertooltip-support-is-not-present) for more. ### Prob.5 The EASTL source code is sometimes rather complicated looking. Why is that? **Short answer** Maximum performance. **Long answer** EASTL uses templates, type_traits, iterator categories, redundancy reduction, and branch reduction in order to achieve optimal performance. A side effect of this is that there are sometimes a lot of template parameters and multiple levels of function calls due to template specialization. The ironic thing about this is that this makes the code (an optimized build, at least) go faster, not slower. In an optimized build the compiler will see through the calls and template parameters and generate a direct optimized inline version. As an example of this, take a look at the implementation of the copy algorithm in algorithm.h. If you are copying an array of scalar values or other trivially copyable values, the compiler will see how the code directs this to the memcpy function and will generate nothing but a memcpy in the final code. For non-memcpyable data types the compiler will automatically understand that and do the right thing. EASTL's primary objective is maximal performance, and it has been deemed worthwhile to make the code a little less obvious in order to achieve this goal. Every case where EASTL does something in an indirect way is by design and usually this is for the purpose of achieving the highest possible performance. ### Prob.6 When I get compilation errors, they are very long and complicated looking. What do I do? 
Assuming the bugs are all worked out of EASTL, these errors really do indicate that you have something wrong. EASTL is intentionally very strict about types, as it tries to minimize the chance of user errors. Unfortunately, there is no simple resolution to the problem of long compiler errors other than to deal with them. On the other hand, once you've dealt with them a few times, you tend to realize that most of the time they are the same kinds of errors. Top five approaches to dealing with long compilation errors: 1. Look at the line where the compilation error occurred and ignore the text of the error and just look at obvious things that might be wrong. 2. Consider the most common typical causes of templated compilation errors and consider if any of these might be your problem. Usually one of them is. 3. Either read through the error (it's not as hard as it may look on the surface) or copy the error to a text file and remove the extraneous text. 4. Compile the code under GCC instead of MSVC, as GCC warnings and errors tend to be more helpful than MSVC's. Possibly also consider compiling an isolated version under Comeau C++'s free online compiler at www.comeaucomputing.com or the Dinkumware online compiler at http://dinkumware.com/exam/. 5. Try using an STL filter (http://www.bdsoft.com/tools/stlfilt.html) which automatically boils down template errors to simpler forms. We haven't tried this yet with EASTL. Also there is the more generic TextFilt (http://textfilt.sourceforge.net/). Top five causes of EASTL compilation errors: 1. const-correctness. Perhaps a quarter of container template errors are due to the user not specifying const correctly. 2. Missing hash function. hash_map, hash_set, etc. require that you either specify a hash function or one exists for your class. See functional.h for examples of declarations of hash functions for common data types. 3. Missing operators. 
Various containers and algorithms require that certain operators exist for your contained classes. For example, list requires that you can test contained objects for equivalence (i.e. operator==), while map requires that you can test contained objects for "less-ness" (operator <). If you define a Widget class and don't have a way to compare two Widgets, you will get errors when trying to put them into a map. 4. Specifying the wrong data type. For example, it is a common mistake to forget that when you insert into a map, you need to insert a pair of objects and not just your key or value type. 5. Incorrect template parameters. When declaring a template instantiation (e.g. map<int, vector<int> >) you simply need to get the template parameters correct. Also note that when you have ">>" next to each other that you need to separate them by one space (e.g. "> >"). ### Prob.7 Templates sometimes seem to take a long time to compile. Why do I do about that? C++ compilers are generally slower than C compilers, and C++ templates are generally slower to compile than regular C++ code. EASTL has some extra functionality (such as type_traits and algorithm specializations) that is not found in most other template libraries and significantly improves performance and usefulness but adds to the amount of code that needs to be compiled. Ironically, we have a case where more source code generates faster and smaller object code. The best solution to the problem is to use pre-compiled headers, which are available on all modern (~2002+) compilers, such as VC6.0+, GCC 3.2+, and Metrowerks 7.0+. In terms of platforms this means all 2002+ platforms. Some users have been speeding up build times by creating project files that put all the source code in one large .cpp file. This has an effect similar to pre-compiled headers. It can go even faster than pre-compiled headers but has downsides in the way of convenience and portability. 
### Prob.8 I get the compiler error: "template instantiation depth exceeds maximum of 17. use -ftemplate-depth-NN to increase the maximum". This is a GCC error that occurs when a templated function calls a templated function which calls a templated function, etc. past a depth of 17. You can use the GCC command line argument -ftemplate-depth-40 (or some other high number) to get around this. As noted below, the syntax starting with GCC 4.5 has changed slightly. The primary reason you would encounter this with EASTL is type traits that are used by algorithms. The type traits library is a (necessarily) highly templated set of types and functions which adds at most about nine levels of inlining. The copy and copy_backward algorithms have optimized pathways that add about four levels of inlining. If you have just a few more layers on top of that in container or user code then the default limit of 17 can be exceeded. We are investigating ways to reduce the template depth in the type traits library, but only so much can be done, as most compilers don't support type traits natively. Metrowerks is the current exception. From the GCC documentation: ``` -ftemplate-depth-n Set the maximum instantiation depth for template classes to n. A limit on the template instantiation depth is needed to detect endless recursions during template class instantiation. ANSI/ISO C++ conforming programs must not rely on a maximum depth greater than 17. Note that starting with GCC 4.5 the syntax is -ftemplate-depth=N instead of -ftemplate-depth-n. ``` ### Prob.9 I'm getting errors about min and max while compiling. You need to define NOMINMAX under VC++ when this occurs, as it otherwise defines min and max macros that interfere. There may be equivalent issues with other compilers. Also, VC++ has a specific <minmax.h> header file which defines min and max macros but which doesn't pay attention to NOMINMAX and so in that case there is nothing to do but not include that file or to undefine min and max. 
minmax.h is not a standard file and its min and max macros are not standard C or C++ macros or functions. ### Prob.10 C++ / EASTL seems to bloat my .obj files much more than C does. There is no need to worry. The way most C++ compilers compile templates, they compile all seen template code into the current .obj module, which results in larger .obj files and duplicated template code in multiple .obj files. However, the linker will (and must) select only a single version of any given function for the application, and these linked functions will usually be located contiguously. ### Prob.11 I'm getting compiler errors regarding placement operator new being previously defined. This can happen if you are attempting to define your own versions of placement new/delete. The C++ language standard does not allow the user to override these functions. Section 18.4.3 of the standard states: > Placement forms > 1. These functions are reserved, a C++ program may not define functions that displace the versions in the Standard C++ library. You may find that #defining __PLACEMENT_NEW_INLINE seems to fix your problems under VC++, but it can fail under some circumstances and is not portable and fails with other compilers, which don't have an equivalent workaround. ### Prob.12 I'm getting errors related to wchar_t string functions such as wcslen(). EASTL requires EABase-related items that the following be so. If not, then EASTL gets confused about what types it can pass to wchar_t related functions. * The #define EA_WCHAR_SIZE is equal to sizeof(wchar_t). * If sizeof(wchar_t) == 2, then char16_t is typedef'd to wchar_t. * If sizeof(wchar_t) == 4, then char32_t is typedef'd to wchar_t. EABase v2.08 and later automatically does this for most current generation and all next generation platforms. With GCC 2.x, the user may need to predefine EA_WCHAR_SIZE to the appropriate value, due to limitations with the GCC compiler. 
Note that GCC defaults to sizeof(wchar_t) == 4, but it can be changed to 2 with the -fshort-wchar compiler command line argument. If you are using EASTL without EABase, you will need to make sure the above items are correctly defined. ### Prob.13 I'm getting compiler warning C4619: there is no warning number Cxxxx (e.g. C4217). Compiler warning C4619 is a VC++ warning which is saying that the user is attempting to enable or disable a warning which the compiler doesn't recognize. This warning only occurs if the user has the compiler set to enable warnings that are normally disabled, regardless of the warning level. The problem, however, is that there is no easy way for user code to tell what compiler warnings any given compiler version will recognize. That's why Microsoft normally disables this warning. The only practical solution we have for this is for the user to disable warning 4619 globally or on a case-by-case basis. EA build systems such as nant/framework 2's eaconfig will usually disable 4619. In general, global enabling of 'warnings that are disabled by default' often results in quandaries such as this. ### Prob.14 My stack-based fixed_vector is not respecting the object alignment requirements. EASTL fixed_* containers rely on the compiler-supplied alignment directives, such as that implemented by EA_PREFIX_ALIGN. This is normally a good thing because it allows the memory to be local with the container. However, as documented by Microsoft at http://msdn2.microsoft.com/en-us/library/83ythb65(VS.71).aspx, this doesn't work for stack variables. The two primary means of working around this are: * Use something like AlignedObject<> from the EAStdC package's EAAllocator.h file. * Use eastl::vector with a custom allocator and have it provide aligned memory. EASTL automatically recognizes that the objects are aligned and will call the aligned version of your allocator allocate() function. 
You can get this aligned memory from the stack, if you need it, somewhat like how AlignedObject<> works. ### Prob.15 I am getting compiler errors when using GCC under XCode (Macintosh/iphone). The XCode environment has a compiler option which causes it to evaluate include directories recursively. So if you specify /a/b/c as an include directory, it will consider all directories underneath c to also be include directories. This option is enabled by default, though many XCode users disable it, as it is a somewhat dangerous option. The result of enabling this option with EASTL is that <EASTL/string.h> is used by the compiler when you say #include <string.h>. The solution is to disable this compiler option. It's probably a good idea to disable this option anyway, as it typically causes problems for users yet provides minimal benefits. ### Prob.16 I am getting linker errors about Vsnprintf8 or Vsnprintf16. EASTL requires the user to provide a function called Vsnprintf8 if the string::sprintf function is used. vsnprintf is not a standard C function, but most C standard libraries provide some form of it, though in some ways their implementations differ, especially in what the return value means. Also, most implementations of vsnprintf are slow, mostly due to mutexes related to locale functionality. And you can't really use vendor vsnprintf on an SPU due to the heavy standard library size. EASTL is stuck because it doesn't want to depend on something with these problems. EAStdC provides a single consistent fast lightweight, yet standards-conforming, implementation in the form of Vsnprintf(char8_t*, ...), but EASTL can't have a dependency on EAStdC. So the user must provide an implementation, even if all it does is call EAStdC's Vsnprintf or the vendor vsnprintf for that matter. 
Example of providing Vsnprintf8 via EAStdC: ```cpp #include <EAStdC/EASprintf.h> int Vsnprintf8(char8_t* pDestination, size_t n, const char8_t* pFormat, va_list arguments) { return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments); } int Vsnprintf16(char16_t* pDestination, size_t n, const char16_t* pFormat, va_list arguments) { return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments); } ``` Example of providing Vsnprintf8 via C libraries: ```cpp #include <stdio.h> int Vsnprintf8(char8_t* p, size_t n, const char8_t* pFormat, va_list arguments) { #ifdef _MSC_VER return vsnprintf_s(p, n, _TRUNCATE, pFormat, arguments); #else return vsnprintf(p, n, pFormat, arguments); #endif } int Vsnprintf16(char16_t* p, size_t n, const char16_t* pFormat, va_list arguments) { #ifdef _MSC_VER return vsnwprintf_s(p, n, _TRUNCATE, pFormat, arguments); #else return vsnwprintf(p, n, pFormat, arguments); // Won't work on Unix because its libraries implement wchar_t as int32_t. #endif } ``` ### Prob.17 I am getting compiler errors about UINT64_C or UINT32_C. This is usually an order-of-include problem that comes about due to the implementation of __STDC_CONSTANT_MACROS in C++ Standard libraries. The C++ <stdint.h> header file defines UINT64_C only if __STDC_CONSTANT_MACROS has been defined by the user or the build system; the compiler doesn't automatically define it. The failure you are seeing occurs because user code is #including a system header before #including EABase and without defining __STDC_CONSTANT_MACROS itself or globally. EABase defines __STDC_CONSTANT_MACROS and #includes the appropriate system header. But if the system header was already previously #included and __STDC_CONSTANT_MACROS was not defined, then UINT64_C doesn't get defined by anybody. The real solution that the C++ compiler and standard library wants is for the app to globally define __STDC_CONSTANT_MACROS itself in the build. ### Prob.18 I am getting a crash with a global EASTL container. 
This is usually due to the compiler's lack of support for global (and static) C++ class instances. The crash is happening because the global variable exists but its constructor was not called on application startup and its member data is zeroed bytes. To handle this you need to manually initialize such variables. There are two primary ways: Failing code: ```cpp eastl::list<int> gIntList; // Global variable. void DoSomething() { gIntList.push_back(1); // Crash. gIntList was never constructed. } ``` Declaring a pointer solution: ```cpp eastl::list<int>* gIntList = NULL; void DoSomething() { if(!gIntList) // Or move this to an init function. gIntList = new eastl::list<int>; gIntList->push_back(1); // Success } ``` Manual constructor call solution: ```cpp eastl::list<int> gIntList; void InitSystem() { new(&gIntList) eastl::list<int>; } void DoSomething() { gIntList.push_back(1); // Success } ``` ### Prob.19 Why doesn't EASTL support passing NULL string functions? The primary argument is to make functions safer for use. Why crash on NULL pointer access when you can make the code safe? That's a good argument. The counter argument, which EASTL currently makes, is: > It breaks consistency with the C++ STL library and C libraries, which require strings to be valid. > > It makes the code slower and bigger for all users, though few need NULL checks. The specification for how to handle NULL is simple for some cases but not simple for others. Operator < is a case where the proper handling of it in a consistent way is not simple, as all comparison code (<, >, ==, !=, >=, <=) in EASTL must universally and consistently handle the case where either or both sides are NULL. A NULL string seems similar to an empty string, but doesn't always work out so simply. > > What about other invalid string pointers? NULL is merely one invalid value of many, with its only distinction being that sometimes it's intentionally NULL (as opposed to being NULL due to not being initialized). 
How and where to implement the NULL checks in such a way as to do it efficiently is not always simple, given that public functions call public functions. > > It's arguable (and in fact the intent of the C++ standard library) that using pointers that are NULL is a user/app mistake. If we really want to be safe then we should be using string objects for everything. You may not entirely buy this argument in practice, but on the other hand one might ask why is the caller of EASTL using a NULL pointer in the first place? The answer of course is that somebody gave it to him. ## Debug ### Debug.1 How do I set the VC++ debugger to display EASTL container data with tooltips? See [Cont.9](#cont9-how-do-i-set-the-vc-debugger-to-display-eastl-container-data-with-tooltips) ### Debug.2 How do I view containers if the visualizer/tooltip support is not present? Here is a table of answers about how to manually inspect containers in the debugger. | Container | Approach | |------|------| | slist
fixed_slist | slist is a singly-linked list. Look at the slist mNode variable. You can walk the list by looking at mNode.mpNext, etc. | | list
fixed_list | list is a doubly-linked list. Look at the list mNode variable. You can walk the list forward by looking at mNode.mpNext, etc. and backward by looking at mpPrev, etc. | | intrusive_list
intrusive_slist† | Look at the list mAnchor node. This lets you walk forward and backward in the list via mpNext and mpPrev. | | array | View the array mValue member in the debugger. It's simply a C style array. | | vector
fixed_vector | View the vector mpBegin value in the debugger. If the vector is long, use ", N" to limit the view length, as with someVector.mpBegin, 32 | | vector_set
vector_multiset
vector_map
vector_multimap | These are containers that are implemented as a sorted vector, deque, or array. They are searched via a standard binary search. You can view them the same way you view a vector or deque. | | deque | deque is implemented as an array of arrays, where the arrays implement successive equally-sized segments of the deque. The mItBegin deque member points to the deque begin() position. | | bitvector | Look at the bitvector mContainer variable. If it's a vector, then see vector above. | | bitset | Look at the bitset mWord variable. The bitset is nothing but one or more uint32_t mWord items. | | set
multiset
fixed_set
fixed_multiset | The set containers are implemented as a tree of elements. The set mAnchor.mpNodeParent points to the top of the tree; the mAnchor.mpNodeLeft points to the far left node of the tree (set begin()); the mAnchor.mpNodeRight points to the right of the tree (set end()). | | map
multimap
fixed_map
fixed_multimap | The map containers are implemented as a tree of pairs, where pair.first is the map key and pair.second is the map value. The map mAnchor.mpNodeParent points to the top of the tree; the mAnchor.mpNodeLeft points to the far left node of the tree (map begin()); the mAnchor.mpNodeRight points to the right of the tree (map end()). | | hash_map
hash_multimap
fixed_hash_map
fixed_hash_multimap | hash tables in EASTL are implemented as an array of singly-linked lists. The array is the mpBucketArray member. Each element in the list is a pair, where the first element of the pair is the map key and the second is the map value. | | intrusive_hash_map
intrusive_hash_multimap
intrusive_hash_set
intrusive_hash_multiset | intrusive hash tables in EASTL are implemented very similarly to regular hash tables. See the hash_map and hash_set entries for more info. | | hash_set
hash_multiset
fixed_hash_set
fixed_hash_multiset | hash tables in EASTL are implemented as an array of singly-linked lists. The array is the mpBucketArray member. | | basic_string
fixed_string
fixed_substring | View the string mpBegin value in the debugger. If the string is long, use ", N" to limit the view length, as with someString.mpBegin, 32 | | heap | A heap is an array of data (e.g. EASTL vector) which is organized in a tree whereby the highest priority item is array[0]. The next two highest priority items are array[1] and [2]. Underneath [1] in priority are items [3] and [4], and underneath item [2] in priority are items [5] and [6]. etc. | | stack | View the stack member c value in the debugger. That member will typically be a list or deque. | | queue | View the queue member c value in the debugger. That member will typically be a list or deque. | | priority_queue | View the priority_queue member c value in the debugger. That member will typically be a vector or deque which is organized as a heap. See the heap section above for how to view a heap. | | smart_ptr | View the mpValue member. | ### Debug.3 The EASTL source code is sometimes rather complicated looking. Why is that? **Short answer** Maximum performance. **Long answer** EASTL uses templates, type_traits, iterator categories, redundancy reduction, and branch reduction in order to achieve optimal performance. A side effect of this is that there are sometimes a lot of template parameters and multiple levels of function calls due to template specialization. The ironic thing about this is that this makes the code (an optimized build, at least) go faster, not slower. In an optimized build the compiler will see through the calls and template parameters and generate a direct optimized inline version. As an example of this, take a look at the implementation of the copy algorithm in algorithm.h. If you are copying an array of scalar values or other trivially copyable values, the compiler will see how the code directs this to the memcpy function and will generate nothing but a memcpy in the final code. 
For non-memcpyable data types the compiler will automatically understand that and do the right thing. EASTL's primary objective is maximal performance, and it has been deemed worthwhile to make the code a little less obvious in order to achieve this goal. Every case where EASTL does something in an indirect way is by design and usually this is for the purpose of achieving the highest possible performance. ### Debug.4 When I get compilation errors, they are very long and complicated looking. What do I do? Assuming the bugs are all worked out of EASTL, these errors really do indicate that you have something wrong. EASTL is intentionally very strict about types, as it tries to minimize the chance of user errors. Unfortunately, there is no simple resolution to the problem of long compiler errors other than to deal with them. On the other hand, once you've dealt with them a few times, you tend to realize that most of the time they are the same kinds of errors. Top five approaches to dealing with long compilation errors: 1. Look at the line where the compilation error occurred and ignore the text of the error and just look at obvious things that might be wrong. 2. Consider the most common typical causes of templated compilation errors and consider if any of these might be your problem. Usually one of them is. 3. Either read through the error (it's not as hard as it may look on the surface) or copy the error to a text file and remove the extraneous text. 4. Compile the code under GCC instead of MSVC, as GCC warnings and errors tend to be more helpful than MSVC's. Possibly also consider compiling an isolated version under Comeau C++'s free online compiler at www.comeaucomputing.com or the Dinkumware online compiler at http://dinkumware.com/exam/. 5. Try using an STL filter (http://www.bdsoft.com/tools/stlfilt.html) which automatically boils down template errors to simpler forms. We haven't tried this yet with EASTL.
Also there is the more generic TextFilt (http://textfilt.sourceforge.net/). Top five causes of EASTL compilation errors: 1. const-correctness. Perhaps a quarter of container template errors are due to the user not specifying const correctly. 2. Missing hash function. hash_map, hash_set, etc. require that you either specify a hash function or one exists for your class. See functional.h for examples of declarations of hash functions for common data types. 3. Missing operators. Various containers and algorithms require that certain operators exist for your contained classes. For example, list requires that you can test contained objects for equivalence (i.e. operator==), while map requires that you can test contained objects for "less-ness" (operator <). If you define a Widget class and don't have a way to compare two Widgets, you will get errors when trying to put them into a map. 4. Specifying the wrong data type. For example, it is a common mistake to forget that when you insert into a map, you need to insert a pair of objects and not just your key or value type. 5. Incorrect template parameters. When declaring a template instantiation (e.g. map >) you simply need to get the template parameters correct. Also note that when you have ">>" next to each other that you need to separate them by one space (e.g. "> >"). ### Debug.5 How do I measure hash table balancing? The following functionality lets you spelunk hash container layout. * There is the load_factor function which tells you the overall hashtable load, but doesn't tell you if a load is unevenly distributed. * You can control the load factor and thus the automated bucket redistribution with set_load_factor. * The local_iterator begin(size_type n) and local_iterator end(size_type) functions lets you iterate each bucket individually. You can use this to examine the elements in a bucket. * You can use the above to get the size of any bucket, but there is also simply the bucket_size(size_type n) function. 
* The bucket_count function tells you the count of buckets. So with this you can completely visualize the layout of the hash table. * There is also iterator find_by_hash(hash_code_t c), for what it's worth. The following function draws an ASCII bar graph of the hash table for easy visualization of bucket distribution: ```cpp #include #include #include template void VisualizeHashTableBuckets(const HashTable& h) { eastl_size_t bucketCount = h.bucket_count(); eastl_size_t largestBucketSize = 0; for(eastl_size_t i = 0; i < bucketCount; i++) largestBucketSize = eastl::max_alt(largestBucketSize, h.bucket_size(i)); YourPrintFunction("\n --------------------------------------------------------------------------------\n"); for(eastl_size_t i = 0; i < bucketCount; i++) { const eastl_size_t k = h.bucket_size(i) * 80 / largestBucketSize; char buffer[16]; sprintf(buffer, "%3u|", (unsigned)i); YourPrintFunction(buffer); for(eastl_size_t j = 0; j < k; j++) YourPrintFunction("*"); YourPrintFunction("\n"); } YourPrintFunction(" --------------------------------------------------------------------------------\n"); } ``` This results in a graph that looks like the following (with one horizontal bar per bucket). This hashtable has a large number of collisions in each of its 10 buckets. 
``` ------------------------------------------------------ 0|******************************************** 1|************************************************ 2|*************************************** 3|******************************************** 4|***************************************************** 5|************************************************* 6|**************************************** 7|*********************************************** 8|******************************************** 9|************************************** 10|******************************************** ----------------------------------------------------- ``` ## Containers ### Cont.1 Why do some containers have "fixed" versions (e.g. fixed_list) but others(e.g. deque) don't have fixed versions? Recall that fixed containers are those that are implemented via a single contiguous block of memory and don't use a general purpose heap to allocate memory from. For example, fixed_list is a list container that implements its list by a user-configurable fixed block of memory. Such containers have an upper limit to how many items they can hold, but have the advantage of being more efficient with memory use and memory access coherency. The reason why some containers don't have fixed versions is that such functionality doesn't make sense with these containers. Containers which don't have fixed versions include: ``` array, deque, bitset, stack, queue, priority_queue, intrusive_list, intrusive_hash_map, intrusive_hash_set, intrusive_hash_multimap, intrusive_hash_multimap, vector_map, vector_multimap, vector_set, vector_multiset. ``` Some of these containers are adapters which wrap other containers and thus there is no need for a fixed version because you can just wrap a fixed container. In the case of intrusive containers, the user is doing the allocation and so there are no memory allocations. In the case of array, the container is a primitive type which doesn't allocate memory. 
In the case of deque, its primary purpose for being is to dynamically resize and thus the user would likely be better off using a fixed_vector. ### Cont.2 Can I mix EASTL with standard C++ STL? This is possible to some degree, though the extent depends on the implementation of C++ STL. One of the things that makes interoperability difficult is something called iterator categories. Containers and algorithms recognize iterator types via their category and STL iterator categories are not recognized by EASTL and vice versa. Things that you definitely can do: * #include both EASTL and standard STL headers from the same .cpp file. * Use EASTL containers to hold STL containers. * Construct an STL reverse_iterator from an EASTL iterator. * Construct an EASTL reverse_iterator from an STL iterator. Things that you probably will be able to do, though a given std STL implementation may prevent it: * Use STL containers in EASTL algorithms. * Use EASTL containers in STL algorithms. * Construct or assign to an STL container via iterators into an EASTL container. * Construct or assign to an EASTL container via iterators into an STL container. Things that you would be able to do if the given std STL implementation is bug-free: * Use STL containers to hold EASTL containers. Unfortunately, VC7.x STL has a confirmed bug that prevents this. Similarly, STLPort versions prior to v5 have a similar bug. Things that you definitely can't do: * Use an STL allocator directly with an EASTL container (though you can use one indirectly). * Use an EASTL allocator directly with an STL container (though you can use one indirectly). ### Cont.3 Why are there so many containers? EASTL has a large number of container types (e.g. vector, list, set) and often has a number of variations of given types (list, slist, intrusive_list, fixed_list). The reason for this is that each container is tuned to a specific need and there is no single container that works for all needs.
The more the user is concerned about squeezing the most performance out of their system, the more the individual container variations become significant. It's important to note that having additional container types generally does not mean generating additional code or code bloat. Templates result in generated code regardless of what templated class they come from, and so for the most part you get optimal performance by choosing the optimal container for your needs. ### Cont.4 Don't STL and EASTL containers fragment memory? They only fragment memory if you use them in a way that does so. This is no different from any other type of container used in a dynamic way. There are various solutions to this problem, and EASTL provides additional help as well: For vectors, use the reserve function (or the equivalent constructor) to set aside a block of memory for the container. The container will not reallocate memory unless you try to grow beyond the capacity you reserve. EASTL has "fixed" variations of containers which allow you to specify a fixed block of memory which the container uses for its memory. The container will not allocate any memory with these types of containers and all memory will be cache-friendly due to its locality. You can assign custom allocators to containers instead of using the default global allocator. You would typically use an allocator that has its own private pool of memory. Where possible, add all of a container's elements to it at once up front instead of adding them over time. This avoids memory fragmentation and increases cache coherency. ### Cont.5 I don't see container optimizations for equivalent scalar types such as pointer types. Why? Metrowerks (and no other, as of this writing) STL has some container specializations for type T* which map them to type void*. The idea is that a user who declares a list of Widget* and a list of Gadget* will generate only one container: a list of void*. As a result, code generation will be smaller.
Often this is done only in optimized builds, as such containers are harder to view in debug builds due to type information being lost. The addition of this optimization is under consideration for EASTL, though it might be noted that optimizing compilers such as VC++ are already capable of recognizing duplicate generated code and folding it automatically as part of link-time code generation (LTCG) (a.k.a. "whole program optimization"). This has been verified with VC++, as the following code and resulting disassembly demonstrate: ```cpp eastl::list intPtrList; eastl::list toPtrList; eastl_size_t n1 = intPtrList.size(); eastl_size_t n2 = toPtrList.size(); 0042D288 lea edx,[esp+14h] 0042D28C call eastl::list::size (414180h) 0042D291 push eax 0042D292 lea edx,[esp+24h] 0042D296 call eastl::list::size (414180h) ``` Note that in the above case the compiler folded the two implementations of size() into a single implementation. ### Cont.6 What about alternative container and algorithm implementations (e.g. treaps, skip lists, avl trees)? EASTL chooses to implement some alternative containers and algorithms and not others. It's a matter of whether or not the alternative provides truly complementary or improved functionality over existing containers. The following is a list of some implemented and non-implemented alternatives and the rationale behind each: Implemented: * intrusive_list, etc. -- Saves memory and improves cache locality. * vector_map, etc. -- Saves memory and improves cache locality. * ring_buffer -- Useful for some types of operations and has no alternative. * shell_sort -- Useful sorting algorithm. * sparse_matrix -- Useful for some types of operations and has no alternative. Not implemented: * skip lists (alternative to red-black tree) -- These use more memory and usually perform worse than rbtrees. * treap (alternative to red-black tree) -- These are easier and smaller than rbtrees, but perform worse. 
* avl tree (alternative to red-black tree) -- These have slightly better search performance than rbtrees, but significantly worse insert/remove performance. * btree (alternative to red-black tree) -- These are no better than rbtrees. If you have an idea of something that should be implemented, please suggest it or even provide at least a prototypical implementation. ### Cont.7 Why are tree-based EASTL containers hard to read with a debugger? **Short answer** Maximum performance and design mandates. **Long answer** You may notice that when you have a tree-based container (e.g. set, map) in the debugger that it isn't automatically able to recognize the tree nodes as containing instances of your contained object. You can get the debugger to do what you want with casting statements in the debug watch window, but this is not an ideal solution. The reason this is happening is that node-based containers always use an anonymous node type as the base class for container nodes. This is primarily done for performance, as it allows the node manipulation code to exist as a single non-templated library of functions and it saves memory because containers will have one or two base nodes as container 'anchors' and you don't want to allocate a node of the size of the user data when you can just use a base node. See list.h for an example of this and some additional in-code documentation on this. Additionally, EASTL has the design mandate that an empty container constructs no user objects. This is both for performance reasons and because doing so would skew the user's tracking of object counts and might possibly break some expectation the user has about object lifetimes. Currently this debug issue exists only with tree-based containers. Other node-based containers such as list and slist use a trick to get around this problem in debug builds. ### Cont.8 How do I assign a custom allocator to an EASTL container? There are two ways of doing this: 1.
Use the set_allocator function that is present in each container. 2. Specify a new allocator type via the Allocator template parameter that is present in each container. For item #1, EASTL expects that you provide an instance of an allocator of the type that EASTL recognizes. This is simple but has the disadvantage that all such allocators must be of the same class. The class would need to have C++ virtual functions in order to allow a given instance to act differently from another instance. For item #2, you specify that the container use your own allocator class. The advantage of this is that your class can be implemented any way you want and doesn't require virtual functions for differentiation from other instances. Due to the way C++ works your class would necessarily have to use the same member function names as the default allocator class type. In order to make things easier, we provide a skeleton allocator here which you can copy and fill in with your own implementation. ```cpp class custom_allocator { public: custom_allocator(const char* pName = EASTL_NAME_VAL("custom allocator")) { #if EASTL_NAME_ENABLED mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif // Possibly do something here. } custom_allocator(const allocator& x, const char* pName = EASTL_NAME_VAL("custom allocator")); { #if EASTL_NAME_ENABLED mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif // Possibly copy from x here. } ~custom_allocator(); { // Possibly do something here. } custom_allocator& operator=(const custom_allocator& x) { // Possibly copy from x here. return *this; } void* allocate(size_t n, int flags = 0) { // Implement the allocation here. } void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) { // Implement the allocation here. } void deallocate(void* p, size_t n) { // Implement the deallocation here. 
} const char* get_name() const { #if EASTL_NAME_ENABLED return mpName; #else return "custom allocator"; #endif } void set_name(const char* pName) { #if EASTL_NAME_ENABLED mpName = pName; #endif } protected: // Possibly place instance data here. #if EASTL_NAME_ENABLED const char* mpName; // Debug name, used to track memory. #endif }; inline bool operator==(const allocator& a, const allocator& b) { // Provide a comparison here. } inline bool operator!=(const allocator& a, const allocator& b) { // Provide a negative comparison here. } ``` Here's an example of how to use the above custom allocator: ```cpp // Declare a Widget list and have it default construct. list widgetList; // Declare a Widget list and have it construct with a copy of some global allocator. list widgetList2(gSomeGlobalAllocator); // Declare a Widget list and have it default construct, but assign // an underlying implementation after construction. list widgetList; widgetList.get_allocator().mpIAllocator = new WidgetAllocatorImpl; ``` ### Cont.9 How do I set the VC++ debugger to display EASTL container data with tooltips? Visual Studio supports this via the AutoExp.dat file, an example of which is [present](./html/AutoExp.dat) with this documentation. Sometimes the AutoExp.dat doesn't seem to work. Avery Lee's explanation: > If I had to take a guess, the problem is most likely in the cast to the concrete node type. These are always tricky because, for some strange reason, the debugger is whitespace sensitive with regard to specifying template types. You might try manually checking one of the routines of the specific map instantiation and checking that the placement of whitespace and const within the template expression still matches exactly. In some cases the compiler uses different whitespace rules depending on the value type which makes it impossible to correctly specify a single visualizer – this was the case for eastl::list<>, for which I was forced to include sections for both cases. 
The downside is that you have a bunch of (error) entries either way. ### Cont.10 How do I use a memory pool with a container? Using custom memory pools is a common technique for decreasing memory fragmentation and increasing memory cache locality. EASTL gives you the flexibility of defining your own memory pool systems for containers. There are two primary ways of doing this: * Assign a custom allocator to a container. eastl::fixed_pool provides an implementation. * Use one of the EASTL fixed containers, such as fixed_list. **Custom Allocator** In the custom allocator case, you will want to create a memory pool and assign it to the container. For purely node-based containers such as list, slist, map, set, multimap, and multiset, your pool simply needs to be able to allocate list nodes. Each of these containers has a member typedef called node_type which defines the type of node allocated by the container. So if you have a memory pool that has a constructor that takes the size of pool items and the count of pool items, you would do this (assuming that MemoryPool implements the Allocator interface): ```cpp typedef list WidgetList; // Declare your WidgetList type. MemoryPool myPool(sizeof(WidgetList::node_type), 100); // Make a pool of 100 Widget nodes. WidgetList myList(&myPool); // Create a list that uses the pool. ``` In the case of containers that are array-based, such as vector and basic_string, memory pools don't work very well as these containers work on a realloc-basis instead of by adding incremental nodes. What you want to do with these containers is assign a sufficient block of memory to them and reserve() the container's capacity to the size of the memory. In the case of mixed containers which are partly array-based and partly node-based, such as hash containers and deque, you can use a memory pool for the nodes but will need a single array block to supply for the buckets (hash containers and deque both use a bucket-like system).
You might consider using eastl::fixed_pool as such an allocator, as it provides such functionality and allows the user to provide the actual memory used for the pool. Here is some example code: ```cpp char buffer[256]; list myList; myList.get_allocator().init(buffer, 256); ``` **Fixed Container** In the fixed container case, the container does all the work for you. To use a list which implements a private pool of memory, just declare it like so: ```cpp fixed_list fixedList; // Declare a fixed_list that can hold 100 Widgets ``` ### Cont.11 How do I write a comparison (operator<()) for a struct that contains two or more members? See [Algo.2](#algo2-how-do-i-write-a-comparison-operator-for-a-struct-that-contains-two-or-more-members). ### Cont.12 Why doesn't container X have member function Y? Why don't the list or vector containers have a find() function? Why doesn't the vector container have a sort() function? Why doesn't the string container have a mid() function? These are common examples of such questions. The answer usually boils down to two reasons: * The functionality exists in a more centralized location elsewhere, such as the algorithms. * The functionality can be had by using other member functions. In the case of find and sort functions not being part of containers, the find algorithm and sort algorithm are centralized versions that apply to any container. Additionally, the algorithms allow you to specify a sub-range of the container on which to apply the algorithm. So in order to find an element in a list, you would do this: `list::iterator i = find(list.begin(), list.end(), 3);` And in order to sort a vector, you would do this: ```cpp quick_sort(v.begin(), v.end()); // Sort the entire array. quick_sort(&v[3], &v[8]); // Sort the items at the indexes in the range of [3, 8).
``` In the case of functionality that can be had by using other member functions, note that EASTL follows the philosophy that duplicated functionality should not exist in a container, with exceptions being made for cases where mistakes and unsafe practices commonly happen if the given function isn't present. In the case of string not having a mid function, this is because there is a string constructor that takes a sub-range of another string. So to make a string out of the middle of another, you would do this: `string strMid(str, 3, 5); // Make a new string of the characters from the source range of [3, 3+5).` It might be noted that the EASTL string class is unique among EASTL containers in that it sometimes violates the minimum functionality rule. This is so because the std C++ string class similarly does so and EASTL aims to be compatible. ### Cont.13 How do I search a map of strings via a char pointer efficiently? If I use map.find("hello") it creates a temporary string, which is inefficient. The problem is illustrated with this example: ```cpp map swMap; ... auto it = swMap.find("blue"); // A temporary string object is created here, which allocates. ``` In this example, the find function expects a string object and not a string literal and so the compiler uses the string conversion operator to (silently!) construct a temporary string object. There are several solutions to this problem: * Make the map a map of char pointers instead of string objects. Don't forget to write a custom compare or else the default comparison function will compare pointer values instead of string contents. * Declare the map type with a [transparent comparison type](https://en.cppreference.com/w/cpp/utility/functional#Transparent_function_objects) (eg. [`less`](https://en.cppreference.com/w/cpp/utility/functional/less_void)). The above example becomes: ```cpp map> swMap; ... auto it = swMap.find("blue"); // No string object created. 
Uses heterogeneous lookup, which calls less::operator()(const char*). ``` This is advantageous over `find_as` because eastl containers support this optimization for additional member functions that take a `key_type` parameter, ie. heterogeneous lookup, insertion and erasure. * Use the EASTL `find_as` function, which allows you to find an item in a container via an alternative key than the one the container uses. Using a [transparent comparison type](https://en.cppreference.com/w/cpp/utility/functional#Transparent_function_objects) with the container is safer than using `find_as` because the latter requires the user specify a comparison object which must have the same semantics as the container's comparison object, otherwise the behaviour is undefined. ### Cont.14 Why are set and hash_set iterators const (i.e. const_iterator)? The situation is illustrated with this example: ```cpp set intSet; intSet.insert(1); set::iterator i = intSet.begin(); *i = 2; // Error: iterator i is const. ``` In this example, the iterator is a regular iterator and not a const_iterator, yet the compiler gives an error when trying to change the iterator value. The reason this is so is that a set is an ordered container and changing the value would make it out of order. Thus, set and multiset iterators are always const_iterators. If you need to change the value and are sure the change will not alter the container order, use const_cast or declare mutable member variables for your contained object. This resolution is the one blessed by the C++ standardization committee. ### Cont.15 How do I prevent my hash container from re-hashing? If you want to make a hashtable never re-hash (i.e. increase/reallocate its bucket count), call set_max_load_factor with a very high value such as 100000.f. Similarly, you can control the bucket growth factor with the rehash_policy function. By default, when buckets reallocate, they reallocate to about twice their previous count. 
You can control that value as in the example code here: ```cpp hash_set hashSet; hashSet.rehash_policy().mfGrowthFactor = 1.5f ``` ### Cont.16 Which uses less memory, a map or a hash_map? A hash_map will virtually always use less memory. A hash_map will use an average of two pointers per stored element, while a map uses three pointers per stored element. ### Cont.17 How do I write a custom hash function? You can look at the existing hash functions in functional.h, but we provide a couple of examples here. To write a specific hash function for a Widget class, you would do this: ```cpp struct WidgetHash { size_t operator()(const Widget& w) const { return w.id; } }; hash_set widgetHashSet; ``` To write a generic (templated) hash function for a set of similar classes (in this case that have an id member), you would do this: ```cpp template struct GeneralHash { size_t operator()(const T& t) const { return t.id; } }; hash_set > widgetHashSet; hash_set > doggetHashSet; ``` ### Cont.18 How do I write a custom compare function for a map or set? The sorted containers require that an operator< exist for the stored values or that the user provide a suitable custom comparison function. A custom comparison function can be implemented like so: ```cpp struct WidgetLess { bool operator()(const Widget& w1, const Widget& w2) const { return w.id < w2.id; } }; set wSet; ``` It's important that your comparison function must be consistent in its behaviour, else the container will either be unsorted or a crash will occur. This concept is called "strict weak ordering." ### Cont.19 How do I force my vector or string capacity down to the size of the container? You can simply use the set_capacity() member function which is present in both vector and string. This is a function that is not present in std STL vector and string functions. ```cpp eastl::vector x; x.set_capacity(); // Shrink x's capacity to be equal to its size. eastl::vector x; x.set_capacity(0); // Completely clear x.
``` To compact your vector or string in a way that would also work with std STL you need to do the following. How to shrink a vector's capacity to be equal to its size: ```cpp std::vector x; std::vector(x).swap(x); // Shrink x's capacity. ``` How to completely clear a std::vector (size = 0, capacity = 0, no allocation): ```cpp std::vector x; std::vector().swap(x); // Completely clear x. ``` ### Cont.20 How do I iterate a container while (selectively) removing items from it? All EASTL containers have an erase function which takes an iterator as an argument and returns an iterator to the next item. Thus, you can erase items from a container while iterating it like so: ```cpp set intSet; set::iterator i = intSet.begin(); while(i != intSet.end()) { if(*i & 1) // Erase all odd integers from the container. i = intSet.erase(i); else ++i; } ``` ### Cont.21 How do I store a pointer in a container? The problem with storing pointers in containers is that clearing the container will not free the pointers automatically. There are two conventional resolutions to this problem: * Manually free pointers when removing them from containers. * Store the pointer as a smart pointer instead of a "raw" pointer. The advantage of the former is that it makes the user's intent obvious and prevents the possibility of smart pointer "thrashing" with some containers. The disadvantage of the former is that it is more tedious and error-prone. The advantage of the latter is that your code will be cleaner and will always be error-free. The disadvantage is that it is perhaps slightly obfuscating and with some uses of some containers it can cause smart pointer thrashing, whereby a resize of a linear container (e.g. vector) can cause shared pointers to be repeatedly incremented and decremented with no net effect. It's important that you use a shared smart pointer and not an unshared one such as C++ auto_ptr, as the latter will result in crashes upon linear container resizes.
Here we provide an example of how to create a list of smart pointers: ```cpp list< shared_ptr > wList; wList.push_back(shared_ptr(new Widget)); wList.pop_back(); // The Widget will be freed. ``` ### Cont.22 How do I make a union of two containers? difference? intersection? The best way to accomplish this is to sort your container (or use a sorted container such as set) and then apply the set_union, set_difference, or set_intersection algorithms. ### Cont.23 How do I override the default global allocator? There are multiple ways to accomplish this. The allocation mechanism is defined in EASTL/internal/config.h and in allocator.h/cpp. Overriding the default global allocator means overriding these files, overriding what these files refer to, or changing these files outright. Here is a list of things you can do, starting with the simplest: * Simply provide the following versions of operator new (which EASTL requires, actually): ```cpp void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line); void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line); ``` * Predefine the config.h macros for EASTLAlloc, EASTLFree, etc. See config.h for this. * Override config.h entirely via EASTL_USER_CONFIG_HEADER. See config.h for this. * Provide your own version of allocator.h/cpp * Provide your own version of config.h. If you redefine the allocator class, you can make it work however you want. Note that config.h defines EASTLAllocatorDefault, which returns the default allocator instance. As documented in config.h, this is not a global allocator which implements all container allocations but is the allocator that is used when EASTL needs to allocate memory internally. There are very few cases where EASTL allocates memory internally, and in each of these it is for a sensible reason that is documented to behave as such. 
### Cont.24 How do I do trick X with the string container? There seem to be many things users want to do with strings. Perhaps the most commonly requested EASTL container extensions are string class shortcut functions. While some of these requests are being considered, we provide some shortcut functions here. **find_and_replace** ```cpp template void find_and_replace(String& s, const typename String::value_type* pFind, const typename String::value_type* pReplace) { for(size_t i; (i = source.find(pFind)) != T::npos; ) s.replace(i, eastl::CharStrlen(pFind), pReplace); } Example: find_and_replace(s, "hello", "hola"); ``` **trim front (multiple chars)** ```cpp template void trim_front(String& s, const typename String::value_type* pValues) { s.erase(0, s.find_first_not_of(pValues)); } Example: trim_front(s, " \t\n\r"); ``` **trim back (multiple chars)** ```cpp template void trim_front(String& s, const typename String::value_type* pValues) { s.resize(s.find_last_not_of(pValues) + 1); } Example: trim_back(s, " \t\n\r"); ``` **prepend** ```cpp template void prepend(String& s, const typename String::value_type* p) { s.insert(0, p); } Example: prepend(s, "log: "); ``` **begins_with** ```cpp template bool begins_with(const String& s, const typename String::value_type* p) { return s.compare(0, eastl::CharStrlen(p), p) == 0; } Example: if(begins_with(s, "log: ")) ... ``` **ends_with** ```cpp template bool ends_with(const String& s, const typename String::value_type* p) { const typename String::size_type n1 = s.size(); const typename String::size_type n2 = eastl::CharStrlen(p); return ((n1 >= n2) && s.compare(n1 - n2, n2, p) == 0); } Example: if(ends_with(s, "test.")) ... ``` **tokenize** Here is a simple tokenization function that acts very much like the C strtok function. 
```cpp template size_t tokenize(const String& s, const typename String::value_type* pDelimiters, String* resultArray, size_t resultArraySize) { size_t n = 0; typename String::size_type lastPos = s.find_first_not_of(pDelimiters, 0); typename String::size_type pos = s.find_first_of(pDelimiters, lastPos); while((n < resultArraySize) && (pos != String::npos) || (lastPos != String::npos)) { resultArray[n++].assign(s, lastPos, pos - lastPos); lastPos = s.find_first_not_of(pDelimiters, pos); pos = s.find_first_of(pDelimiters, lastPos); } return n; } Example: string resultArray[32]; tokenize(s, " \t", resultArray, 32)); ``` ### Cont.25 How do EASTL smart pointers compare to Boost smart pointers? EASTL's smart pointers are nearly identical to Boost (including all that crazy member template and dynamic cast functionality in shared_ptr), but are not using the Boost source code. EA legal has already stated that it is fine to have smart pointer classes with the same names and functionality as those present in Boost. EA legal specifically looked at the smart pointer classes in EASTL for this. There are two differences between EASTL smart pointers and Boost smart pointers: * EASTL smart pointers don't have thread safety built-in. It was deemed that this is too much overhead and that thread safety is something best done at a higher level. By coincidence the C++ library proposal to add shared_ptr also omits the thread safety feature. FWIW, I put a thread-safe shared_ptr in EAThread, though it doesn't attempt to do all the fancy member template things that Boost shared_ptr does. Maybe I'll add that some day if people care. * EASTL shared_ptr object deletion goes through a deletion object instead of through a virtual function interface. 95% of the time this makes no difference (aside from being more efficient), but the primary case where it matters is when you have shared_ptr and assign to is something like "new Widget". 
The problem is that shared_ptr doesn't know what destructor to call and so doesn't call a destructor unless you specify a custom destructor object as part of the template specification. I don't know what to say about this one, as it is less safe, but forcing everybody to have the overhead of additional templated classes and virtual destruction functions doesn't seem to be in the spirit of high performance or lean game development. There is the possibility of making a shared_ptr_boost which is completely identical to Boost shared_ptr. So perhaps that will be done some day. ### Cont.26 How do you forward-declare an EASTL container? Here are some examples of how to do this: ```cpp namespace eastl { template class basic_string; typedef basic_string string8; // Forward declare EASTL's string8 type. template class vector; typedef vector CharArray; template class hash_set; template class map; } ``` The forward declaration can be used to declare a pointer or reference to such a class. It cannot be used to declare an instance of a class or refer to class data, static or otherwise. Nevertheless, forward declarations for pointers and references are useful for reducing the number of header files a header file needs to include. ### Cont.27 How do I make two containers share a memory pool? EASTL (and std STL) allocators are specified by value semantics and not reference semantics. Value semantics is more powerful (because a value can also be a reference, but not the other way around), but is not always what people expect if they're used to writing things the other way. 
Here is some example code: ```cpp struct fixed_pool_reference { public: fixed_pool_reference() { mpFixedPool = NULL; } fixed_pool_reference(eastl::fixed_pool& fixedPool) { mpFixedPool = &fixedPool; } fixed_pool_reference(const fixed_pool_reference& x) { mpFixedPool = x.mpFixedPool; } fixed_pool_reference& operator=(const fixed_pool_reference& x) { mpFixedPool = x.mpFixedPool; return *this; } void* allocate(size_t /*n*/, int /*flags*/ = 0) { return mpFixedPool->allocate(); } void* allocate(size_t /*n*/, size_t /*alignment*/, size_t /*offset*/, int /*flags*/ = 0) { return mpFixedPool->allocate(); } void deallocate(void* p, size_t /*n*/) { return mpFixedPool->deallocate(p); } const char* get_name() const { return "fixed_pool_reference"; } void set_name(const char* /*pName*/) { } protected: friend bool operator==(const fixed_pool_reference& a, const fixed_pool_reference& b); friend bool operator!=(const fixed_pool_reference& a, const fixed_pool_reference& b); eastl::fixed_pool* mpFixedPool; }; inline bool operator==(const fixed_pool_reference& a, const fixed_pool_reference& b) { return (a.mpFixedPool == b.mpFixedPool); } inline bool operator!=(const fixed_pool_reference& a, const fixed_pool_reference& b) { return (a.mpFixedPool != b.mpFixedPool); } ``` Example usage of the above: ```cpp typedef eastl::list IntList; IntList::node_type buffer[2]; eastl::fixed_pool myPool(buffer, sizeof(buffer), sizeof(Int::node_type), 2); IntList myList1(myPool); IntList myList2(myPool); myList1.push_back(37); myList2.push_back(39); ``` ### Cont.28 Can I use a std (STL) allocator with EASTL? No. EASTL allocators are similar in interface to std STL allocators, but not 100% compatible. If it was possible to make them compatible with std STL allocators but also match the design of EASTL then compatibility would exist. The primary reasons for lack of compatibility are: * EASTL allocators have a different allocate function signature. 
* EASTL allocators have as many as four extra required functions: ctor(name), get_name(), set_name(), allocate(size, align, offset). * EASTL allocators have an additional allocate function specifically for aligned allocations, as listed directly above. ### What are the requirements of classes stored in containers? Class types stored in containers must have: * a public copy constructor * a public assignment operator * a public destructor * an operator < that compares two such classes (sorted containers only). * an operator == that compares two such classes (hash containers only). Recall that the compiler generates basic versions of these functions for you when you don't implement them yourself, so you can omit any of the above if the compiler-generated version is sufficient. For example, the following code will act incorrectly, because the user forgot to implement an assignment operator. The compiler-generated assignment operator will assign the refCount value, which the user doesn't want, and which will be called by the vector during resizing. ```cpp struct NotAPod { NotAPod(const NotAPod&) {} // Intentionally don't copy the refCount int refCount; // refCounts should not be copied between NotAPod instances. }; eastl::vector v; ``` ## Algorithms ### Algo.1 I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong? It may be possible that you are seeing floating point roundoff problems. Many STL algorithms require object comparisons to act consistently. However, floating point values sometimes compare differently between uses because in one situation a value might be in 32 bit form in system memory, whereas in another situation that value might be in an FPU register with a different precision. These are difficult problems to track down and aren't the fault of EASTL or whatever similar library you might be using. There are various solutions to the problem, but the important thing is to find a way to force the comparisons to be consistent. 
The code below was an example of this happening, whereby the object pA->mPos was stored in system memory while pB->mPos was stored in a register and comparisons were inconsistent and a crash ensued. ```cpp class SortByDistance : public binary_function { private: Vector3 mOrigin; public: SortByDistance(Vector3 origin) { mOrigin = origin; } bool operator()(WorldTreeObject* pA, WorldTreeObject* pB) const { return ((WorldObject*)pA)->mPos - mOrigin).GetLength() < ((WorldObject*)pB)->mPos - mOrigin).GetLength(); } }; ``` ### Algo.2 How do I write a comparison (operator<()) for a struct that contains two or more members? For a struct with two members such as the following: ```cpp struct X { Blah m1; Blah m2; }; ``` You would write the comparison function like this: ```cpp bool operator<(const X& a, const X& b) { return (a.m1 == b.m1) ? (a.m2 < b.m2) : (a.m1 < b.m1); } ``` or, using only operator < but more instructions: ```cpp bool operator<(const X& a, const X& b) { return (a.m1 < b.m1) || (!(b.m1 < a.m1) && (a.m2 < b.m2)); } ``` For a struct with three members, you would have: ```cpp bool operator<(const X& a, const X& b) { if(a.m1 != b.m1) return (a.m1 < b.m1); if(a.m2 != b.m2) return (a.m2 < b.m2); return (a.mType < b.mType); } ``` And a somewhat messy implementation if you wanted to use only operator <. Note also that you can use the above technique to implement operator < for spatial types such as vectors, points, and rectangles. You would simply treat the members of the stuct as an array of values and ignore the fact that they have spatial meaning. All operator < cares about is that things order consistently. ```cpp bool operator<(const Point2D& a, const Point2D& b) { return (a.x == b.x) ? (a.y < b.y) : (a.x < b.x); } ``` ### Algo.3 How do I sort something in reverse order? Normally sorting puts the lowest value items first in the sorted range. You can change this by simply reversing the comparison. 
For example: `sort(intVector.begin(), intVector.end(), greater());` It's important that you use operator > instead of >=. The comparison function must return false for every case where values are equal. ### Algo.4 I'm getting errors about min and max while compiling. You need to define NOMINMAX under VC++ when this occurs, as it otherwise defines min and max macros that interfere. There may be equivalent issues with other compilers. Also, VC++ has a specific header file (minmax.h) which defines min and max macros but which doesn't pay attention to NOMINMAX and so in that case there is nothing to do but not include that file or to undefine min and max. minmax.h is not a standard file and its min and max macros are not standard C or C++ macros or functions. ### Algo.5 Why don't algorithms take a container as an argument instead of iterators? A container would be more convenient. Having algorithms that use containers instead of iterators would reduce functionality with no increase in performance. This is because the use of iterators allows for the application of algorithms to sub-ranges of containers and allows for the application of algorithms to containers that aren't formal C++ objects, such as C-style arrays. Providing additional algorithms that use containers would introduce redundancy with respect to the existing algorithms that use iterators. ### Algo.6 Given a container of pointers, how do I find an element by value (instead of by pointer)? Functions such as find_if help you find a T element in a container of Ts. But if you have a container of pointers such as `vector<Widget*>`, these functions will enable you to find an element that matches a given Widget* pointer, but they don't let you find an element that matches a given Widget object. 
You can write your own iterating 'for' loop and compare values, or you can use a generic function object to do the work if this is a common task: ```cpp template struct dereferenced_equal { const T& mValue; dereferenced_equal(const T& value) : mValue(value) { } bool operator==(const T* pValue) const { return *pValue == mValue; } }; ... find_if(container.begin(), container.end(), dereferenced_equal(someWidget)); ``` ### Algo.7 When do stored objects need to support operator < vs. when do they need to support operator ==? Any object which is sorted needs to have operator < defined for it, implicitly via operator < or explicitly via a user-supplied Compare function. Set and map containers require operator <, as do sort, binary search, and min/max algorithms. Any object which is compared for equality needs to have operator == defined for it, implicitly via operator == or explicitly via a user-supplied BinaryPredicate function. Hash containers require operator ==, while many of the algorithms other than those mentioned above for operator < require operator ==. Some algorithms and containers require neither < nor ==. Interestingly, no algorithm or container requires both < and ==. ### Algo.8 How do I sort via pointers or array indexes instead of objects directly? Pointers ```cpp vector toArray; vector topArray; for(eastl_size_t i = 0; i < 32; i++) toArray.push_back(TestObject(rng.RandLimit(20))); for(eastl_size_t i = 0; i < 32; i++) // This needs to be a second loop because the addresses might change in the first loop due to container resizing. 
topArray.push_back(&toArray[i]); struct TestObjectPtrCompare { bool operator()(TestObject* a, TestObject* b) { return a->mX < a->mX; } }; quick_sort(topArray.begin(), topArray.end(), TestObjectPtrCompare()); ``` Array indexes ```cpp vector toArray; vector toiArray; for(eastl_size_t i = 0; i < 32; i++) { toArray.push_back(TestObject(rng.RandLimit(20))); toiArray.push_back(i); } struct TestObjectIndexCompare { vector* mpArray; TestObjectIndexCompare(vector* pArray) : mpArray(pArray) { } TestObjectIndexCompare(const TestObjectIndexCompare& x) : mpArray(x.mpArray){ } TestObjectIndexCompare& operator=(const TestObjectIndexCompare& x) { mpArray = x.mpArray; return *this; } bool operator()(eastl_size_t a, eastl_size_t b) { return (*mpArray)[a] < (*mpArray)[b]; } }; quick_sort(toiArray.begin(), toiArray.end(), TestObjectIndexCompare(&toArray)); ``` Array indexes (simpler version using toArray as a global variable) ```cpp vector toArray; vector toiArray; for(eastl_size_t i = 0; i < 32; i++) { toArray.push_back(TestObject(rng.RandLimit(20))); toiArray.push_back(i); } struct TestObjectIndexCompare { bool operator()(eastl_size_t a, eastl_size_t b) { return toArray[a] < toArray[b]; } }; quick_sort(toiArray.begin(), toiArray.end(), TestObjectIndexCompare(&toArray)); ``` ## Iterators ### Iter.1 What's the difference between iterator, const iterator, and const_iterator? An iterator can be modified and item it points to can be modified. A const iterator cannot be modified, but the items it points to can be modified. A const_iterator can be modified, but the items it points to cannot be modified. A const const_iterator cannot be modified, nor can the items it points to. This situation is much like with char pointers: | Iterator type | Pointer equivalent | |------|------| | iterator | char* | | const iterator | char* const | | const_iterator | const char* | | const const_iterator | const char* const | ### Iter.2 How do I tell from an iterator what type of thing it is iterating? 
Use the value_type typedef from iterator_traits, as in this example: ```cpp template void DoSomething(Iterator first, Iterator last) { typedef typename iterator_traits::value_type; // use value_type } ``` ### Iter.3 How do I iterate a container while (selectively) removing items from it? All EASTL containers have an erase function which takes an iterator as an argument and returns an iterator to the next item. Thus, you can erase items from a container while iterating it like so: ```cpp set intSet; set::iterator i = intSet.begin(); while(i != intSet.end()) { if(*i & 1) // Erase all odd integers from the container. i = intSet.erase(i); else ++i; } ``` ### Iter.4 What is an insert_iterator? An insert_iterator is a utility class which is like an iterator except that when you assign a value to it, the insert_iterator inserts the value into the container (via insert()) and increments the iterator. Similarly, there are front_insert_iterator and back_insert_iterator, which are similar to insert_iterator except that assigning a value to them causes them to call push_front and push_back, respectively, on the container. These utilities may seem slightly abstract, but they have uses in generic programming. ---------------------------------------------- End of document ================================================ FILE: doc/Glossary.md ================================================ # EASTL Glossary This document provides definitions of various terms related to EASTL. Items that are capitalized are items that are used as template parameters. | | | |------|------| | adapter | An adapter is something that encapsulates a component to provide another interface, such as a C++ class which makes a stack out of a list. | | algorithm | Algorithms are standalone functions which manipulate data which usually but not necessarily comes from a container. Some algorithms change the data while others don't. Examples are reverse, sort, find, and remove. 
| | associative container | An associative container is a variable-sized container that supports efficient retrieval of elements (values) based on keys. It supports insertion and removal of elements, but differs from a sequence in that it does not provide a mechanism for inserting an element at a specific position. Associative containers include map, multimap, set, multiset, hash_map, hash_multimap, hash_set, hash_multiset. | | array | An array is a C++ container which directly implements a C-style fixed array but which adds STL container semantics to it. | | basic_string | A templated string class which is usually used to store char or wchar_t strings. | | begin | The function used by all conventional containers to return the first item in the container. | | BidirectionalIterator | An input iterator which is like ForwardIterator except it can be read in a backward direction as well. | | BinaryOperation  | A function which takes two arguments and returns a value (which will usually be assigned to a third object). | | BinaryPredicate | A function which takes two arguments and returns true if some criteria is met (e.g. they are equal). | | binder1st, binder2nd | These are function objects which convert one function object into another.  In particular, they implement a binary function whereby you can specify one of the arguments.This is a somewhat abstract concept but has its uses. | | bit vector | A specialized container that acts like vector but is implemented via one bit per entry. STL vector is usually implemented as a bit vector but EASTL avoids this in favor of a specific bit vector container. | | bitset | An extensible yet efficient implementation of bit flags. Not strictly a conventional STL container and not the same thing as vector or a bit_vector, both of which are formal iterate-able containers. | | capacity | Refers to the amount of total storage available in an array-based container such as vector, string, and array. 
Capacity is always >= container size and is often > size in order to provide extra space for a container to grow into. | | const_iterator | An iterator whose iterated items cannot be modified. A const_iterator is akin to a pointer to const such as 'const char*'. | | container | A container is an object that stores other objects (its elements), and that has methods for accessing its elements. In particular, every type that is a model of container has an associated iterator type that can be used to iterate through the container's elements. | | copy constructor | A constructor for a type which takes another object of that type as its argument. For a hypothetical Widget class, the copy constructor is of the form Widget(const Widget& src); | | Compare | A function which takes two arguments and returns true if the first is less than (i.e. is ordered before) the second. | | deque | The name deque is pronounced "deck" and stands for "double-ended queue."

A deque is very much like a vector: like vector, it is a sequence that supports random access to elements, constant time insertion and removal of elements at the end of the sequence, and linear time insertion and removal of elements in the middle.

The main way in which deque differs from vector is that deque also supports constant time insertion and removal of elements at the beginning of the sequence. Additionally, deque does not have any member functions analogous to vector's capacity() and reserve(), and does not provide the guarantees on iterator validity that are associated with those member functions. | | difference_type | The typedef'd type used by all conventional containers and iterators to define the distance between two iterators. It is usually the same thing as the C/C++ ptrdiff_t data type. | | empty | The function used by all conventional containers to tell if a container has a size of zero. In many cases empty is more efficient than checking for size() == 0. | | element | An element refers to a member of a container. | | end | The function used by all conventional containers to return one-past the last item in the container. | | equal_range | equal_range is a version of binary search: it attempts to find the element value in an ordered range [first, last). The value returned by equal_range is essentially a combination of the values returned by lower_bound and upper_bound: it returns a pair of iterators i and j such that i is the first position where value could be inserted without violating the ordering and j is the last position where value could be inserted without violating the ordering. It follows that every element in the range [i, j) is equivalent to value, and that [i, j) is the largest subrange of [first, last) that has this property. | | explicit instantiation | Explicit instantiation lets you create an instantiation of a templated class or function without actually using it in your code. This is useful when you are creating library files that use templates for distribution, since uninstantiated template definitions are not put into object files. An example of the syntax for explicit instantiation is:
`template class vector;`
`template void min(int, int);`
`template void min(int, int);` | | ForwardIterator | An input iterator which is like InputIterator except it can be reset back to the beginning. | | Function | A function which takes one argument and applies some operation to the target. | | function object, functor | A function object or functor is a class that has the function-call operator (operator()) defined. | | Generator | A function which takes no arguments and returns a value (which will usually be assigned to an object). | | hash_map, hash_multimap, hash_set, hash_multiset | The hash containers are implementations of map, multimap, set, and multiset via a hashtable instead of via a tree. Searches are O(1) (fast) but the container is not sorted. | | heap | A heap is a data structure which is not necessarily sorted but is organized such that the highest priority item is at the top. A heap is synonymous with a priority queue and has numerous applications in computer science. | | InputIterator | An input iterator (iterator you read from) which allows reading each element only once and only in a forward direction. | | intrusive_list, intrusive_hash_map, etc. | Intrusive containers are containers which don't allocate memory but instead use their contained object to manage the container's memory. While list allocates nodes (with mpPrev/mpNext pointers) that contain the list items, intrusive_list doesn't allocate nodes but instead the container items have the mpPrev/mpNext pointers. | | intrusive_ptr | intrusive_ptr is a smart pointer which doesn't allocate memory but instead uses the contained object to manage lifetime via addref and release functions. | | iterator | An iterator is the fundamental entity of reading and enumerating values in a container. Much like a pointer can be used to walk through a character array, an iterator is used to walk through a linked list. | | iterator category | An iterator category defines the functionality the iterator provides. 
The conventional iterator categories are InputIterator, ForwardIterator, BidirectionalIterator, RandomAccessIterator, and OutputIterator. See the definitions of each of these for more information.Iterator category is synonymous with iterator_tag. | | iterator_tag | See iterator category. | | key_type, Key | A Key or key_type is the identifier used by associative (a.k.a. dictionary) containers (e.g. map, hash_map) to identify the type used to index the mapped_type. If you have a dictionary of strings that you access by an integer id, the ids are the keys and the strings are the mapped types. | | lexicographical compare | A lexicographical compare is a comparison of two containers that compares them element by element, much like the C strcmp function compares two strings. | | linked_ptr | A linked_ptr is a shared smart pointer which implements object lifetime via a linked list of all linked_ptrs that are referencing the object. linked_ptr, like intrusive_ptr, is a non-memory-allocating alternative to shared_ptr. | | list | A list is a doubly linked list. It is a sequence that supports both forward and backward traversal, and (amortized) constant time insertion and removal of elements at the beginning or the end, or in the middle. Lists have the important property that insertion and splicing do not invalidate iterators to list elements, and that even removal invalidates only the iterators that point to the elements that are removed. The ordering of iterators may be changed (that is, list::iterator might have a different predecessor or successor after a list operation than it did before), but the iterators themselves will not be invalidated or made to point to different elements unless that invalidation or mutation is explicit. | | lower_bound | lower_bound is a version of binary search: it attempts to find the element value in an ordered range [first, last). Specifically, it returns the first position where value could be inserted without violating the ordering. 
| | map | Map is a sorted associative container that associates objects of type Key with objects of type T. Map is a pair associative container, meaning that its value type is pair. It is also a unique associative container, meaning that no two elements have the same key. It is implemented with a tree structure. | | mapped_type | A mapped_type is a typedef used by associative containers to identify the container object which is accessed by a key. If you have a dictionary of strings that you access by an integer id, the ids are the keys and the strings are the mapped types. | | member template | A member template is a templated function of a templated class. Thus with a member template function there are two levels of templating -- the class and the function. | | multimap,  | Multimap is a sorted associative container that associates objects of type Key with objects of type T. multimap is a pair associative container, meaning that its value type is pair. It is also a multiple associative container, meaning that there is no limit on the number of elements with the same key.It is implemented with a tree structure. | | multiset | Multiset is a sorted associative container that stores objects of type Key. Its value type, as well as its key type, is Key. It is also a multiple associative container, meaning that two or more elements may be identical. It is implemented with a tree structure. | | node | A node is a little holder class used by many containers to hold the contained items. A linked-list, for example, defines a node which has three members: mpPrev, mpNext, and T (the contained object). | | npos | npos is used by the string class to identify a non-existent index. Some string functions return npos to indicate that the function failed. | | rel_ops | rel_ops refers to "relational operators" and is a set of templated functions which provide operator!= for classes that  have only operator== and provide operator > for classes that have only operator <, etc. 
Unfortunately, rel_ops have a habit of polluting the global operator space and creating conflicts. They must be used with discretion. | | reverse_iterator | A reverse_iterator is an iterator which wraps a bidirectional or random access iterator and allows the iterator to be read in reverse direction. The difference between using reverse_iterators and just decrementing regular iterators is that reverse_iterators use operator++ to move backwards and thus work in any algorithm that calls ++ to move through a container. | | OutputIterator | An output iterator (iterator you write to) which allows writing each element only once and only in a forward direction. | | POD | POD means Plain Old Data. It refers to C++ classes which act like built-in types and C structs. These are useful to distinguish because some algorithms can be made more efficient when they can detect that they are working with PODs instead of regular classes.  | | Predicate | A function which takes one argument and returns true if the argument meets some criteria. | | priority_queue | A priority_queue is an adapter container which implements a heap via a random access container such as vector or deque. | | queue | A queue is an adapter container which implements a FIFO (first-in, first-out) container with which you can add items to the back and get items from the front. | | RandomAccessIterator | An input iterator which can be addressed like an array. It is a superset of all other input iterators. | | red-black tree | A red-black tree is a binary tree which has the property of being always balanced. The colors red and black are somewhat arbitrarily named monikers for nodes used to measure the balance of the tree. Red-black trees are considered the best all-around data structure for sorted containers. | | scalar | A scalar is a data type which is implemented via a numerical value. In C++ this means integers, floating point values, enumerations, and pointers.  
| | scoped_ptr | A scoped_ptr is a smart pointer which is the same as C++ auto_ptr except that it cannot be copied. | | set | Set is a sorted associative container that stores objects of type Key. Its value type, as well as its key type, is Key. It is also a unique associative container, meaning that no two elements are the same. It is implemented with a tree structure. | | sequence | A sequence is a variable-sized container whose elements are arranged in a strict linear (though not necessarily contiguous) order. It supports insertion and removal of elements. Sequence containers include vector, deque, array, list, slist. | | size | All conventional containers have a size member function which returns the count of elements in the container. The efficiency of the size function differs between containers. | | size_type | The type that a container uses to define its size and counts. This is similar to the C/C++ size_t type but may be specialized for the container. | | skip list | A skip-list is a type of container which is an alternative to a binary tree for finding data. | | shared_ptr | A shared_ptr is a smart pointer which allows multiple references (via multiple shared_ptrs) to the same object. When the last shared_ptr goes away, the pointer is freed. shared_ptr is implemented via a shared count between all instances. | | slist | An slist is like a list but is singly-linked instead of doubly-linked. It can only be iterated in a forward direction. | | smart pointer | Smart pointer is a term that identifies a family of utility classes which store pointers and free them when the class instance goes out of scope. Examples of smart pointers are shared_ptr, linked_ptr, intrusive_ptr, and scoped_ptr. | | splice | Splicing refers to the moving of a subsequence of one Sequence into another Sequence. | | stack | A stack is an adapter container which implements LIFO (last-in, first-out) access via another container such as a list or deque. | | STL | Standard Template Library.  
| | StrictWeakOrdering | A BinaryPredicate that compares two objects, returning true if the first precedes the second. Like Compare but has additional requirements. Used for sorting routines.

This predicate must satisfy the standard mathematical definition of a strict weak ordering. A StrictWeakOrdering has to behave the way that "less than" behaves: if a is less than b then b is not less than a, if a is less than b and b is less than c then a is less than c, and so on. | | string | See basic_string. | | T | T is the template parameter name used by most containers to identify the contained element type.  | | template parameter | A template parameter is the templated type used to define a template function or class. In the declaration `template <typename T> class vector{ }`, T is a template parameter. | | template specialization | A template specialization is a custom version of a template which overrides the default version and provides alternative functionality, often for the purpose of providing improved or specialized functionality. | | treap | A tree-like structure implemented via a heap. This is an alternative to a binary tree (e.g. red-black tree), skip-list, and sorted array as a mechanism for a fast-access sorted container. | | type traits | Type traits are properties of types. If you have a templated type T and you want to know if it is a pointer, you would use the is_pointer type trait. If you want to know if the type is a POD, you would use the is_pod type trait. Type traits are very useful for allowing the implementation of optimized generic algorithms and for asserting that types have properties expected by the function or class contract. For example, you can use type_traits to tell if a type can be copied via memcpy instead of a slower element-by-element copy. | | typename | Typename is a C++ keyword used in templated function implementations which identifies to the compiler that the following expression is a type and not a value. It is used extensively in EASTL, particularly in the algorithms. | | UnaryOperation | A function which takes one argument and returns a value (which will usually be assigned to a second object). 
| | upper_bound | upper_bound is a version of binary search: it attempts to find the element value in an ordered range [first, last). Specifically, it returns the last position where value could be inserted without violating the ordering. | | value_type, Value | A value_type is a typedef used by all containers to identify the elements they contain. In most cases value_type is simply the same thing as the user-supplied T template parameter. The primary exception is the associative containers whereby value_type is the pair of key_type and mapped_type. | | vector | A vector is a Sequence that supports random access to elements, constant time insertion and removal of elements at the end, and linear time insertion and removal of elements at the beginning or in the middle. The number of elements in a vector may vary dynamically; memory management is automatic. Vector is the simplest of the container classes, and in many cases the most efficient. | | vector_map, vector_multimap, vector_set, vector_multiset | These are containers that implement the functionality of map, multimap, set, and multiset via a vector or deque instead of a tree. They use less memory and find items faster, but are slower to modify and modification invalidates iterators. | | weak_ptr | A weak_ptr is an adjunct to shared_ptr which doesn't increment the reference on the contained object but can safely tell you if the object still exists and access it if so. It has uses in preventing circular references in shared_ptrs. | ---------------------------------------------- End of document ================================================ FILE: doc/Gotchas.md ================================================ # EASTL Gotchas There are some cases where the EASTL design results in "gotchas" or behavior that isn't necessarily what the new user would expect. These are all situations in which this behavior may be undesirable. One might ask, "Why not change EASTL to make these gotchas go away?" 
The answer is that in each case making the gotchas go away would either be impossible or would compromise the functionality of the library. ## Summary The descriptions here are intentionally terse; this is to make them easier to visually scan. 1. [map::operator[] can create elements.](#mapoperator-can-create-elements) 2. [char* converts to string silently.](#char-converts-to-string-silently) 3. [char* is compared by ptr and not by contents.](#char-is-compared-by-ptr-and-not-by-contents) 4. [Iterators can be invalidated by container mutations.](#iterators-can-be-invalidated-by-container-mutations) 5. [Vector resizing may cause ctor/dtor cascades.](#vector-resizing-may-cause-ctordtor-cascades) 6. [Vector and string insert/push_back/resize can reallocate.](#vector-and-string-insertpush_backresize-can-reallocate) 7. [Deriving from containers may not work.](#deriving-from-containers-may-not-work) 8. [set::iterator is const_iterator.](#setiterator-is-const_iterator) 9. [Inserting elements means copying by value.](#inserting-elements-means-copying-by-value) 10. [Containers of pointers can leak if you aren't careful.](#containers-of-pointers-can-leak-if-you-arent-careful) 11. [Containers of auto_ptrs can crash.](#containers-of-auto_ptrs-can-crash) 12. [Remove algorithms don't actually remove elements.](#remove-algorithms-dont-actually-remove-elements) 13. [list::size() is O(n).](#listsize-is-on) 14. [vector and deque::size() may incur integer division.](#vector-and-dequesize-may-incur-integer-division) 15. [Be careful making custom Compare functions.](#be-careful-making-custom-compare-functions) 16. [Comparisons involving floating point are dangerous.](#comparisons-involving-floating-point-are-dangerous) 17. [Writing beyond string::size and vector::size is dangerous.](#writing-beyond-stringsize-and-vectorsize-is-dangerous) 18. [Container operator=() doesn't copy allocators.](#container-operator-doesnt-copy-allocators) ## Detail ### map::operator[] can create elements. 
By design, map operator[] creates a value for you if it isn't already present. The reason for this is that the alternative behavior would be to throw an exception, and such behavior isn't desirable. The resolution is to simply use the map::find function instead of operator[]. ### char* converts to string silently. The string class has a non-explicit constructor that takes char* as an argument. Thus if you pass char* to a function that takes a string object, a temporary string will be created. In some cases this is undesirable behavior but the user may not notice it right away, as the compiler gives no warnings. The reason that the string constructor from char* is not declared explicit is that doing so would prevent the user from expressions such as: string s = "hello". In this example, no temporary string object is created, but the syntax is not possible if the char* constructor is declared explicit. Thus a decision to make the string char* constructor explicit involves tradeoffs. There is an EASTL configuration option called EASTL_STRING_EXPLICIT which makes the string char* ctor explicit and avoids the behaviour described above. ### char* is compared by ptr and not by contents. If you have a set of strings declared as `set<char*>`, the find function will compare via the pointer value and not the string contents. The workaround is to make a set of string objects or, better, to supply a custom string comparison function to the set. The workaround is not to declare a global operator< for type char*, as that could cause other systems to break. ### Iterators can be invalidated by container mutations With some containers, modifications of them may invalidate iterators into them. With other containers, modifications of them only invalidate an iterator if the modification involves the element that iterator refers to. Containers in the former category include vector, deque, basic_string (string), vector_map, vector_multimap, vector_set, and vector_multiset. 
Containers in the latter category include list, slist, map, multimap, multiset, all hash containers, and all intrusive containers. ### Vector resizing may cause ctor/dtor cascades. If elements are inserted into a vector in the middle of the sequence, the elements from the insertion point to the end will be copied upward. This will necessarily cause a series of element constructions and destructions as the elements are copied upward. Similarly, if an element is appended to a vector but the vector capacity is exhausted and needs to be reallocated, the entire vector will undergo a construction and destruction pass as the values are copied to the new storage. This issue exists for deque as well, though to a lesser degree. For vector, the resolution is to reserve enough space in your vector to prevent such reallocation. For deque the resolution is to set its subarray size to enough to prevent such reallocation. Another solution that can often be used is to design your element type to be trivially copyable. A good way to communicate this requirement/optimization is to static assert is_trivially_copyable. eastl optimizes such types to a memcpy/memmove, eliding a call to the copy constructor. ### Vector and string insert/push_back/resize can reallocate. If you create an empty vector and use push_back to insert 100 elements, the vector will reallocate itself at least three or four times during the operation. This can be an undesirable thing. The best thing to do if possible is to reserve the size you will need up front in the vector constructor or before you add any elements. ### Deriving from containers may not work. EASTL containers are not designed with the guarantee that they can be arbitrarily subclassed. This is by design and is done for performance reasons, as such guarantees would likely involve making containers use virtual functions. However, some types of subclassing can be successful and EASTL does such subclassing internally to its advantage. 
The primary problem with subclassing results when a parent class function calls a function that the user wants to override. The parent class cannot see the overridden function and silent unpredictable behavior will likely occur. If your derived container acts strictly as a wrapper for the container then you will likely be able to successfully subclass it. ### set::iterator is const_iterator. The reason this is so is that a set is an ordered container and changing the value referred to by an iterator could make the set be out of order. Thus, set and multiset iterators are always const_iterators. If you need to change the value and are sure the change will not alter the container order, use const_cast or declare mutable member variables for your contained object. This resolution is the one blessed by the C++ standardization committee. This issue is addressed in more detail in the EASTL FAQ. ### Inserting elements means copying by value. When you insert an element into a (non-intrusive) container, the container makes a copy of the element. There is no provision to take over ownership of an object from the user. The exception to this is of course when you use a container of pointers instead of a container of values. See the entry below regarding containers of pointers. Intrusive containers (e.g. intrusive_list) do in fact take over the user-provided value, and thus provide another advantage over regular containers in addition to avoiding memory allocation. ### Containers of pointers can leak if you aren't careful. Containers of pointers don't know or care about the possibility that the pointer may have been allocated and need to be freed. Thus if you erase such elements from a container they are not freed. The resolution is to manually free the pointers when removing them or to instead use a container of smart pointers (shared smart pointers, in particular). This issue is addressed in more detail in the EASTL FAQ and the auto_ptr-related entry below. 
### Containers of auto_ptrs can crash We suggested above that the user can use a container of smart pointers to automatically manage contained pointers. However, you don't want to use auto_ptr, as auto_ptrs cannot be safely assigned to each other; doing so results in a stale pointer and most likely a crash. ### Remove algorithms don't actually remove elements. Algorithms such as remove, remove_if, remove_heap, and unique do not erase elements from the sequences they work on. Instead, they return an iterator to the new end of the sequence and the user must call erase with that iterator in order to actually remove the elements from the container. This behavior exists because algorithms work on sequences via iterators and don't know how to work with containers. Only the container can know how to best erase its own elements. In each case, the documentation for the algorithm reminds the user of this behavior. Similarly, the copy algorithm copies elements from one sequence to another and doesn't modify the size of the destination sequence. So the destination must hold at least as many items as the source, and if it holds more items, you may want to erase the items at the end after the copy. ### list::size() is O(n). By this we mean that calling size() on a list will iterate the list and add the size as it goes. Thus, getting the size of a list is not a fast operation, as it requires traversing the list and counting the nodes. We could make list::size() be fast by having a member mSize variable. There are reasons for having such functionality and reasons for not having such functionality. We currently choose to not have a member mSize variable as it would add four bytes to the class, add processing to functions such as insert and erase, and would only serve to improve the size function, but no other function. The alternative argument is that the C++ standard states that std::list::size() should be an O(1) operation (i.e. 
have a member size variable), most C++ standard library list implementations do so, the size is but an integer which is quick to update, and many users expect to have a fast size function. All of this applies to slist and intrusive_list as well. Note that EASTL's config.h file has an option in it to cause list and slist to cache their size with an mSize variable and thus make size() O(1). This option is disabled by default. ### vector and deque::size() may incur integer division. Some containers (vector and deque in particular) calculate their size by pointer subtraction. For example, the implementation of vector::size() is 'return mpEnd - mpBegin'. This looks like a harmless subtraction, but if the size of the contained object is not an even power of two then the compiler will likely need to do an integer division to calculate the value of the subtracted pointers. One might suggest that vector use mpBegin and mnSize as member variables instead of mpBegin and mpEnd, but that would incur costs in other vector operations. The suggested workaround is to iterate a vector instead of using a for loop and operator[] and for those cases where you do use a for loop and operator[], get the size once at the beginning of the loop instead of repeatedly during the condition test. ### Be careful making custom Compare functions. A Compare function compares two values and returns true if the first is less than the second. This is easy to understand for integers and strings, but harder to get right for more complex structures. Many a time have people decided to come up with a fancy mechanism for comparing values and made mistakes. The FAQ has a couple entries related to this. See ["Writing a sort comparison function"](https://devblogs.microsoft.com/oldnewthing/20031023-00/?p=42063) for a story about how this can go wrong by being overly clever. All comparisons in std STL and eastl, including user defined comparisons, must have strict weak ordering. 
See the [Compare](https://en.cppreference.com/w/cpp/named_req/Compare) named requirement for more info. ### Comparisons involving floating point are dangerous. Floating point comparisons between two values that are very nearly equal can result in inconsistent results. Similarly, floating point comparisons between NaN values will always generate inconsistent results, as NaNs by definition always compare as non-equal. You thus need to be careful when using comparison functions that work with floating point values. Conversions to integral values may help the problem, but not necessarily. ### Writing beyond string::size and vector::size is dangerous. A trick that often comes to mind when working with strings is to set the string capacity to some maximum value, strcpy data into it, and then resize the string when done. This can be done with EASTL, but only if you resize the string to the maximum value and not reserve the string to the maximum value. The reason is that when you resize a string from size (n) to size (n + count), the count characters are zeroed and overwrite the characters that you strcpyd. The following code is broken: ```cpp string mDataDir; mDataDir.reserve(kMaxPathLength); // reserve strcpy(&mDataDir[0], "blah/blah/blah"); mDataDir.resize(strlen(&mDataDir[0])); // Overwrites your blah/... with 00000... ``` The following code is OK: ```cpp string mDataDir; mDataDir.resize(kMaxPathLength); // resize strcpy(&mDataDir[0], "blah/blah/blah"); mDataDir.resize(strlen(&mDataDir[0])); ``` ### Container operator=() doesn't copy allocators. EASTL container assignment (e.g. vector::operator=(const vector&)) doesn't copy the allocator. There are good and bad reasons for doing this, but that's how it acts. So you need to beware that you need to assign the allocator separately or make a container subclass which overrides operator=() and does this. 
---------------------------------------------- End of document ================================================ FILE: doc/Introduction.md ================================================ # EASTL Introduction EASTL stands for Electronic Arts Standard Template Library. It is a C++ template library of containers, algorithms, and iterators useful for runtime and tool development across multiple platforms. It is a fairly extensive and robust implementation of such a library and has an emphasis on high performance above all other considerations. ## Intended Audience This is a short document intended to provide a basic introduction to EASTL for those new to the concept of EASTL or STL. If you are familiar with the C++ STL or have worked with other templated container/algorithm libraries, you probably don't need to read this. If you have no familiarity with C++ templates at all, then you probably will need more than this document to get you up to speed. In this case you need to understand that templates, when used properly, are powerful vehicles for the ease of creation of optimized C++ code. A description of C++ templates is outside the scope of this documentation, but there is plenty of such documentation on the Internet. See the EASTL FAQ.html document for links to information related to learning templates and STL. ## EASTL Modules EASTL consists primarily of containers, algorithms, and iterators. An example of a container is a linked list, while an example of an algorithm is a sort function; iterators are the entities of traversal for containers and algorithms. EASTL contains a fairly large number of containers and algorithms, each of which is a very clean, efficient, and unit-tested implementation. We can say with some confidence that you are not likely to find better implementations of these (commercial or otherwise), as these are the result of years of wisdom and diligent work. For a detailed list of EASTL modules, see EASTL Modules.html. 
## EASTL Suitability What uses are EASTL suitable for? Essentially any situation in tools and shipping applications where the functionality of EASTL is useful. Modern compilers are capable of producing good code with templates and many people are using them in both current generation and future generation applications on multiple platforms from embedded systems to servers and mainframes. ---------------------------------------------- End of document ================================================ FILE: doc/Maintenance.md ================================================ # EASTL Maintenance ## Introduction The purpose of this document is to provide some necessary background for anybody who might do work on EASTL. Writing generic templated systems like EASTL can be surprisingly tricky. There are numerous details of the C++ language that you need to understand which don't usually come into play during the day-to-day C++ coding that many people do. It is easy to make a change to some function that seems proper and works for your test case but either violates the design expectations or simply breaks under other circumstances. It may be useful to start with an example. Here we provide an implementation of the count algorithm which seems simple enough. Except it is wrong and while it will compile in some cases it won't compile in others: ```cpp int count(InputIterator first, InputIterator last, const T& value) {     int result = 0;     for(; first < last; ++first){         if(*first == value)             ++result;     }     return result; } ``` The problem is with the comparison 'first < last'. The count algorithm takes an InputIterator and operator< is not guaranteed to exist for any given InputIterator (and indeed while operator< exists for vector::iterator, it doesn't exist for list::iterator). The comparison in the above algorithm must instead be implemented as 'first != last'. If we were working with a RandomAccessIterator then 'first < last' would be valid. 
In the following sections we cover various topics of interest regarding the development and maintenance of EASTL. Unfortunately, this document can't cover every aspect of EASTL maintenance issues, but at least it should give you a sense of the kinds of issues. ## C++ Language Standard First and foremost, you need to be familiar with the C++ standard. In particular, the sections of the standard related to containers, algorithms, and iterators are of prime significance. We'll talk about some of this in more detail below. Similarly, a strong understanding of the basic data types is required. What is the difference between ptrdiff_t and intptr_t; unsigned int and size_t; char and signed char? In addition to the C++ language standard, you'll want to be familiar with the C++ Defect Report. This is a continuously updated document which lists flaws in the original C++ language specification and the current thinking as to the resolutions of those flaws. You will notice various references to the Defect Report in EASTL source code. Additionally, you will want to be familiar with the C++ Technical Report 1 (as of this writing there is only one). This document is the evolving addendum to the C++ standard based on both the Defect Report and based on desired additions to the C++ language and standard library. Additionally, you will probably want to have some familiarity with Boost. It also helps to keep an eye on comp.std.c++ Usenet discussions. However, watch out for what people say on Usenet. They tend to defend GCC, Unix, std STL, and C++ to a sometimes unreasonable degree. Many discussions ignore performance implications and concentrate only on correctness and sometimes academic correctness above usability. ## Language Use Macros are (almost) not allowed in EASTL. A prime directive of EASTL is to be easier to read by users and most of the time macros are an impediment to this. 
So we avoid macros at all costs, even if it ends up making our development and maintenance more difficult. That being said, you will notice that the EASTL config.h file uses macros to control various options. This is an exception to the rule; when we talk about not using macros, we mean with the EASTL implementation itself. EASTL assumes a compliant and intelligent C++ compiler, and thus all language facilities are usable. However, we nevertheless choose to stay away from some language functionality. The primary language features we avoid are: * RTTI (run-time-type-identification) (this is deemed too costly) * Template export (few compilers support this) * Exception specifications (most compilers ignore them) Use of per-platform or per-compiler code should be avoided when possible but where there is a significant advantage to be gained it can and indeed should be used. An example of this is the GCC __builtin_expect feature, which allows the user to give the compiler a hint about whether an expression is true or false. This allows for the generation of code that executes faster due to more intelligent branch prediction. ## Prime Directives The implementation of EASTL is guided foremost by the following directives which are listed in order of importance. 1. Efficiency (speed and memory usage) 2. Correctness (doesn't have bugs) 3. Portability (works on all required platforms with minimal specialized code) 4. Readability (code is legible and comments are present and useful) Note that unlike commercial STL implementations which must put correctness above all, we put a higher value on efficiency. As a result, some functionality may have some usage limitation that is not present in other similar systems but which allows for more efficient operation, especially on the platforms of significance to us. Portability is significant, but not critical. Yes, EASTL must compile and run on all platforms that we will ship games for. 
But we don't take that to mean under all compilers that could be conceivably used for such platforms. For example, Microsoft VC6 can be used to compile Windows programs, but VC6's C++ support is too weak for EASTL and so you simply cannot use EASTL under VC6. Readability is something that EASTL achieves better than many other templated libraries, particularly Microsoft STL and STLPort. We make every attempt to make EASTL code clean and sensible. Sometimes our need to provide optimizations (particularly related to type_traits and iterator types) results in less simple code, but efficiency happens to be our prime directive and so it overrides all other considerations. ## Coding Conventions Here we provide a list of coding conventions to follow when maintaining or adding to EASTL, starting with the three language use items from above: * No RTTI use. * No use of exception specifications (e.g. appending the 'throw' declarator to a function). * No use of exception handling itself except where explicitly required by the implementation (e.g. vector::at). * Exception use needs to be savvy to EASTL_EXCEPTIONS_ENABLED. * No use of macros (outside of config.h). Macros make things more difficult for the user. * No use of static or global variables. * No use of global new, delete, malloc, or free. All memory must be user-specifiable via an Allocator parameter (default-specified or explicitly specified). * Containers use protected member data and functions as opposed to private. This is because doing so allows subclasses to extend the container without the creation of intermediary functions. Recall from our [prime directives](#prime-directives) above that performance and simplicity overrule all. * No use of multithreading primitives.  * No use of the export keyword. * We don't have a rule about C-style casts vs. C++ static_cast<>, etc. We would always use static_cast except that debuggers can't evaluate them and so in practice they can get in the way of debugging and tracing. 
However, if the cast is one that users don't tend to need to view in a debugger, C++ casts are preferred. * No external library dependencies whatsoever, including standard STL. EASTL is dependent on only EABase and the C++ compiler.  * All code must be const-correct. This isn't just for readability -- compilation can fail unless const-ness is used correctly everywhere.  * Algorithms do not refer to containers; they refer only to iterators. * Algorithms in general do not allocate memory. If such a situation arises, there should be a version of the algorithm which allows the user to provide the allocator. * No inferior implementations. No facility should be added to EASTL unless it is of professional quality. * The maintainer should emulate the EASTL style of code layout, regardless of the maintainer's personal preferences. When in Rome, do as the Romans do. EASTL uses 4 spaces for indents, which is how the large majority of code within EA is written. * No major changes should be done without consulting a peer group. ## Compiler Issues Historically, templates are the feature of C++ that has given C++ compilers the most fits. We are still working with compilers that don't completely and properly support templates. Luckily, most compilers are now good enough to handle what EASTL requires. Nevertheless, there are precautions we must take. It turns out that the biggest problem in writing portable EASTL code is that VC++ allows you to make illegal statements which are not allowed by other compilers. For example, VC++ will allow you to neglect using the typename keyword in template references, whereas GCC (especially 3.4+) requires it. In order to feel comfortable that your EASTL code is C++ correct and is portable, you must do at least these two things: * Test under at least VS2005, GCC 3.4+, GCC 4.4+, EDG, and clang. * Test all functions that you write, as compilers will often skip the compilation of a template function if it isn't used. 
The two biggest issues to watch out for are 'typename' and a concept called "dependent names". In both cases VC++ will accept non-conforming syntax whereas most other compilers will not. Whenever you reference a templated type (and not a templated value) in a template, you need to prefix it by 'typename'. Whenever your class function refers to a base class member (data or function), you need to refer to it by "this->", "base_type::", or by placing a "using" statement in your class to declare that you will be referencing the given base class member. ## Iterator Issues The most important thing to understand about iterators is the concept of iterator types and their designated properties. In particular, we need to understand the difference between InputIterator, ForwardIterator, BidirectionalIterator, RandomAccessIterator, and OutputIterator. These differences dictate both how we implement our algorithms and how we implement our optimizations. Please read the C++ standard for a reasonably well-implemented description of these iterator types. Here's an example from EASTL/algorithm.h which demonstrates how we use iterator types to optimize the reverse algorithm based on the kind of iterator passed to it: ```cpp template <typename BidirectionalIterator> inline void reverse_impl(BidirectionalIterator first, BidirectionalIterator last, bidirectional_iterator_tag) { for(; (first != last) && (first != --last); ++first) // We are not allowed to use operator <, <=, >, >= with iter_swap(first, last); // a generic (bidirectional or otherwise) iterator. } template <typename RandomAccessIterator> inline void reverse_impl(RandomAccessIterator first, RandomAccessIterator last, random_access_iterator_tag) { for(; first < --last; ++first) // With a random access iterator, we can use operator < to more efficiently implement iter_swap(first, last); // this algorithm. A generic iterator doesn't necessarily have an operator < defined. 
} template <typename BidirectionalIterator> inline void reverse(BidirectionalIterator first, BidirectionalIterator last) { typedef typename iterator_traits<BidirectionalIterator>::iterator_category IC; reverse_impl(first, last, IC()); } ``` ## Exception Handling You will notice that EASTL uses try/catch in some places (particularly in containers) and uses the EASTL_EXCEPTIONS_ENABLED define. For starters, any EASTL code that uses try/catch should always be wrapped within #if EASTL_EXCEPTIONS_ENABLED (note: #if, not #ifdef). This is simple enough, but what you may be wondering is how it is that EASTL decides to use try/catch for some sections of code and not for others. EASTL follows the C++ standard library conventions with respect to exception handling, and you will see similar exception handling in standard STL. The code that you need to wrap in try/catch is code that can throw a C++ exception (not to be confused with CPU exception) and needs to have something unwound (or fixed) as a result. The important thing is that the container be in a valid state after encountering such exceptions. In general the kinds of things that require such try/catch are: * Memory allocation failures (which throw exceptions) * Constructor exceptions Take a look at the cases in EASTL where try/catch is used and see what it is doing. ## Type Traits EASTL provides a facility called type_traits which is very similar to the type_traits being proposed by the C++ TR1 (see above). type_traits are useful because they tell you about properties of types at compile time. This allows you to do things such as assert that a data type is scalar or that a data type is const. The way we put them to use in EASTL is to take advantage of them to implement different pathways for functions based on types. For example, we can copy a contiguous array of scalars much faster via memcpy than we can via a for loop, though we could not safely employ memcpy for a non-trivial C++ class. 
As mentioned in the General Optimizations section below, EASTL should take advantage of type_traits information to the extent possible to achieve maximum efficiency. ## General Optimizations One of the primary goals of EASTL is to achieve the highest possible efficiency. In cases where EASTL functionality overlaps standard C++ STL functionality, standard STL implementations provided by compiler vendors are a benchmark which EASTL strives to beat. Indeed EASTL is more efficient than all other current STL implementations (with some exception in the case of some Metrowerks STL facilities). Here we list some of the things to look for when considering optimization of EASTL code. These items can be considered general optimization suggestions for any code, but this particular list applies to EASTL: * Take advantage of type_traits to the extent possible (e.g. to use memcpy to move data instead of a for loop when possible). * Take advantage of iterator types to the extent possible. * Take advantage of the compiler's expectation that if statements are expected to evaluate as true and for loop conditions are expected to evaluate as false. * Make inline-friendly code. This often means avoiding temporaries to the extent possible. * Minimize branching (i.e. minimize 'if' statements). Where branching is used, make it so that 'if' statements execute as true. * Use EASTL_LIKELY/EASTL_UNLIKELY to give branch hints to the compiler when you are confident it will be beneficial. * Use restricted pointers (EABase's EA_RESTRICT or various compiler-specific versions of __restrict). * Compare unsigned values to < max instead of comparing signed values to >= 0 && < max. * Employ power of 2 integer math instead of math with any kind of integer. * Use template specialization where possible to implement improved functionality. * Avoid function calls when the call does something trivial. 
This improves debug build speed (which matters) and sometimes release build speed as well, though sometimes makes the code intent less clear. A comment next to the code saying what call it is replacing makes the intent clear without sacrificing performance. ## Unit Tests Writing robust templated containers and algorithms is difficult or impossible without a heavy unit test suite in place. EASTL has a pretty extensive set of unit tests for all containers and algorithms. While the successful automated unit testing of shipping application programs may be a difficult thing to pull off, unit testing of libraries such as this is of huge importance and cannot be overstated. * When making a new unit test, start by copying one of the existing unit tests and follow its conventions. * Test containers of both scalars and classes. * Test algorithms on both container iterators (e.g. vector.begin()) and pointer iterators (e.g. int*). * Make sure that algorithm or container member functions which take iterators work with the type of iterator they claim to (InputIterator, ForwardIterator, BidirectionalIterator, RandomAccessIterator).  * Test for const-correctness. If a user is allowed to modify something that is supposed to be const, silent errors can go undetected. * Make sure that unit tests cover all functions and all pathways of the tested code. This means that in writing the unit test you need to look at the source code to understand all the pathways. * Consider using a random number generator (one is provided in the test library) to do 'monkey' testing whereby unexpected input is given to a module being tested. When doing so, make sure you seed the generator in a way that problems can be reproduced. * While we avoid macros in EASTL user code, macros to assist in unit tests aren't considered a problem. However, consider that a number of macros could be replaced by templated functions and thus be easier to work with. 
* Unit tests don't need to be efficient; feel free to take up all the CPU power and time you need to test a module sufficiently. * EASTL containers are not thread-safe, by design. Thus there is no need to do multithreading tests as long as you stay away from the usage of static and global variables. * Unit tests must succeed with no memory leaks and of course no memory corruption. The heap system should be configured to test for this, and heap validation functions are available to the unit tests while in the middle of runs. ## Things to Keep in Mind * When referring to EASTL functions and types from EASTL code, make sure to preface the type with the EASTL namespace. If you don't do this you can get collisions due to the compiler not knowing if it should use the EASTL namespace or the namespace of the templated type for the function or type. * Newly constructed empty containers do no memory allocation. Some STL and other container libraries allocate an initial node from the class memory allocator. EASTL containers by design never do this. If a container needs an initial node, that node should be made part of the container itself or be a static empty node object. * Empty containers (new or otherwise) contain no constructed objects, including those that might be in an 'end' node. Similarly, no user object (e.g. of type T) should be constructed unless required by the design and unless documented in the container/algorithm contract.  * When creating a new container class, it's best to copy from an existing similar class to the extent possible. This helps keep the library consistent and resolves subtle problems that can happen in the construction of containers. * Be very careful about tweaking the code. It's easy to think (for example) that a > could be switched to a >= when in fact the difference is a big deal. Just about every line of code in EASTL has been thought through and has a purpose. 
Unit tests may or may not currently test every bit of EASTL, so you can't necessarily rely on them to give you 100% confidence in changes. If you are not sure about something, contact the original author and he will tell you for sure. * Algorithm templates always work with iterators and not containers. A given container may of course implement an optimized form or an algorithm itself. * Make sure everything is heavily unit tested. If somebody finds a bug, fix the bug and make a unit test to make sure the bug doesn't happen again. * It's easy to get iterator categories confused or forgotten while implementing algorithms and containers. * Watch out for the strictness of GCC 3.4+. There is a bit of syntax — especially related to templates — that other compilers accept but GCC 3.4+ will not. * Don't forget to update the config.h EASTL_VERSION define before publishing. * The vector and string classes define iterator to be T*. We want to always leave this so — at least in release builds — as this gives some algorithms an advantage that optimizers cannot get around. ---------------------------------------------- End of document ================================================ FILE: doc/Modules.md ================================================ # EASTL Modules ## Introduction We provide here a list of all top-level modules present or planned for future presence in EASTL. In some cases (e.g. algorithm), the module consists of many smaller submodules which are not described in detail here. In those cases you should consult the source code for those modules or consult the detailed documentation for those modules. This document is a high level overview and not a detailed document. ## Module List | Module | Description | |------|------| | config | Configuration header. Allows for changing some compile-time options. | | slist
fixed_slist | Singly-linked list.
fixed_slist is a version which is implemented via a fixed block of contiguous memory.| | list
fixed_list | Doubly-linked list. | | intrusive_list
intrusive_slist | List whereby the contained item provides the node implementation. | | array | Wrapper for a C-style array which extends it to act like an STL container. | | vector
fixed_vector | Resizable array container. | vector_set
vector_multiset | Set implemented via a vector instead of a tree. Speed and memory use is improved but resizing is slower. | | vector_map
vector_multimap | Map implemented via a vector instead of a tree. Speed and memory use is improved but resizing is slower. | | deque | Double-ended queue, but also with random access. Acts like a vector but insertions and removals are efficient. | | bit_vector | Implements a vector of bool, but the actual storage is done with one bit per bool. Not the same thing as a bitset. | | bitset | Implements an efficient arbitrarily-sized bitfield. Note that this is not strictly the same thing as a vector of bool (bit_vector), as it is optimized to act like an arbitrary set of flags and not to be a generic container which can be iterated, inserted, removed, etc. | | set
multiset
fixed_set
fixed_multiset | A set is a sorted unique collection, multiset is sorted but non-unique collection. | | map
multimap
fixed_map
fixed_multimap | A map is a sorted associative collection implemented via a tree. It is also known as dictionary. | | hash_map
hash_multimap
fixed_hash_map
fixed_hash_multimap | Map implemented via a hash table. | | intrusive_hash_map
intrusive_hash_multimap
intrusive_hash_set
intrusive_hash_multiset | hash_map whereby the contained item provides the node implementation, much like intrusive_list. | | hash_set
hash_multiset
fixed_hash_set
fixed_hash_multiset | Set implemented via a hash table. | basic_string
fixed_string
fixed_substring | basic_string is a character string/array.
fixed_substring is a string which is a reference to a range within another string or character array.
cow_string is a string which implements copy-on-write. | | algorithm | min/max, find, binary_search, random_shuffle, reverse, etc. | | sort | Sorting functionality, including functionality not in STL. quick_sort, heap_sort, merge_sort, shell_sort, insertion_sort, etc. | | numeric | Numeric algorithms: accumulate, inner_product, partial_sum, adjacent_difference, etc. | | heap | Heap structure functionality: make_heap, push_heap, pop_heap, sort_heap, is_heap, remove_heap, etc. | | stack | Adapts any container into a stack. | | queue | Adapts any container into a queue. | | priority_queue | Implements a conventional priority queue via a heap structure. | | type_traits | Type information, useful for writing optimized and robust code. Also used for implementing optimized containers and algorithms. | | utility | pair, make_pair, rel_ops, etc. | | functional | Function objects. | | iterator | Iteration for containers and algorithms. | | smart_ptr | Smart pointers: shared_ptr, shared_array, weak_ptr, scoped_ptr, scoped_array, linked_ptr, linked_array, intrusive_ptr. |   ## Module Behaviour The overhead sizes listed here refer to an optimized release build; debug builds may add some additional overhead. Some of the overhead sizes may be off by a little bit (usually at most 4 bytes). This is because the values reported here are those that refer to when EASTL's container optimizations have been complete. These optimizations may not have been completed as you are reading this. 
| Container |Stores | Container Overhead (32 bit) | Container Overhead (64 bit) | Node Overhead (32 bit) | Node Overhead (64 bit) | Iterator category | size() efficiency | operator[] efficiency | Insert efficiency | Erase via Iterator efficiency | Find efficiency | Sort efficiency | |------|------|------|------|------|------|------|------|------|------|------|------|------| | slist | T | 8 | 16 | 4 | 8 | f | n | - | 1 | 1 | n | n+ | | list | T | 12 | 24 | 8 | 16 | b | n | - | 1 | 1 | n | n log(n) | | intrusive_slist | T | 4 | 8 | 4 | 8 | f | n | - | 1 | 1 | 1 | n+ | | intrusive_list | T | 8 | 16 | 8 | 16 | b | n | - | 1 | 1 | 1 | n log(n) | | array | T | 0 | 0 | 0 | 0 | r | 1 | 1 | - | - | n | n log(n) | | vector | T | 16 | 32 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | n | n log(n) | | vector_set | T | 16 | 32 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | vector_multiset | T | 16 | 32 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | vector_map | Key, T | 16 | 32 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | vector_multimap | Key, T | 16 | 32 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | log(n) | 1 | | deque | T | 44 | 84 | 0 | 0 | r | 1 | 1 | 1 at begin or end, else n / 2 | 1 at begin or end, else n / 2 | n | n log(n) | | bit_vector | bool | 8 | 16 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | n | n log(n) | | string (all types) | T | 16 | 32 | 0 | 0 | r | 1 | 1 | 1 at end, else n | 1 at end, else n | n | n log(n) | | set | T | 24 | 44 | 16 | 28 | b | 1 | - | log(n) | log(n) | log(n) | 1 | | multiset | T | 24 | 44 | 16 | 28 | b | 1 | - | log(n) | log(n) | log(n) | 1 | | map | Key, T | 24 | 44 | 16 | 28 | b | 1 | log(n) | log(n) | log(n) | log(n) | 1 | | multimap | Key, T | 24 | 44 | 16 | 28 | b | 1 | - | log(n) | log(n) | log(n) | 1 | | hash_set | T | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | | hash_multiset | T | 16 | 20 | 4 | 8 | b 
| 1 | - | 1 | 1 | 1 | - | | hash_map | Key, T | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | | hash_multimap | Key, T | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_set | T | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_multiset | T | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_map | T (Key == T) | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | | intrusive_hash_multimap | T (Key == T)  | 16 | 20 | 4 | 8 | b | 1 | - | 1 | 1 | 1 | - | * \- means that the operation does not exist. * 1 means amortized constant time. Also known as O(1) * n means time proportional to the container size. Also known as O(n) * log(n) means time proportional to the natural logarithm of the container size. Also known as O(log(n)) * n log(n) means time proportional to log(n) times the size of the container. Also known as O(n log(n)) * n+ means that the time is at least n, and possibly higher. * Iterator meanings are: f = forward iterator; b = bidirectional iterator, r = random iterator. * Overhead indicates approximate per-element overhead memory required in bytes. Overhead doesn't include possible additional overhead that may be imposed by the memory heap used to allocate nodes. General heaps tend to have between 4 and 16 bytes of overhead per allocation, depending on the heap. * Some overhead values are dependent on the structure alignment characteristics in effect. The values reported here are those that would be in effect for a system that requires pointers to be aligned on boundaries of their size and allocations with a minimum of 4 bytes (thus one byte values get rounded up to 4). * Some overhead values are dependent on the size_type used by containers. We assume a size_type of 4 bytes, even for 64 bit machines, as this is the EASTL default. * Inserting at the end of a vector may cause the vector to be resized; resizing a vector is O(n). However, the amortized time complexity for vector insertions at the end is constant. 
* Sort assumes the usage of the best possible sort for a large container of random data. Some sort algorithms (e.g. quick_sort) require random access iterators and so the sorting of some containers requires a different sort algorithm. We do not include bucket or radix sorts, as they are always O(n). * Some containers (e.g. deque, hash*) have unusual data structures that make per-container and per-node overhead calculations not quite account for all memory. ---------------------------------------------- End of document ================================================ FILE: doc/html/EASTL Benchmarks.html ================================================ EASTL Benchmarks

EASTL Benchmarks

Introduction

This document provides a number of benchmark results of EASTL. Where possible, these benchmarks are implemented as comparisons with equivalent functionality found in other libraries such as compiler STL libraries or other well-known libraries. These comparison benchmarks concentrate on highlighting the differences between implementations rather than the similarities. In many mundane cases -- such as accessing a vector element via operator [] -- virtually all vector/array implementations you are likely to run into will have identical performance.

It's also important to note that the platform you run on can make a significant difference in the results. On a modern 3+GHz Windows PC many operations are fast due to large memory caches, intelligent branch prediction, and parallel instruction execution. However, on embedded or console systems none of these may be the case.

While EASTL generally outperforms std STL, there are some benchmarks here in which EASTL is slower than std STL. There are three primary explanations of this:

  1. EASTL is making some kind of speed, memory, or design tradeoff that results in the given speed difference. In many such cases, EASTL goes slower on one benchmark in order to go faster on another benchmark deemed more important. This explanation constitutes about 60% of the cases.
  2. Compiler optimizations and resulting code generation are coincidentally favoring one kind of implementation over another, often when they are visually virtually identical. This explanation constitutes about 30% of the cases.
  3. EASTL is simply not yet as optimized as it could be. This explanation constitutes about 10% of the cases (as of this writing there are about three such functions throughout EASTL).

Benchmarks

Below is a table of links to detailed benchmark results derived from the Benchmark test present in the EASTL package. The detailed results are present below the table. Additional platforms will be added as results become available for those platforms. Debug benchmarks are present because (lack of) debug performance can be significant for highly templated libraries. EASTL has specific optimizations to enhance debug performance relative to other standard libraries; in some cases it is 10x or more faster than alternatives (though there are exceptions where EASTL is slower). Feel free to submit results for additional compilers/platforms.

Platform Compiler STL type Build Results
Win32 VC++ 7.1 Microsoft (Dinkumware) Debug Detail
Win32 VC++ 7.1 Microsoft (Dinkumware) Release Detail
Win32 VC++ 7.1 STLPort Debug Detail
Win32 VC++ 7.1 STLPort Release Detail

Win32.VC71.MS.Debug

EASTL version: 0.96.00
Platform: Windows on X86
Compiler: Microsoft Visual C++ compiler, version 1310
Allocator: PPMalloc::GeneralAllocatorDebug. Thread safety enabled.
Build: Debug. Inlining disabled. STL debug features disabled.

Values are times to complete tests; smaller values are better.
Alarm indicates a greater than 10% difference.

Test VC++ EASTL Ratio Alarm
----------------------------------------------------------------------------------------
algorithm/adj_find/vector<TestObject> 33061345 6497757 5.09 *
algorithm/copy/vector<LargePOD> 5844906 4876076 1.20 *
algorithm/copy/vector<uint32_t> 1634346 166065 9.84 *
algorithm/copy_backward/vector<LargePOD> 4515974 4638892 0.97
algorithm/copy_backward/vector<uint32_t> 1821168 121746 14.96 *
algorithm/count/vector<uint64_t> 17048884 2720766 6.27 *
algorithm/equal_range/vector<uint64_t> 1111147812 448756888 2.48 *
algorithm/fill/bool[] 1728722 91936 18.80 *
algorithm/fill/char[]/'d' 1299200 33745 38.50 *
algorithm/fill/vector<char>/'d' 10205092 33796 100.00 *
algorithm/fill/vector<char>/0 10200748 33805 100.00 *
algorithm/fill/vector<uint64_t> 10416538 1399687 7.44 *
algorithm/fill/vector<void*> 10221837 1307700 7.82 *
algorithm/fill_n/bool[] 1399033 34196 40.91 *
algorithm/fill_n/char[] 1299225 33754 38.49 *
algorithm/fill_n/vector<uint64_t> 5961637 1371900 4.35 *
algorithm/find_end/string/end 16569373 2657372 6.24 *
algorithm/find_end/string/middle 16558638 20242410 0.82 *
algorithm/find_end/string/none 16811207 40480468 0.42 *
algorithm/lex_cmp/schar[] 1749674 194429 9.00 *
algorithm/lex_cmp/vector<TestObject> 32824195 5253587 6.25 *
algorithm/lex_cmp/vector<uchar> 29852034 202658 100.00 *
algorithm/lower_bound/vector<TestObject> 798624462 350027935 2.28 *
algorithm/min_element/vector<TestObject> 21675298 5314676 4.08 *
algorithm/rand_shuffle/vector<uint64_t> 84236190 43677506 1.93 *
algorithm/reverse/list<TestObject> 3007292 2105799 1.43 *
algorithm/reverse/vector<TestObject> 2974618 2124796 1.40 *
algorithm/search/string<char> 16228158 3594268 4.52 *
algorithm/search_n/string<char> 16926985 1522096 11.12 *
algorithm/unique/vector<TestObject> 54206243 9988002 5.43 *
algorithm/unique/vector<uint32_t> 26940079 1741991 15.47 *
algorithm/unique/vector<uint64_t> 47621344 5213127 9.13 *
algorithm/upper_bound/vector<uint32_t> 372381295 137901552 2.70 *

bitset<1500>/>>=/1 90196544 92539832 0.97
bitset<1500>/count 50753832 53742117 0.94
bitset<1500>/flip 86935875 85121117 1.02
bitset<1500>/reset 78153837 79922611 0.98
bitset<1500>/set() 79214968 79360658 1.00
bitset<1500>/set(i) 11300589 12199651 0.93
bitset<1500>/test 11282679 13186450 0.86 *

bitset<15>/>>=/1 10500577 6000559 1.75 *
bitset<15>/count 4000356 6399753 0.63 *
bitset<15>/flip 7268877 5647944 1.29 *
bitset<15>/reset 8564235 5800163 1.48 *
bitset<15>/set() 9935523 5914012 1.68 *
bitset<15>/set(i) 11199703 12503637 0.90 *
bitset<15>/test 10600623 12899592 0.82 *

bitset<35>/>>=/1 13076052 6599834 1.98 *
bitset<35>/count 4800384 11500330 0.42 *
bitset<35>/flip 7915439 5816313 1.36 *
bitset<35>/reset 9400049 5803180 1.62 *
bitset<35>/set() 10701152 5840316 1.83 *
bitset<35>/set(i) 11342936 12271128 0.92
bitset<35>/test 10670799 13099682 0.81 *

bitset<75>/>>=/1 14198834 17151088 0.83 *
bitset<75>/count 5795530 8576373 0.68 *
bitset<75>/flip 8516703 8922995 0.95
bitset<75>/reset 9999970 8526095 1.17 *
bitset<75>/set() 11124877 9009686 1.23 *
bitset<75>/set(i) 11300563 12531618 0.90 *
bitset<75>/test 11031913 13100523 0.84 *

deque<ValuePair>/erase 743801706 335646802 2.22 *
deque<ValuePair>/insert 742331809 341912866 2.17 *
deque<ValuePair>/iteration 29097030 16315827 1.78 *
deque<ValuePair>/operator[] 49859598 24026313 2.08 *
deque<ValuePair>/push_back 424807033 34497608 12.31 *
deque<ValuePair>/push_front 402313373 38006322 10.59 *
deque<ValuePair>/sort 725101017 581796551 1.25 *

hash_map<string, uint32_t>/clear 559462 961019 0.58 *
hash_map<string, uint32_t>/count 53377807 8091448 6.60 *
hash_map<string, uint32_t>/erase pos 613573 858084 0.72 *
hash_map<string, uint32_t>/erase range 5488748 461134 11.90 *
hash_map<string, uint32_t>/erase val 35760096 16379858 2.18 *
hash_map<string, uint32_t>/find 43490335 10324823 4.21 *
hash_map<string, uint32_t>/find_as/char* 49343818 8617139 5.73 *
hash_map<string, uint32_t>/insert 107420281 168690439 0.64 *
hash_map<string, uint32_t>/iteration 2456356 1255153 1.96 *
hash_map<string, uint32_t>/operator[] 47209502 12581624 3.75 *

hash_map<uint32_t, TestObject>/clear 533172 546449 0.98
hash_map<uint32_t, TestObject>/count 28667432 2899997 9.89 *
hash_map<uint32_t, TestObject>/erase pos 683239 538289 1.27 *
hash_map<uint32_t, TestObject>/erase range 9632676 253037 38.07 *
hash_map<uint32_t, TestObject>/erase val 25466026 7752188 3.29 *
hash_map<uint32_t, TestObject>/find 20048253 4678502 4.29 *
hash_map<uint32_t, TestObject>/insert 71085798 37686187 1.89 *
hash_map<uint32_t, TestObject>/iteration 1460318 1338317 1.09
hash_map<uint32_t, TestObject>/operator[] 23226692 7888748 2.94 *

heap (uint32_t[])/make_heap 5399966 6961305 0.78 *
heap (uint32_t[])/pop_heap 108060534 103511318 1.04
heap (uint32_t[])/push_heap 22595661 16640688 1.36 *
heap (uint32_t[])/sort_heap 93559424 83076731 1.13 *

heap (vector<TestObject>)/make_heap 91770743 21724870 4.22 *
heap (vector<TestObject>)/pop_heap 1175599317 284007398 4.14 *
heap (vector<TestObject>)/push_heap 207804541 45918046 4.53 *
heap (vector<TestObject>)/sort_heap 970394145 208321477 4.66 *

list<TestObject>/ctor(it) 805539509 760938607 1.06
list<TestObject>/ctor(n) 80959236 75106995 1.08
list<TestObject>/erase 1052543704 1044976137 1.01
list<TestObject>/find 97785267 75970884 1.29 *
list<TestObject>/insert 873895175 807051107 1.08
list<TestObject>/push_back 812797710 780742425 1.04
list<TestObject>/remove 1850600714 1436980599 1.29 *
list<TestObject>/reverse 180270465 80466636 2.24 *
list<TestObject>/size/1 440148 599642 0.73 *
list<TestObject>/size/10 439433 1329817 0.33 * EASTL intentionally implements list::size as O(n).
list<TestObject>/size/100 439595 11030060 0.04 * EASTL intentionally implements list::size as O(n).
list<TestObject>/splice 177106094 69383027 2.55 *

map<TestObject, uint32_t>/clear 508283 470807 1.08
map<TestObject, uint32_t>/count 43145354 14280357 3.02 *
map<TestObject, uint32_t>/equal_range 38594004 16520447 2.34 *
map<TestObject, uint32_t>/erase/key 33948082 16123175 2.11 *
map<TestObject, uint32_t>/erase/pos 578332 455201 1.27 * MS uses a code bloating implementation of erase.
map<TestObject, uint32_t>/erase/range 387345 284538 1.36 *
map<TestObject, uint32_t>/find 22897224 12766100 1.79 *
map<TestObject, uint32_t>/insert 61665800 47286928 1.30 *
map<TestObject, uint32_t>/iteration 1977202 745391 2.65 *
map<TestObject, uint32_t>/lower_bound 19892941 12260928 1.62 *
map<TestObject, uint32_t>/operator[] 24199084 15429634 1.57 *
map<TestObject, uint32_t>/upper_bound 19842409 12064441 1.64 *

set<uint32_t>/clear 1027625 1000901 1.03
set<uint32_t>/count 39730182 13329565 2.98 *
set<uint32_t>/equal_range 34681649 14768827 2.35 *
set<uint32_t>/erase range 841458 602030 1.40 *
set<uint32_t>/erase/pos 1380485 1084303 1.27 * MS uses a code bloating implementation of erase.
set<uint32_t>/erase/val 31617425 13344023 2.37 *
set<uint32_t>/find 19582428 10788864 1.82 *
set<uint32_t>/insert 61434014 48232086 1.27 *
set<uint32_t>/iteration 1512057 667820 2.26 *
set<uint32_t>/lower_bound 18394885 10402785 1.77 *
set<uint32_t>/upper_bound 17189083 10554425 1.63 *

sort/q_sort/TestObject[] 87088799 15037988 5.79 *
sort/q_sort/TestObject[]/sorted 21502892 3284299 6.55 *
sort/q_sort/vector<TestObject> 87962047 15004677 5.86 *
sort/q_sort/vector<TestObject>/sorted 21396523 3341163 6.40 *
sort/q_sort/vector<ValuePair> 80334589 10429161 7.70 *
sort/q_sort/vector<ValuePair>/sorted 22133295 3230553 6.85 *
sort/q_sort/vector<uint32> 72195388 5940302 12.15 *
sort/q_sort/vector<uint32>/sorted 19635171 995495 19.72 *

string<char16_t>/compare 523013373 534722089 0.98
string<char16_t>/erase/pos,n 3446597 3439492 1.00
string<char16_t>/find/p,pos,n 383873158 441902786 0.87 *
string<char16_t>/find_first_not_of/p,pos,n 174157 134131 1.30 *
string<char16_t>/find_first_of/p,pos,n 11715423 8520944 1.37 *
string<char16_t>/find_last_of/p,pos,n 1871556 1226457 1.53 *
string<char16_t>/insert/pos,p 3624877 3357058 1.08
string<char16_t>/iteration 6766787933 581916665 11.63 *
string<char16_t>/operator[] 4820827 2335579 2.06 *
string<char16_t>/push_back 59812962 6757466 8.85 *
string<char16_t>/replace/pos,n,p,n 4371279 4459713 0.98
string<char16_t>/reserve 2307530 1919386 1.20 *
string<char16_t>/rfind/p,pos,n 734826 372615 1.97 *
string<char16_t>/size 41608 28866 1.44 *
string<char16_t>/swap 1033932 1490994 0.69 *

string<char8_t>/compare 63086797 64194771 0.98
string<char8_t>/erase/pos,n 2045687 1960270 1.04
string<char8_t>/find/p,pos,n 123872549 471364764 0.26 *
string<char8_t>/find_first_not_of/p,pos,n 140013 130271 1.07
string<char8_t>/find_first_of/p,pos,n 8051906 8749994 0.92
string<char8_t>/find_last_of/p,pos,n 1318835 1230715 1.07
string<char8_t>/insert/pos,p 1770610 1724234 1.03
string<char8_t>/iteration 28112136 2544475 11.05 *
string<char8_t>/operator[] 4810525 2255841 2.13 *
string<char8_t>/push_back 54869634 6127447 8.95 *
string<char8_t>/replace/pos,n,p,n 2737578 2847900 0.96
string<char8_t>/reserve 1123395 394902 2.84 *
string<char8_t>/rfind/p,pos,n 737299 368518 2.00 *
string<char8_t>/size 42245 26801 1.58 *
string<char8_t>/swap 1036142 1491028 0.69 *

vector<uint64>/erase 56417135 55770251 1.01
vector<uint64>/insert 56617761 56100468 1.01
vector<uint64>/iteration 10413895 1291269 8.06 *
vector<uint64>/operator[] 23507193 3479390 6.76 *
vector<uint64>/push_back 34687939 13806627 2.51 *
vector<uint64>/sort 256886550 84669657 3.03 *

Win32.VC71.MS.Release

EASTL version: 0.96.00
Platform: Windows on X86
Compiler: Microsoft Visual C++ compiler, version 1310
Allocator: PPMalloc::GeneralAllocator. Thread safety enabled.
Build: Full optimization. Inlining enabled.

Values are times to complete tests; smaller values are better.
Alarm indicates a greater than 10% difference.

Test VC++ EASTL Ratio Alarm
----------------------------------------------------------------------------------------
algorithm/adj_find/vector<TestObject> 2783546 2750660 1.01
algorithm/copy/vector<LargePOD> 6474025 4972738 1.30 *
algorithm/copy/vector<uint32_t> 157267 173162 0.91
algorithm/copy_backward/vector<LargePOD> 4836406 4374780 1.11 *
algorithm/copy_backward/vector<uint32_t> 104780 120912 0.87 *
algorithm/count/vector<uint64_t> 1368440 1368696 1.00
algorithm/equal_range/vector<uint64_t> 114199387 102783938 1.11 *
algorithm/fill/bool[] 253215 27353 9.26 *
algorithm/fill/char[]/'d' 253164 27404 9.24 *
algorithm/fill/vector<char>/'d' 253105 27362 9.25 *
algorithm/fill/vector<char>/0 253275 27353 9.26 *
algorithm/fill/vector<uint64_t> 397001 394323 1.01
algorithm/fill/vector<void*> 547196 642362 0.85 *
algorithm/fill_n/bool[] 229177 27361 8.38 *
algorithm/fill_n/char[] 228845 27404 8.35 *
algorithm/fill_n/vector<uint64_t> 565233 1376822 0.41 *
algorithm/find_end/string/end 2107116 82356 25.59 *
algorithm/find_end/string/middle 2111672 664283 3.18 *
algorithm/find_end/string/none 2110423 1519596 1.39 *
algorithm/lex_cmp/schar[] 741021 176162 4.21 *
algorithm/lex_cmp/vector<TestObject> 2610494 2642183 0.99
algorithm/lex_cmp/vector<uchar> 697595 167866 4.16 *
algorithm/lower_bound/vector<TestObject> 62462233 58146664 1.07
algorithm/min_element/vector<TestObject> 4350385 2671227 1.63 *
algorithm/rand_shuffle/vector<uint64_t> 10868261 11300818 0.96
algorithm/reverse/list<TestObject> 483718 470024 1.03
algorithm/reverse/vector<TestObject> 476739 484322 0.98
algorithm/search/string<char> 2560387 1259496 2.03 *
algorithm/search_n/string<char> 2770991 458524 6.04 *
algorithm/unique/vector<TestObject> 4194520 4658910 0.90 *
algorithm/unique/vector<uint32_t> 538730 787924 0.68 *
algorithm/unique/vector<uint64_t> 3169829 2575636 1.23 *
algorithm/upper_bound/vector<uint32_t> 27495562 25321593 1.09

bitset<1500>/>>=/1 33464228 33469719 1.00
bitset<1500>/count 18736116 18814903 1.00
bitset<1500>/flip 19299309 18605438 1.04
bitset<1500>/reset 22200487 15262847 1.45 *
bitset<1500>/set() 14418193 17557319 0.82 *
bitset<1500>/set(i) 1599250 1599199 1.00
bitset<1500>/test 1599241 1599233 1.00

bitset<15>/>>=/1 2199222 2264442 0.97
bitset<15>/count 1399406 1399193 1.00
bitset<15>/flip 1266712 1199197 1.06
bitset<15>/reset 1399364 1399109 1.00
bitset<15>/set() 1199197 999201 1.20 *
bitset<15>/set(i) 1599258 1462952 1.09
bitset<15>/test 1599275 1599224 1.00

bitset<35>/>>=/1 2599266 1933376 1.34 *
bitset<35>/count 2599240 2592559 1.00
bitset<35>/flip 1693124 1199188 1.41 *
bitset<35>/reset 1399406 999201 1.40 *
bitset<35>/set() 1599403 1199205 1.33 *
bitset<35>/set(i) 1599241 1599190 1.00
bitset<35>/test 1599250 1599232 1.00

bitset<75>/>>=/1 4199332 4199213 1.00
bitset<75>/count 2999497 2199341 1.36 *
bitset<75>/flip 2399499 1830178 1.31 *
bitset<75>/reset 2199468 1199197 1.83 *
bitset<75>/set() 1999387 1199851 1.67 *
bitset<75>/set(i) 1599266 1599198 1.00
bitset<75>/test 1599241 1662651 0.96

deque<ValuePair>/erase 90444165 37113253 2.44 *
deque<ValuePair>/insert 93299349 36175167 2.58 *
deque<ValuePair>/iteration 2756414 2122076 1.30 *
deque<ValuePair>/operator[] 5117969 4632075 1.10
deque<ValuePair>/push_back 30300757 3060357 9.90 *
deque<ValuePair>/push_front 25498529 2808392 9.08 *
deque<ValuePair>/sort 142283047 111292464 1.28 *

hash_map<string, uint32_t>/clear 146769 389699 0.38 *
hash_map<string, uint32_t>/count 13059434 3460324 3.77 *
hash_map<string, uint32_t>/erase pos 184246 331925 0.56 *
hash_map<string, uint32_t>/erase range 382432 167237 2.29 *
hash_map<string, uint32_t>/erase val 6187898 3302114 1.87 *
hash_map<string, uint32_t>/find 11289369 3459024 3.26 *
hash_map<string, uint32_t>/find_as/char* 13559192 3662387 3.70 *
hash_map<string, uint32_t>/insert 17514012 14095176 1.24 *
hash_map<string, uint32_t>/iteration 801014 218450 3.67 *
hash_map<string, uint32_t>/operator[] 11457065 3690385 3.10 *

hash_map<uint32_t, TestObject>/clear 141865 265379 0.53 *
hash_map<uint32_t, TestObject>/count 1766045 703613 2.51 *
hash_map<uint32_t, TestObject>/erase pos 172337 218458 0.79 *
hash_map<uint32_t, TestObject>/erase range 537846 102340 5.26 *
hash_map<uint32_t, TestObject>/erase val 2220132 1441787 1.54 *
hash_map<uint32_t, TestObject>/find 1612994 1043953 1.55 *
hash_map<uint32_t, TestObject>/insert 7141547 4348056 1.64 *
hash_map<uint32_t, TestObject>/iteration 199512 169328 1.18 *
hash_map<uint32_t, TestObject>/operator[] 1831733 1519707 1.21 *

heap (uint32_t[])/make_heap 3366247 1949093 1.73 *
heap (uint32_t[])/pop_heap 57280514 53779440 1.07
heap (uint32_t[])/push_heap 9700217 7582935 1.28 *
heap (uint32_t[])/sort_heap 47227751 46131948 1.02

heap (vector<TestObject>)/make_heap 11458442 11510819 1.00
heap (vector<TestObject>)/pop_heap 122897267 119061132 1.03
heap (vector<TestObject>)/push_heap 21688481 21176220 1.02
heap (vector<TestObject>)/sort_heap 90867380 88869523 1.02

list<TestObject>/ctor(it) 74591104 69845817 1.07
list<TestObject>/ctor(n) 6243998 5838582 1.07
list<TestObject>/erase 299509298 206013676 1.45 *
list<TestObject>/find 40927185 14514243 2.82 *
list<TestObject>/insert 71277251 47234534 1.51 *
list<TestObject>/push_back 73780527 44116725 1.67 *
list<TestObject>/remove 786197776 326434612 2.41 *
list<TestObject>/reverse 49283128 25029678 1.97 *
list<TestObject>/size/1 159741 139400 1.15 *
list<TestObject>/size/10 159324 346579 0.46 * EASTL intentionally implements list::size as O(n).
list<TestObject>/size/100 159188 97235419 0.00 * EASTL intentionally implements list::size as O(n).
list<TestObject>/splice 63548584 19322931 3.29 *

map<TestObject, uint32_t>/clear 167408 170501 0.98
map<TestObject, uint32_t>/count 10213685 4748346 2.15 *
map<TestObject, uint32_t>/equal_range 9515053 5677558 1.68 *
map<TestObject, uint32_t>/erase/key 6646260 4302300 1.54 *
map<TestObject, uint32_t>/erase/pos 297135 327938 0.91 MS uses a code bloating implementation of erase.
map<TestObject, uint32_t>/erase/range 148614 163702 0.91
map<TestObject, uint32_t>/find 5637531 4767055 1.18 *
map<TestObject, uint32_t>/insert 9591128 9030349 1.06
map<TestObject, uint32_t>/iteration 323595 325261 0.99
map<TestObject, uint32_t>/lower_bound 5398239 4784089 1.13 *
map<TestObject, uint32_t>/operator[] 5631250 5141166 1.10
map<TestObject, uint32_t>/upper_bound 5436336 4762431 1.14 *

set<uint32_t>/clear 155983 156026 1.00
set<uint32_t>/count 9635965 4392146 2.19 *
set<uint32_t>/equal_range 8504157 5247832 1.62 *
set<uint32_t>/erase range 140488 119408 1.18 *
set<uint32_t>/erase/pos 260678 286697 0.91 MS uses a code bloating implementation of erase.
set<uint32_t>/erase/val 6008225 4012825 1.50 *
set<uint32_t>/find 5145432 4381945 1.17 *
set<uint32_t>/insert 8087129 8697251 0.93
set<uint32_t>/iteration 271507 304538 0.89 *
set<uint32_t>/lower_bound 4666228 4404250 1.06
set<uint32_t>/upper_bound 4623600 4402974 1.05

sort/q_sort/TestObject[] 9596169 5578652 1.72 *
sort/q_sort/TestObject[]/sorted 602463 1016132 0.59 *
sort/q_sort/vector<TestObject> 9674828 5430199 1.78 *
sort/q_sort/vector<TestObject>/sorted 606908 1111647 0.55 *
sort/q_sort/vector<ValuePair> 6284194 3423452 1.84 *
sort/q_sort/vector<ValuePair>/sorted 711629 569364 1.25 *
sort/q_sort/vector<uint32> 5453379 2916146 1.87 *
sort/q_sort/vector<uint32>/sorted 537047 419144 1.28 *

string<char16_t>/compare 435083295 251985824 1.73 *
string<char16_t>/erase/pos,n 3454842 3451858 1.00
string<char16_t>/find/p,pos,n 401954723 165298157 2.43 *
string<char16_t>/find_first_not_of/p,pos,n 131452 65374 2.01 *
string<char16_t>/find_first_of/p,pos,n 11657444 4144515 2.81 *
string<char16_t>/find_last_of/p,pos,n 1604248 567571 2.83 *
string<char16_t>/insert/pos,p 3398734 3355460 1.01
string<char16_t>/iteration 218856504 218771844 1.00
string<char16_t>/operator[] 714161 240023 2.98 *
string<char16_t>/push_back 34968235 2444897 14.30 *
string<char16_t>/replace/pos,n,p,n 4226693 4198498 1.01
string<char16_t>/reserve 1901765 390805 4.87 *
string<char16_t>/rfind/p,pos,n 195483 150985 1.29 *
string<char16_t>/size 11169 11245 0.99
string<char16_t>/swap 1459280 419807 3.48 *

string<char8_t>/compare 63071275 77209580 0.82 *
string<char8_t>/erase/pos,n 2008652 1944494 1.03
string<char8_t>/find/p,pos,n 123201023 167536164 0.74 *
string<char8_t>/find_first_not_of/p,pos,n 93372 67864 1.38 *
string<char8_t>/find_first_of/p,pos,n 7542492 3375758 2.23 *
string<char8_t>/find_last_of/p,pos,n 933972 583576 1.60 *
string<char8_t>/insert/pos,p 1737213 1750847 0.99
string<char8_t>/iteration 893834 899130 0.99
string<char8_t>/operator[] 817879 313437 2.61 *
string<char8_t>/push_back 20857734 2004410 10.41 *
string<char8_t>/replace/pos,n,p,n 2578696 2607655 0.99
string<char8_t>/reserve 915127 85289 10.73 *
string<char8_t>/rfind/p,pos,n 196103 148894 1.32 *
string<char8_t>/size 11619 11220 1.04
string<char8_t>/swap 1461056 419874 3.48 *

vector<uint64>/erase 55235116 55284587 1.00
vector<uint64>/insert 55166046 55142755 1.00
vector<uint64>/iteration 553954 509719 1.09
vector<uint64>/operator[] 1284239 798516 1.61 *
vector<uint64>/push_back 5399549 3867959 1.40 *
vector<uint64>/sort 43636314 42619952 1.02

Win32.VC71.STLPort.Debug

EASTL version: 0.96.00
Platform: Windows on X86
Compiler: Microsoft Visual C++ compiler, version 1310
Allocator: PPMalloc::GeneralAllocatorDebug. Thread safety enabled.
Build: Debug. Inlining disabled. STL debug features disabled.

Values are times to complete tests; smaller values are better.
Alarm indicates a greater than 10% difference.

Test STLPort EASTL Ratio Alarm
----------------------------------------------------------------------------------------
algorithm/adj_find/vector<TestObject> 5661170 5689517 1.00
algorithm/copy/vector<LargePOD> 5573815 5124428 1.09
algorithm/copy/vector<uint32_t> 148273 125782 1.18 *
algorithm/copy_backward/vector<LargePOD> 5429791 4834510 1.12 *
algorithm/copy_backward/vector<uint32_t> 156765 163038 0.96
algorithm/count/vector<uint64_t> 2730922 2730072 1.00
algorithm/equal_range/vector<uint64_t> 639366489 452896251 1.41 *
algorithm/fill/bool[] 1299326 27361 47.49 *
algorithm/fill/char[]/'d' 27378 27361 1.00
algorithm/fill/vector<char>/'d' 34459 27361 1.26 *
algorithm/fill/vector<char>/0 1299224 27361 47.48 *
algorithm/fill/vector<uint64_t> 1400647 1400145 1.00
algorithm/fill/vector<void*> 1308779 1309085 1.00
algorithm/fill_n/bool[] 1299156 27352 47.50 *
algorithm/fill_n/char[] 1299258 27369 47.47 *
algorithm/fill_n/vector<uint64_t> 1451162 1313632 1.10
algorithm/find_end/string/end 13089999 2526412 5.18 *
algorithm/find_end/string/middle 12627412 20190101 0.63 *
algorithm/find_end/string/none 12704185 40728803 0.31 *
algorithm/lex_cmp/schar[] 1749844 195806 8.94 *
algorithm/lex_cmp/vector<TestObject> 5060968 4799882 1.05
algorithm/lex_cmp/vector<uchar> 1668354 189490 8.80 *
algorithm/lower_bound/vector<TestObject> 450240945 353437573 1.27 *
algorithm/min_element/vector<TestObject> 5861744 5326371 1.10
algorithm/rand_shuffle/vector<uint64_t> 40780449 45780090 0.89 *
algorithm/reverse/list<TestObject> 2657678 2130627 1.25 *
algorithm/reverse/vector<TestObject> 2666424 2124889 1.25 *
algorithm/search/string<char> 3110379 3613460 0.86 *
algorithm/search_n/string<char> 3061665 1521261 2.01 *
algorithm/unique/vector<TestObject> 12423684 9485439 1.31 *
algorithm/unique/vector<uint32_t> 3718699 1726596 2.15 *
algorithm/unique/vector<uint64_t> 6205110 4591631 1.35 *
algorithm/upper_bound/vector<uint32_t> 185391094 139336317 1.33 *

bitset<1500>/>>=/1 120666960 92449816 1.31 * STLPort is broken, neglects wraparound check.
bitset<1500>/count 201709793 52874726 3.81 *
bitset<1500>/flip 87360297 81737071 1.07
bitset<1500>/reset 23950178 77390323 0.31 *
bitset<1500>/set() 84608107 76912011 1.10
bitset<1500>/set(i) 18023620 12229604 1.47 *
bitset<1500>/test 18006553 13276396 1.36 *

bitset<15>/>>=/1 11935904 6012695 1.99 * STLPort is broken, neglects wraparound check.
bitset<15>/count 9368581 6022742 1.56 *
bitset<15>/flip 11600706 6533635 1.78 *
bitset<15>/reset 5830957 5874690 0.99
bitset<15>/set() 11695328 5701621 2.05 *
bitset<15>/set(i) 16363205 12570216 1.30 *
bitset<15>/test 16743172 13201452 1.27 *

bitset<35>/>>=/1 22950918 6774457 3.39 * STLPort is broken, neglects wraparound check.
bitset<35>/count 12655309 11736256 1.08
bitset<35>/flip 13738575 5800042 2.37 *
bitset<35>/reset 15561434 5800510 2.68 *
bitset<35>/set() 13564283 5600709 2.42 *
bitset<35>/set(i) 18519689 12199973 1.52 *
bitset<35>/test 18000569 13103566 1.37 *

bitset<75>/>>=/1 25579525 16669664 1.53 * STLPort is broken, neglects wraparound check.
bitset<75>/count 18740698 8480492 2.21 *
bitset<75>/flip 13555630 8300335 1.63 *
bitset<75>/reset 15200133 8200000 1.85 *
bitset<75>/set() 14408112 8001959 1.80 *
bitset<75>/set(i) 18137741 12374257 1.47 *
bitset<75>/test 18422135 13100038 1.41 *

deque<ValuePair>/erase 651933790 326443043 2.00 *
deque<ValuePair>/insert 659786183 333304660 1.98 *
deque<ValuePair>/iteration 23734592 16173706 1.47 *
deque<ValuePair>/operator[] 59126816 23911774 2.47 *
deque<ValuePair>/push_back 58056988 31859266 1.82 *
deque<ValuePair>/push_front 57780891 31743199 1.82 *
deque<ValuePair>/sort 818414195 596568113 1.37 *

hash_map<string, uint32_t>/clear 3422133 2204517 1.55 *
hash_map<string, uint32_t>/count 9869545 8624924 1.14 *
hash_map<string, uint32_t>/erase pos 3256350 2069299 1.57 *
hash_map<string, uint32_t>/erase range 3230203 1151392 2.81 *
hash_map<string, uint32_t>/erase val 16860362 15939778 1.06
hash_map<string, uint32_t>/find 10286971 9920910 1.04
hash_map<string, uint32_t>/find_as/char* 118136025 9458468 12.49 *
hash_map<string, uint32_t>/insert 188948336 174490082 1.08
hash_map<string, uint32_t>/iteration 4037049 2021036 2.00 *
hash_map<string, uint32_t>/operator[] 11472127 12887699 0.89 *

hash_map<uint32_t, TestObject>/clear 2522264 1331848 1.89 *
hash_map<uint32_t, TestObject>/count 3210739 2897063 1.11 *
hash_map<uint32_t, TestObject>/erase pos 1862281 1304783 1.43 *
hash_map<uint32_t, TestObject>/erase range 698079 579606 1.20 *
hash_map<uint32_t, TestObject>/erase val 8806722 7041298 1.25 *
hash_map<uint32_t, TestObject>/find 3604875 4709645 0.77 *
hash_map<uint32_t, TestObject>/insert 40785711 40376342 1.01
hash_map<uint32_t, TestObject>/iteration 3064088 1508834 2.03 *
hash_map<uint32_t, TestObject>/operator[] 6053742 8176906 0.74 *

heap (uint32_t[])/make_heap 5799813 5738596 1.01
heap (uint32_t[])/pop_heap 113775168 102076134 1.11 *
heap (uint32_t[])/push_heap 21649151 16854845 1.28 *
heap (uint32_t[])/sort_heap 97535213 83290735 1.17 *

heap (vector<TestObject>)/make_heap 22215557 22277063 1.00
heap (vector<TestObject>)/pop_heap 275392171 277340039 0.99
heap (vector<TestObject>)/push_heap 51479442 47342577 1.09
heap (vector<TestObject>)/sort_heap 214474736 218497540 0.98

list<TestObject>/ctor(it) 767753795 753421427 1.02
list<TestObject>/ctor(n) 74185322 73386245 1.01
list<TestObject>/erase 1021003824 1033873589 0.99
list<TestObject>/find 77666072 74917622 1.04
list<TestObject>/insert 788071150 774188737 1.02
list<TestObject>/push_back 760490154 737327348 1.03
list<TestObject>/remove 1682511938 1434771006 1.17 *
list<TestObject>/reverse 87237327 80394623 1.09
list<TestObject>/size/1 3828111 599530 6.39 *
list<TestObject>/size/10 9600605 1329535 7.22 * EASTL intentionally implements list::size as O(n).
list<TestObject>/size/100 62952334 15022551 4.19 * EASTL intentionally implements list::size as O(n).
list<TestObject>/splice 96536412 60804817 1.59 *

map<TestObject, uint32_t>/clear 1142127 1099066 1.04
map<TestObject, uint32_t>/count 19659726 14647548 1.34 *
map<TestObject, uint32_t>/equal_range 36680687 18219086 2.01 *
map<TestObject, uint32_t>/erase/key 28892154 16037774 1.80 *
map<TestObject, uint32_t>/erase/pos 1209643 1185495 1.02
map<TestObject, uint32_t>/erase/range 715402 670539 1.07
map<TestObject, uint32_t>/find 21020992 13429575 1.57 *
map<TestObject, uint32_t>/insert 59530871 51120640 1.16 *
map<TestObject, uint32_t>/iteration 972825 1191946 0.82 *
map<TestObject, uint32_t>/lower_bound 18852651 12495034 1.51 *
map<TestObject, uint32_t>/operator[] 22889573 16676736 1.37 *
map<TestObject, uint32_t>/upper_bound 18603584 12406922 1.50 *

set<uint32_t>/clear 919555 882988 1.04
set<uint32_t>/count 17561110 12461084 1.41 *
set<uint32_t>/equal_range 31522488 15230282 2.07 *
set<uint32_t>/erase range 687582 564765 1.22 *
set<uint32_t>/erase/pos 1044352 1045355 1.00
set<uint32_t>/erase/val 25525304 12940774 1.97 *
set<uint32_t>/find 17140751 10704866 1.60 *
set<uint32_t>/insert 56035051 45555664 1.23 *
set<uint32_t>/iteration 682669 640831 1.07
set<uint32_t>/lower_bound 16339932 10475740 1.56 *
set<uint32_t>/upper_bound 17779424 10652599 1.67 *

sort/q_sort/TestObject[] 17000866 14823515 1.15 *
sort/q_sort/TestObject[]/sorted 6658559 3263328 2.04 *
sort/q_sort/vector<TestObject> 17476629 14953285 1.17 *
sort/q_sort/vector<TestObject>/sorted 6667034 3327435 2.00 *
sort/q_sort/vector<ValuePair> 15391357 10820848 1.42 *
sort/q_sort/vector<ValuePair>/sorted 6617122 3232949 2.05 *
sort/q_sort/vector<uint32> 8343906 6014846 1.39 *
sort/q_sort/vector<uint32>/sorted 3039430 1003127 3.03 *

string<char16_t>/compare 1489709846 532664000 2.80 *
string<char16_t>/erase/pos,n 3528690 3439864 1.03
string<char16_t>/find/p,pos,n 2521448321 443752189 5.68 *
string<char16_t>/find_first_not_of/p,pos,n 661206 137419 4.81 *
string<char16_t>/find_first_of/p,pos,n 54746434 8521335 6.42 *
string<char16_t>/find_last_of/p,pos,n 10607778 1212414 8.75 *
string<char16_t>/insert/pos,p 3445016 3360126 1.03
string<char16_t>/iteration 580955636 579452556 1.00
string<char16_t>/operator[] 2206353 1987809 1.11 *
string<char16_t>/push_back 22421368 6007808 3.73 *
string<char16_t>/replace/pos,n,p,n 5138454 4464786 1.15 *
string<char16_t>/reserve 4922413418 335622 100.00 *
string<char16_t>/rfind/p,pos,n 1440308 380578 3.78 *
string<char16_t>/size 25355 25398 1.00
string<char16_t>/swap 2122704 1490823 1.42 *

string<char8_t>/compare 77222134 77443134 1.00
string<char8_t>/erase/pos,n 1965344 1956521 1.00
string<char8_t>/find/p,pos,n 2468091951 474205522 5.20 *
string<char8_t>/find_first_not_of/p,pos,n 660960 130211 5.08 *
string<char8_t>/find_first_of/p,pos,n 55020899 9240171 5.95 *
string<char8_t>/find_last_of/p,pos,n 10576210 1239053 8.54 *
string<char8_t>/insert/pos,p 1822756 1750880 1.04
string<char8_t>/iteration 2617889 2540148 1.03
string<char8_t>/operator[] 2254794 2256443 1.00
string<char8_t>/push_back 12463022 5210321 2.39 *
string<char8_t>/replace/pos,n,p,n 3744862 2855260 1.31 *
string<char8_t>/reserve 1372046888 218815 100.00 *
string<char8_t>/rfind/p,pos,n 1446232 366902 3.94 *
string<char8_t>/size 26859 25431 1.06
string<char8_t>/swap 2123350 1490509 1.42 *

vector<uint64>/erase 55164013 56417449 0.98
vector<uint64>/insert 55872973 56432664 0.99
vector<uint64>/iteration 1329102 1324623 1.00
vector<uint64>/operator[] 5264738 3136746 1.68 *
vector<uint64>/push_back 14903245 13171175 1.13 *
vector<uint64>/sort 88429095 88542171 1.00

Win32.VC71.STLPort.Release

EASTL version: 0.96.00
Platform: Windows on X86
Compiler: Microsoft Visual C++ compiler, version 1310
Allocator: PPMalloc::GeneralAllocator. Thread safety enabled.
Build: Full optimization. Inlining enabled.

Values are times to complete tests; smaller values are better.
Alarm indicates a greater than 10% difference.

Test STLPort EASTL Ratio Alarm
----------------------------------------------------------------------------------------
algorithm/adj_find/vector<TestObject> 2741046 2731441 1.00
algorithm/copy/vector<LargePOD> 6065923 5085142 1.19 *
algorithm/copy/vector<uint32_t> 158304 165555 0.96
algorithm/copy_backward/vector<LargePOD> 4710258 4896476 0.96
algorithm/copy_backward/vector<uint32_t> 146030 142630 1.02
algorithm/count/vector<uint64_t> 1395921 1406334 0.99
algorithm/equal_range/vector<uint64_t> 211692764 118969493 1.78 *
algorithm/fill/bool[] 366078 33737 10.85 *
algorithm/fill/char[]/'d' 33736 33771 1.00
algorithm/fill/vector<char>/'d' 28466 33720 0.84 *
algorithm/fill/vector<char>/0 366086 33728 10.85 *
algorithm/fill/vector<uint64_t> 466250 401591 1.16 *
algorithm/fill/vector<void*> 521603 693481 0.75 *
algorithm/fill_n/bool[] 599709 33762 17.76 *
algorithm/fill_n/char[] 599573 33711 17.79 *
algorithm/fill_n/vector<uint64_t> 434971 1374084 0.32 *
algorithm/find_end/string/end 1494742 85349 17.51 *
algorithm/find_end/string/middle 1480700 687208 2.15 *
algorithm/find_end/string/none 1540540 1546431 1.00
algorithm/lex_cmp/schar[] 921638 178797 5.15 *
algorithm/lex_cmp/vector<TestObject> 2623559 2643551 0.99
algorithm/lex_cmp/vector<uchar> 960899 183608 5.23 *
algorithm/lower_bound/vector<TestObject> 60630534 56531528 1.07
algorithm/min_element/vector<TestObject> 4209022 2768527 1.52 *
algorithm/rand_shuffle/vector<uint64_t> 13762010 15969052 0.86 *
algorithm/reverse/list<TestObject> 673387 731825 0.92
algorithm/reverse/vector<TestObject> 634576 754511 0.84 *
algorithm/search/string<char> 1262599 1387608 0.91
algorithm/search_n/string<char> 1166242 458592 2.54 *
algorithm/unique/vector<TestObject> 4912193 5336317 0.92
algorithm/unique/vector<uint32_t> 809387 809081 1.00
algorithm/unique/vector<uint64_t> 4371814 2414255 1.81 *
algorithm/upper_bound/vector<uint32_t> 31899081 29555596 1.08

bitset<1500>/>>=/1 63308136 40553560 1.56 * STLPort is broken, neglects wraparound check.
bitset<1500>/count 62523178 22799473 2.74 *
bitset<1500>/flip 20302845 19919232 1.02
bitset<1500>/reset 18892015 15403148 1.23 *
bitset<1500>/set() 15803302 17322192 0.91
bitset<1500>/set(i) 2799271 2999310 0.93
bitset<1500>/test 2999293 2799262 1.07

bitset<15>/>>=/1 1199239 3199256 0.37 * STLPort is broken, neglects wraparound check.
bitset<15>/count 3599461 2199231 1.64 *
bitset<15>/flip 1199231 1199188 1.00
bitset<15>/reset 1199188 1199180 1.00
bitset<15>/set() 1199214 1199180 1.00
bitset<15>/set(i) 2599257 1399262 1.86 *
bitset<15>/test 2599274 2599283 1.00

bitset<35>/>>=/1 6643974 4599239 1.44 * STLPort is broken, neglects wraparound check.
bitset<35>/count 5151331 5399438 0.95
bitset<35>/flip 1999404 1199273 1.67 *
bitset<35>/reset 9805285 1399313 7.01 *
bitset<35>/set() 2799279 1199248 2.33 *
bitset<35>/set(i) 2799246 1599241 1.75 *
bitset<35>/test 2999234 2999251 1.00

bitset<75>/>>=/1 7002045 6999333 1.00 STLPort is broken, neglects wraparound check.
bitset<75>/count 5999351 3002259 2.00 *
bitset<75>/flip 3599334 3599163 1.00
bitset<75>/reset 9799344 3399218 2.88 *
bitset<75>/set() 3599232 3599062 1.00
bitset<75>/set(i) 2799228 1599284 1.75 *
bitset<75>/test 2999250 2799339 1.07

deque<ValuePair>/erase 127108651 115258113 1.10
deque<ValuePair>/insert 137727889 116552332 1.18 *
deque<ValuePair>/iteration 7144182 6009899 1.19 *
deque<ValuePair>/operator[] 34241222 20535039 1.67 *
deque<ValuePair>/push_back 6585800 3932126 1.67 *
deque<ValuePair>/push_front 6805865 3993513 1.70 *
deque<ValuePair>/sort 395352323 348778188 1.13 *

hash_map<string, uint32_t>/clear 426640 447015 0.95
hash_map<string, uint32_t>/count 4359344 3883089 1.12 *
hash_map<string, uint32_t>/erase pos 584392 458142 1.28 *
hash_map<string, uint32_t>/erase range 221034 196078 1.13 *
hash_map<string, uint32_t>/erase val 3539867 3790813 0.93
hash_map<string, uint32_t>/find 3966831 3811910 1.04
hash_map<string, uint32_t>/find_as/char* 11591612 4243710 2.73 *
hash_map<string, uint32_t>/insert 16763887 16719194 1.00
hash_map<string, uint32_t>/iteration 909968 478609 1.90 *
hash_map<string, uint32_t>/operator[] 4360041 4108313 1.06

hash_map<uint32_t, TestObject>/clear 302634 283722 1.07
hash_map<uint32_t, TestObject>/count 916487 907426 1.01
hash_map<uint32_t, TestObject>/erase pos 388042 321385 1.21 *
hash_map<uint32_t, TestObject>/erase range 122680 116280 1.06
hash_map<uint32_t, TestObject>/erase val 1710931 1729529 0.99
hash_map<uint32_t, TestObject>/find 1089462 1346527 0.81 *
hash_map<uint32_t, TestObject>/insert 4560310 5072350 0.90 *
hash_map<uint32_t, TestObject>/iteration 960117 495354 1.94 *
hash_map<uint32_t, TestObject>/operator[] 1872830 1890595 0.99

heap (uint32_t[])/make_heap 3528418 3327257 1.06
heap (uint32_t[])/pop_heap 63243859 61011853 1.04
heap (uint32_t[])/push_heap 11602424 10045869 1.15 *
heap (uint32_t[])/sort_heap 52965362 48744729 1.09

heap (vector<TestObject>)/make_heap 13191456 13089711 1.01
heap (vector<TestObject>)/pop_heap 148555656 144787742 1.03
heap (vector<TestObject>)/push_heap 28696689 26618830 1.08
heap (vector<TestObject>)/sort_heap 112473989 114018643 0.99

list<TestObject>/ctor(it) 80186731 74006287 1.08
list<TestObject>/ctor(n) 6232311 6128007 1.02
list<TestObject>/erase 344556374 212877808 1.62 *
list<TestObject>/find 39859075 14591347 2.73 *
list<TestObject>/insert 86935153 56138233 1.55 *
list<TestObject>/push_back 79569180 46700641 1.70 *
list<TestObject>/remove 785786758 324201016 2.42 *
list<TestObject>/reverse 45248186 24852759 1.82 *
list<TestObject>/size/1 219844 219496 1.00
list<TestObject>/size/10 519563 519579 1.00 EASTL intentionally implements list::size as O(n).
list<TestObject>/size/100 4567194 101230266 0.05 * EASTL intentionally implements list::size as O(n).
list<TestObject>/splice 68321087 23601687 2.89 *

map<TestObject, uint32_t>/clear 168011 180540 0.93
map<TestObject, uint32_t>/count 4830439 5139287 0.94
map<TestObject, uint32_t>/equal_range 8700090 6158531 1.41 *
map<TestObject, uint32_t>/erase/key 6696776 4617038 1.45 *
map<TestObject, uint32_t>/erase/pos 309273 333183 0.93
map<TestObject, uint32_t>/erase/range 137419 136068 1.01
map<TestObject, uint32_t>/find 4773498 4931352 0.97
map<TestObject, uint32_t>/insert 9651877 9311699 1.04
map<TestObject, uint32_t>/iteration 372946 416364 0.90 *
map<TestObject, uint32_t>/lower_bound 4784234 4915797 0.97
map<TestObject, uint32_t>/operator[] 5040254 5183147 0.97
map<TestObject, uint32_t>/upper_bound 4724292 4915984 0.96

set<uint32_t>/clear 165300 173289 0.95
set<uint32_t>/count 4958654 4885086 1.02
set<uint32_t>/equal_range 8434134 5698681 1.48 *
set<uint32_t>/erase range 145554 133960 1.09
set<uint32_t>/erase/pos 299914 324760 0.92
set<uint32_t>/erase/val 6506155 4335034 1.50 *
set<uint32_t>/find 4866879 4556043 1.07
set<uint32_t>/insert 8340523 8957257 0.93
set<uint32_t>/iteration 294465 343442 0.86 *
set<uint32_t>/lower_bound 4548095 4756498 0.96
set<uint32_t>/upper_bound 4559196 4521498 1.01

sort/q_sort/TestObject[] 7316766 7013894 1.04
sort/q_sort/TestObject[]/sorted 1668439 1332885 1.25 *
sort/q_sort/vector<TestObject> 7331530 7017260 1.04
sort/q_sort/vector<TestObject>/sorted 1601629 1247120 1.28 *
sort/q_sort/vector<ValuePair> 7071643 7067869 1.00
sort/q_sort/vector<ValuePair>/sorted 2136390 1703799 1.25 *
sort/q_sort/vector<uint32> 3292891 2943627 1.12 *
sort/q_sort/vector<uint32>/sorted 653693 473612 1.38 *

string<char16_t>/compare 356579259 432760228 0.82 *
string<char16_t>/erase/pos,n 3430422 3428645 1.00
string<char16_t>/find/p,pos,n 229263402 225830975 1.02
string<char16_t>/find_first_not_of/p,pos,n 187391 81404 2.30 *
string<char16_t>/find_first_of/p,pos,n 4411831 4413532 1.00
string<char16_t>/find_last_of/p,pos,n 731655 726155 1.01
string<char16_t>/insert/pos,p 3408628 3319726 1.03
string<char16_t>/iteration 309993861 310333547 1.00
string<char16_t>/operator[] 580839 579904 1.00
string<char16_t>/push_back 3983338 2975553 1.34 *
string<char16_t>/replace/pos,n,p,n 4361095 4211504 1.04
string<char16_t>/reserve 935141729 247010 100.00 *
string<char16_t>/rfind/p,pos,n 248956 223397 1.11 *
string<char16_t>/size 13311 13107 1.02
string<char16_t>/swap 519129 579445 0.90 *

string<char8_t>/compare 76695559 76828015 1.00
string<char8_t>/erase/pos,n 1951566 1947282 1.00
string<char8_t>/find/p,pos,n 185878944 185605039 1.00
string<char8_t>/find_first_not_of/p,pos,n 196877 81600 2.41 *
string<char8_t>/find_first_of/p,pos,n 4147685 4145356 1.00
string<char8_t>/find_last_of/p,pos,n 605897 598222 1.01
string<char8_t>/insert/pos,p 1781592 1768264 1.01
string<char8_t>/iteration 921502 921272 1.00
string<char8_t>/operator[] 361250 359873 1.00
string<char8_t>/push_back 3363288 2530493 1.33 *
string<char8_t>/replace/pos,n,p,n 2682600 2633130 1.02
string<char8_t>/reserve 672517501 78387 100.00 *
string<char8_t>/rfind/p,pos,n 226202 200013 1.13 *
string<char8_t>/size 11280 11109 1.02
string<char8_t>/swap 519393 559759 0.93

vector<uint64>/erase 55184856 55192217 1.00
vector<uint64>/insert 56764267 55682726 1.02
vector<uint64>/iteration 423122 424039 1.00
vector<uint64>/operator[] 1189397 860991 1.38 *
vector<uint64>/push_back 5626609 4027317 1.40 *
vector<uint64>/sort 49227036 49231362 1.00



End of document




================================================ FILE: doc/html/EASTL Best Practices.html ================================================ EASTL Best Practices

EASTL Best Practices

In this document we discuss best practices for using EASTL. The primary emphasis is on performance with a secondary emphasis on correctness and maintainability. Some best practices apply only to some situations, and these will be pointed out as we go along. In order to be easily digestible, we present these practices as a list of items in the tone of the Effective C++ series of books.

Summary

The descriptions here are intentionally terse; this is to make them easier to visually scan.

1 Consider intrusive containers.
2 Consider fixed-size containers.
3 Consider custom allocators.
4 Consider hash tables instead of maps.
5 Consider a vector_map (a.k.a. sorted vector) for unchanging data.
6 Consider slist instead of list.
7 Avoid redundant end() and size() in loops.
8 Iterate containers instead of using operator[].
9 Learn to use the string class appropriately.
10 Cache list size if you want size() to be O(1).
11 Use empty() instead of size() when possible.
12 Know your container efficiencies.
13 Use vector::reserve.
14 Use vector::set_capacity to trim memory usage.
15 Use swap() instead of a manually implemented version.
16 Consider storing pointers instead of objects.
17 Consider smart pointers instead of raw pointers.
18 Use iterator pre-increment instead of post-increment.
19 Make temporary references so the code can be traced/debugged.
20 Consider bitvector or bitset instead of vector<bool>.
21 Vectors can be treated as contiguous memory.
22 Search hash_map<string> via find_as() instead of find().
23 Take advantage of type_traits (e.g. EASTL_DECLARE_TRIVIAL_RELOCATE).
24 Name containers to track memory usage.
25 Learn the algorithms.
26 Pass and return containers by reference instead of value.
27 Consider using reset_lose_memory() for fast container teardown.
28 Consider using fixed_substring instead of copying strings.
29 Consider using vector::push_back(void).

Detail

1 Consider intrusive containers.

Intrusive containers (such as intrusive_list) differ from regular containers (such as list) in that they use the stored objects to manage the linked list instead of using nodes allocated from a memory heap. The result is better usage of memory. Additionally intrusive_list objects can be removed from their list without knowing what list they belong to. To make an intrusive_list of Widgets, you have Widget inherit from intrusive_list_node or simply have mpPrev/mpNext member variables.

To create an intrusive_list container, you can use the following code:

class Widget : public intrusive_list_node
{ };

intrusive_list<Widget> widgetList;
widgetList.push_back(someWidget);

2 Consider fixed-size containers.

Fixed-size containers (such as fixed_list) are variations of regular containers (such as list) in that they allocate from a fixed block of local memory instead of allocating from a generic heap. The result is better usage of memory due to reduced fragmentation, better cache behavior, and faster allocation/deallocation. The presence of fixed-size containers negates the most common complaint that people have about STL: that it fragments the heap or "allocates all over the place."

EASTL fixed containers include:

  • fixed_list
  • fixed_slist
  • fixed_vector
  • fixed_string
  • fixed_map
  • fixed_multimap
  • fixed_set
  • fixed_multiset
  • fixed_hash_map
  • fixed_hash_multimap
  • fixed_hash_set
  • fixed_hash_multiset

To create a fixed_set, you can use the following code:

fixed_set<int, 25> intSet; // Create a set capable of holding 25 elements.
intSet.push_back(37);

3 Consider custom allocators.

While EASTL provides fixed-size containers in order to control container memory usage, EASTL lets you assign a custom allocator to any container. This lets you define your own memory pool. EASTL has a more flexible and powerful mechanism of doing this than standard STL, as EASTL understands object alignment requirements, allows for debug naming, allows for sharing allocators across containers, and allows dynamic allocator assignment.

To create a list container that uses your custom allocator and uses block naming, you can use the following code:

list<int> intList(pSomeAllocator, "graphics/intList");
intList.push_back(37);

4 Consider hash tables instead of maps.

Hash containers (such as hash_map) provide the same interface as associative containers (such as map) but have faster lookup and use less memory. The primary disadvantage relative to associative containers is that hash containers are not sorted.

To make a hash_map (dictionary) of integers to strings, you can use the following code:

hash_map<int, const char*> stringTable;
stringTable[37] = "hello";

5 Consider a vector_map (a.k.a. sorted vector) for unchanging data.

You can improve speed, memory usage, and cache behavior by using a vector_map instead of a map (or vector_set instead of set, etc.). The primary disadvantage of vector_map is that insertion and removal of elements are O(n) instead of the O(log n) of a map. However, if your associative container is not going to be changing much or at all, you can benefit from using a vector_map. Consider calling reserve on the vector_map in order to set the desired capacity up front.

To make a vector_set, you can use the following code:

vector_set<int> intSet(16); // Create a vector_set with an initial capacity of 16.
intSet.insert(37);

Note that you can use containers other than vector to implement vector_set. Here's how you do it with deque:

vector_set<int, less<int>, EASTLAllocatorType, deque<int> > intSet;
intSet.insert(37);

6 Consider slist instead of list.

An slist is a singly-linked list; it is much like a list except that it can only be traversed in a forward direction and not a backward direction. The benefit is that each node is 4 bytes instead of 8 bytes. This is a small improvement, but if you don't need reverse iteration then it can be an improvement. There's also intrusive_slist as an option.

To make an slist, you can use the following code:

slist<int> intSlist;
intSlist.push_front(37);

7 Avoid redundant end() and size() in loops.

Instead of writing code like this:

for(deque<int>::iterator it = d.begin(); it != d.end(); ++it)
    ...
write code like this:
for(deque<int>::iterator it = d.begin(), itEnd = d.end(); it != itEnd; ++it)
    ...
The latter avoids a function call and return of an object (which in deque's case happens to be more than just a pointer). The above only works when the container is unchanged or for containers that have a constant end value. By "constant end value" we mean containers which can be modified but whose end() value always remains the same.
Constant begin:
    array[1]

Non-constant begin:
    string, vector, deque, intrusive_list, intrusive_slist,
    vector_map, vector_multimap, vector_set, vector_multiset, bit_vector,
    hash_map, hash_multimap, hash_set, hash_multiset,
    intrusive_hash_map, intrusive_hash_multimap, intrusive_hash_set, intrusive_hash_multiset

Constant end:
    array, list, slist, intrusive_list, intrusive_slist,
    map, multimap, set, multiset,
    hash_map[2], hash_multimap[2], hash_set[2], hash_multiset[2],
    intrusive_hash_map, intrusive_hash_multimap, intrusive_hash_set, intrusive_hash_multiset

Non-constant end:
    string, vector, deque,
    vector_map, vector_multimap, vector_set, vector_multiset, bit_vector

[1] Arrays can be neither resized nor reallocated.
[2] Constant end if the hashtable can't/won't re-hash. Non-constant if it can re-hash.

8 Iterate containers instead of using operator[].

It's faster to iterate random access containers via iterators than via operator[], though operator[] usage may look simpler.

Instead of doing this:

for(unsigned i = 0, iEnd = intVector.size(); i != iEnd; ++i)
    intVector[i] = 37;

you can execute more efficiently by doing this:

for(vector<int>::iterator it = intVector.begin(), itEnd = intVector.end(); it != itEnd; ++it)
    *it = 37;

9 Learn to use the string class appropriately.

Oddly enough, the most mis-used STL container is easily the string class. The tales of string abuse could rival the 1001 Arabian Nights. Most of the abuses involve doing things in a harder way than need be. In examining the historical mis-uses of string, it is clear that many of the problems stem from the user thinking in terms of C-style string operations instead of object-oriented strings. This explains why statements such as strlen(s.c_str()) are so common, whereas the user could just use s.length() instead and be both clearer and more efficient.

Here we provide a table of actual collected examples of things done and how they could have been done instead.

What was written What could have been written

s = s.Left(i) + '+' + s.Right(s.length() - i - 1);

s[i] = '+';

string s(""); // This is the most commonly found misuse.

string s;

s = "";

s.clear();

s.c_str()[0] = 'u';

s[0] = 'u';

len = strlen(s.c_str());

len = s.length();

s = string("u");
s = "u";

puts(s + string("u"));

puts(s + "u");

string s(" ");
puts(s.c_str());

puts(" ");

s.sprintf("u");

s = "u";

char array[32];
sprintf(array, "%d", 10);
s = string(array);

s.sprintf("%d", 10);


The chances are that if you want to do something with a string, there is a very basic way to do it. You don't want your code to appear in a future version of the above table.

10 Cache list size if you want list::size() to be O(1).

EASTL's list, slist, intrusive_list, and intrusive_slist containers have a size() implementation which is O(n). That is, these containers don't keep a count (cache) of the current list size and when you call the size() function they iterate the list. This is by design and the reasoning behind it has been deeply debated and considered (and is discussed in the FAQ and the list header file). In summary, list doesn't cache its size because the only function that would benefit is the size function while many others would be negatively impacted and the memory footprint would be negatively impacted, yet list::size is not a very frequently called function in well-designed code. At the same time, nothing prevents the user from caching the size himself, though admittedly it adds some tedium and risk to the code writing process.

Here's an example of caching the list size manually:

list<int> intList;
size_t    n = 0;

intList.push_back(37);
++n;
intList.pop_front();
--n;

11 Use empty() instead of size() when possible.

All conventional containers have both an empty function and a size function. For all containers empty() executes with O(1) (constant time) efficiency. However, this is not so for size(), as some containers need to calculate the size and others need to do pointer subtraction (which may involve integer division) to find the size.

12 Know your container efficiencies.

The above two practices lead us to this practice, which is a generalization of the above. We present a table of basic information for the conventional EASTL containers. The values are described at the bottom.

Container

empty() efficiency size() efficiency operator[] efficiency

insert() efficiency

erase() efficiency

find() efficiency

sort efficiency

slist 1 O(n) - O(1) O(1) O(n) O(n+)

list

1 n -

1

1

n

n log(n)

intrusive_slist 1 n - 1 1 1 n+
intrusive_list 1 n - 1 1 1 n log(n)
array 1 1 1 - - n n log(n)
vector 1 1a 1 1 at end, else n 1 at end, else n n n log(n)
vector_set 1 1a 1 1 at end, else n 1 at end, else n log(n) 1
vector_multiset 1 1a 1 1 at end, else n 1 at end, else n log(n) 1
vector_map 1 1a 1 1 at end, else n 1 at end, else n log(n) 1
vector_multimap 1 1a 1 1 at end, else n 1 at end, else n log(n) 1
deque 1 1a 1 1 at begin or end,
else n / 2
1 at begin or end,
else n / 2
n n log(n)
bit_vector 1 1a 1 1 at end, else n 1 at end, else n n n log(n)
string, cow_string 1 1a 1 1 at end, else n 1 at end, else n n n log(n)
set 1 1 - log(n) log(n) log(n) 1
multiset 1 1 - log(n) log(n) log(n) 1
map 1 1 log(n) log(n) log(n) log(n) 1
multimap 1 1 - log(n) log(n) log(n) 1
hash_set 1 1 - 1 1 1 -
hash_multiset 1 1 - 1
1 1 -
hash_map 1 1 - 1 1 1 -
hash_multimap 1 1 - 1 1 1 -
intrusive_hash_set 1 1 - 1 1 1 -
intrusive_hash_multiset 1 1 - 1 1 1 -
intrusive_hash_map 1 1 - 1 1 1 -
intrusive_hash_multimap 1 1 - 1 1 1 -


Notes:

  • - means that the operation does not exist.
  • 1 means amortized constant time. Also known as O(1)
  • n means time proportional to the container size. Also known as O(n)
  • log(n) means time proportional to the natural logarithm of the container size. Also known as O(log(n))
  • n log(n) means time proportional to log(n) times the size of the container. Also known as O(n log(n))
  • n+ means that the time is at least n, and possibly higher.
  • Inserting at the end of a vector may cause the vector to be resized; resizing a vector is O(n). However, the amortized time complexity for vector insertions at the end is constant.
  • Sort assumes the usage of the best possible sort for a large container of random data. Some sort algorithms (e.g. quick_sort) require random access iterators and so the sorting of some containers requires a different sort algorithm. We do not include bucket or radix sorts, as they are always O(n).
  • a vector, deque, string size is O(1) but involves pointer subtraction and thus integer division and so is not as efficient as containers that store the size directly.

13 Use vector::reserve.

You can prevent vectors (and strings) from reallocating as you add items by specifying up front how many items you will be requiring. You can do this in the constructor or by calling the reserve function at any time. The capacity function returns the amount of space which is currently reserved.

Here's how you could specify reserved capacity in a vector:

vector<Widget> v(37);   // Reserve space to hold up to 37 items.
    or
vector<Widget> v;       // This empty construction causes no memory to be allocated or reserved.
v.reserve(37);
The EASTL vector (and string) implementation looks like this: template <typename T>
class vector {
    T* mpBegin;     // Beginning of used element memory.
    T* mpEnd;       // End of used element memory.
    T* mpCapacity;  // End of storage capacity. Is >= mpEnd
}
Another approach to being efficient with vector memory usage is to use fixed_vector.

14 Use vector::set_capacity to trim memory usage.

A commonly asked question about vectors and strings is, "How do I reduce the capacity of a vector?" The conventional solution for std STL is to use the somewhat non-obvious trick of using vector<Widget>(v).swap(v). EASTL provides the same functionality via a member function called set_capacity() which is present in both the vector and string classes. 

An example of reducing a vector is the following:

vector<Widget> v;
...
v.set_capacity();
An example of resizing to zero and completely freeing the memory of a vector is the following:
vector<Widget> v;
...
v.set_capacity(0);

15 Use swap() instead of a manually implemented version.

The generic swap algorithm provides a basic version for any kind of object. However, each EASTL container provides a specialization of swap which is optimized for that container. For example, the list container implements swap by simply swapping the internal member pointers and not by moving individual elements.

16 Consider storing pointers instead of objects.

There are times when storing pointers to objects is more efficient or useful than storing objects directly in containers. It can be more efficient to store pointers when the objects are big and the container may need to construct, copy, and destruct objects during sorting or resizing. Moving pointers is usually faster than moving objects. It can be useful to store pointers instead of objects when somebody else owns the objects or the objects are in another container. It might be useful for a Widget to be in a list and in a hash table at the same time.

17 Consider smart pointers instead of raw pointers.

If you take the above recommendation and store objects as pointers instead of as objects, you may want to consider storing them as smart pointers instead of as regular pointers. This is particularly useful for when you want to delete the object when it is removed from the container. Smart pointers will automatically delete the pointed-to object when the smart pointer is destroyed. Otherwise, you will have to be careful about how you work with the list so that you don't generate memory leaks. Smart pointers implement a shared reference count on the stored pointer, and so any operation you do on a smart pointer container will do the right thing. Any pointer can be stored in a smart pointer, and custom new/delete mechanisms can work with smart pointers. The primary smart pointer is shared_ptr.

Here is an example of creating and using a shared_ptr:

typedef shared_ptr<Widget> WPtr;
list<WPtr> wList;

wList.push_back(WPtr(new Widget)); // The user may have operator new/delete overrides.
wList.pop_back();                  // Implicitly deletes the Widget.

Here is an example of creating and using a shared_ptr that uses a custom allocation and deallocation mechanism:

typedef shared_ptr<Widget, EASTLAllocatorType, WidgetDelete> WPtr; // WidgetDelete is a custom destroyer.
list<WPtr> wList;

wList.push_back(WPtr(WidgetCreate(Widget))); // WidgetCreate is a custom allocator.
wList.pop_back();                            // Implicitly calls WidgetDelete.

18 Use iterator pre-increment instead of post-increment.

Pre-increment (e.g. ++x) of iterators is better than post-increment (x++) when the latter is not specifically needed. It is common to find code that uses post-incrementing when it could instead use pre-incrementing; presumably this is due to post-increment looking a little better visually. The problem is that the latter constructs a temporary object before doing the increment. With built-in types such as pointers and integers, the compiler will recognize that the object is a trivial built-in type and that the temporary is not needed, but the compiler cannot do this for other types, even if the compiler sees that the temporary is not used; this is because the constructor may have important side effects and the compiler would be broken if it didn't construct the temporary object.

EASTL iterators are usually not trivial types and so it's best not to hope the compiler will do the best thing. Thus you should always play it safe and use pre-increment of iterators whenever post-increment is not required.

Here is an example of using iterator pre-increment; for loops like this should always use pre-increment:

for(set<int>::iterator it(intSet.begin()), itEnd(intSet.end()); it != itEnd; ++it)
    *it = 37;

19 Make temporary references so the code can be traced/debugged.

Users want to be able to inspect or modify variables which are referenced by iterators. While EASTL containers and iterators are designed to make this easier than other STL implementations, it makes things very easy if the code explicitly declares a reference to the iterated element. In addition to making the variable easier to debug, it also makes code easier to read and makes the debug (and possibly release) version of the application run more efficiently.

Instead of doing this:

for(list<Widget>::iterator it = wl.begin(), itEnd = wl.end(); it != itEnd; ++it) {
    (*it).x = 37;
    (*it).y = 38;
    (*it).z = 39;
}

Consider doing this:

for(list<Widget>::iterator it = wl.begin(), itEnd = wl.end(); it != itEnd; ++it) {
    Widget& w = *it; // The user can easily inspect or modify w here.
    w.x = 37;
    w.y = 38;
    w.z = 39;
}

20 Consider bitvector or bitset instead of vector<bool>.

In EASTL, a vector of bool is exactly that. It intentionally does not attempt to make a specialization which implements a packed bit array. The bitvector class is specifically designed for this purpose. There are arguments either way, but if vector<bool> were allowed to be something other than an array of bool, it would go against user expectations and prevent users from making a true array of bool. There's a mechanism for specifically getting the bit packing, and it is bitvector.

Additionally there is bitset, which is not a conventional iterateable container but instead acts like bit flags. bitset may better suit your needs than bitvector if you need to do flag/bit operations instead of array operations. bitset does have an operator[], though.

21 Vectors can be treated as contiguous memory.

EASTL vectors (and strings) guarantee that elements are present in a linear contiguous array. This means that you can use a vector as you would a C-style array by using the vector data() member function or by using &v[0].

To use a vector as a pointer to an array, you can use the following code:

struct Widget {
    uint32_t x;
    uint32_t y;
};

vector<Widget> v;

quick_sort((uint64_t*)v.data(), (uint64_t*)(v.data() + v.size()));

22 Search hash_map<string> via find_as() instead of find().

EASTL hash tables offer a bonus function called find_as which lets you search a hash table by something other than the container type. This is particularly useful for hash tables of string objects that you want to search for by string literals (e.g. "hello") or char pointers. If you search for a string via the find function, your string literal will necessarily be converted to a temporary string object, which is inefficient.

To use find_as, you can use the following code:

hash_map<string, int> hashMap;
hash_map<string, int>::iterator it = hashMap.find_as("hello"); // Using default hash and compare.

23 Take advantage of type_traits (e.g. EASTL_DECLARE_TRIVIAL_RELOCATE).

EASTL includes a fairly serious type traits library that is on par with the one found in Boost but offers some additional performance-enhancing help as well. The type_traits library provides information about class types, as opposed to class instances. For example, the is_integral type trait tells if a type is one of int, short, long, char, uint64_t, etc.

There are three primary uses of type traits:

  • Allowing for optimized operations on some data types.
  • Allowing for different logic pathways based on data types.
  • Allowing for compile-time assertions about data type expectations.
Most of the type traits are automatically detected and implemented by the compiler. However, EASTL allows for the user to explicitly give the compiler hints about type traits that the compiler cannot know, via the EASTL_DECLARE declarations. If the user has a class that is relocatable (i.e. can safely use memcpy to copy values), the user can use the EASTL_DECLARE_TRIVIAL_RELOCATE declaration to tell the compiler that the class can be copied via memcpy. This will automatically significantly speed up some containers and algorithms that use that class.

Here is an example of using type traits to tell if a value is a floating point value or not:
template <typename T>
DoSomething(T t) {
    assert(is_floating_point<T>::value);
}
Here is an example of declaring a class as relocatable and using it in a vector.
EASTL_DECLARE_TRIVIAL_RELOCATE(Widget); // Usually you put this at the Widget class declaration.
vector<Widget> wVector;
wVector.erase(wVector.begin());         // This operation will be optimized via using memcpy.
The following is a full list of the currently recognized type traits. Most of these are implemented as of this writing, but if there is one that is missing, feel free to contact the maintainer of this library and request that it be completed.
  • is_void
  • is_integral
  • is_floating_point
  • is_arithmetic
  • is_fundamental
  • is_const
  • is_volatile
  • is_abstract
  • is_signed
  • is_unsigned
  • is_array
  • is_pointer
  • is_reference
  • is_member_object_pointer
  • is_member_function_pointer
  • is_member_pointer
  • is_enum
  • is_union
  • is_class
  • is_polymorphic
  • is_function
  • is_object
  • is_scalar
  • is_compound
  • is_same
  • is_convertible
  • is_base_of
  • is_empty
  • is_pod
  • is_aligned
  • has_trivial_constructor
  • has_trivial_copy
  • has_trivial_assign
  • has_trivial_destructor
  • has_trivial_relocate1
  • has_nothrow_constructor
  • has_nothrow_copy
  • has_nothrow_assign
  • has_virtual_destructor
  • alignment_of
  • rank
  • extent
1 has_trivial_relocate is not found in Boost nor the C++ standard update proposal. However, it is very useful in allowing for the generation of optimized object moving operations. It is similar to the is_pod type trait, but goes further and allows non-pod classes to be categorized as relocatable. Such categorization is something that no compiler can do, as only the user can know if it is such. Thus EASTL_DECLARE_TRIVIAL_RELOCATE  is provided to allow the user to give the compiler a hint.

24 Name containers to track memory usage.

All EASTL containers which allocate memory have a built-in function called set_name and have a constructor argument that lets you specify the container name. This name is used in memory tracking and allows for the categorization and measurement of memory usage. You merely need to supply a name for your container to use and it does the rest.

Here is an example of creating a list and naming it "collision list":

list<CollisionData> collisionList(allocator("collision list"));
    or
list<CollisionData> collisionList;
collisionList.get_allocator().set_name("collision list");

Note that EASTL containers do not copy the name contents but merely copy the name pointer. This is done for simplicity and efficiency. A user can get around this limitation by creating a persistently present string table. Additionally, the user can get around this by declaring static but non-const strings and modifying them at runtime.

25 Learn the algorithms.

EASTL algorithms provide a variety of optimized implementations of fundamental algorithms. Many of the EASTL algorithms are the same as the STL algorithm set, though EASTL adds additional algorithms and additional optimizations not found in STL implementations such as Microsoft's. The copy algorithm, for example, will memcpy data types that have the has_trivial_relocate type trait instead of doing an element-by-element copy.

The classifications we use here are not exactly the same as found in the C++ standard; they have been modified to be a little more intuitive. Not all the functions listed here may be yet available in EASTL as you read this. If you want some function then send a request to the maintainer. Detailed documentation for each algorithm is found in algorithm.h or the otherwise corresponding header file for the algorithm.

Search

  • find, find_if
  • find_end
  • find_first_of
  • adjacent_find
  • binary_search
  • search, search_n
  • lower_bound
  • upper_bound
  • equal_range

Sort

  • is_sorted
  • quick_sort
  • insertion_sort
  • shell_sort
  • heap_sort
  • merge_sort, merge_sort_buffer
  • merge
  • inplace_merge
  • partial_sort
  • stable_sort
  • partial_sort_copy
  • <other sort functions found in the EASTL bonus directories>

Modifying

  • fill, fill_n
  • generate, generate_n
  • random_shuffle
  • swap
  • iter_swap
  • swap_ranges
  • remove, remove_if
  • remove_copy, remove_copy_if
  • replace, replace_if
  • replace_copy, replace_copy_if
  • reverse
  • reverse_copy
  • rotate
  • rotate_copy
  • partition
  • stable_partition
  • transform
  • next_permutation
  • prev_permutation
  • unique
  • unique_copy

Non-Modifying

  • for_each
  • copy
  • copy_backward
  • count, count_if
  • equal
  • mismatch
  • min
  • max
  • min_element
  • max_element
  • lexicographical_compare
  • nth_element

Heap

  • is_heap
  • make_heap
  • push_heap
  • pop_heap
  • change_heap
  • sort_heap
  • remove_heap

Set

  • includes
  • set_difference
  • set_symmetric_difference
  • set_intersection
  • set_union

26 Pass and return containers by reference instead of value.

If you aren't paying attention you might accidentally write code like this:

void DoSomething(list<Widget> widgetList) {
    ...
}

The problem with the above is that widgetList is passed by value and not by reference. Thus a copy of the container is made and passed instead of a reference of the container being passed. This may seem obvious to some but this happens periodically and the compiler gives no warning and the code will often execute properly, but inefficiently. Of course there are some occasions where you really do want to pass values instead of references.

27 Consider using reset_lose_memory() for fast container teardown.

EASTL containers have a reset function which unilaterally resets the container to a newly constructed state. The contents of the container are forgotten; no destructors are called and no memory is freed. This is a risky but powerful function for the purpose of implementing very fast temporary containers. There are numerous cases in high performance programming when you want to create a temporary container out of a scratch buffer area, use the container, and then just "vaporize" it, as it would be a waste of time to go through the trouble of clearing the container and destroying and freeing the objects. Such functionality is often used with hash tables or maps and with a stack allocator (a.k.a. linear allocator).

Here's an example of usage of the reset function and a PPMalloc-like StackAllocator:

pStackAllocator->push_bookmark();
hash_set<Widget, less<Widget>, StackAllocator> wSet(pStackAllocator);
<use wSet>
wSet.reset_lose_memory();
pStackAllocator->pop_bookmark();

28 Consider using fixed_substring instead of copying strings.

EASTL provides a fixed_substring class which uses a reference to a character segment instead of allocating its own string memory. This can be a more efficient way to work with strings under some circumstances.

Here's an example of usage of fixed_substring:

basic_string<char> str("hello world");
fixed_substring<char> sub(str, 6, 5); // sub == "world"

fixed_substring can refer to any character array and not just one that derives from a string object.

29 Consider using vector::push_back(void).

EASTL provides an alternative way to insert elements into containers that avoids copy construction and/or the creation of temporaries. Consider the following code:

vector<Widget> widgetArray;
widgetArray.push_back(Widget());

The standard vector push_back function requires you to supply an object to copy from. This incurs the cost of the creation of a temporary and for some types of classes or situations this cost may be undesirable. It additionally requires that your contained class support copy-construction whereas you may not be able to support copy construction. As an alternative, EASTL provides a push_back(void) function which requires nothing to copy from but instead constructs the object in place in the container. So you can do this:

vector<Widget> widgetArray;
widgetArray.push_back();
widgetArray.back().x = 0; // Example of how to reference the new object.

Other containers with such copy-less functions include:

vector::push_back()
deque::push_back()
deque::push_front()
list::push_back()
list::push_front()
slist::push_front()
map::insert(const key_type& key)
multimap::insert(const key_type& key)
hash_map::insert(const key_type& key)
hash_multimap::insert(const key_type& key)

Note that the map functions above allow you to insert a default value specified by key alone and not a value_type like with the other map insert functions.


End of document




================================================ FILE: doc/html/EASTL Design.html ================================================ EASTL Design

EASTL Design

Introduction

EASTL (EA Standard Template Library) is designed to be a template library which encompasses and extends the functionality of standard C++ STL while improving it in various ways useful to game development. Much of EASTL's design is identical to standard STL, as the large majority of the STL is well-designed for many uses. The primary areas where EASTL deviates from standard STL implementations are essentially the following:

  • EASTL has a simplified and more flexible custom allocation scheme.
  • EASTL has significantly easier to read code.
  • EASTL has extension containers and algorithms.
  • EASTL has optimizations designed for game development.

Of the above items, the only one which is an incompatible difference with STL is the case of memory allocation. The method for defining a custom allocator for EASTL is slightly different than that of standard STL, though they are 90% similar. The 10% difference, however, is what makes EASTL generally easier and more powerful to work with than standard STL. Containers without custom allocators act identically between EASTL and standard STL.

Motivations

Our motivations for making EASTL drive the design of EASTL. As identified in the EASTL RFC (Request for Comment), the primary reasons for implementing a custom version of the STL are:

  • Some STL implementations (especially Microsoft STL) have inferior performance characteristics that make them unsuitable for game development. EASTL is faster than all existing STL implementations.
  • The STL is sometimes hard to debug, as most STL implementations use cryptic variable names and unusual data structures.
  • STL allocators are sometimes painful to work with, as they have many requirements and cannot be modified once bound to a container.
  • The STL includes excess functionality that can lead to larger code than desirable. It's not very easy to tell programmers they shouldn't use that functionality.
  • The STL is implemented with very deep function calls. This results in unacceptable performance in non-optimized builds and sometimes in optimized builds as well.
  • The STL doesn't support alignment of contained objects.
  • STL containers won't let you insert an entry into a container without supplying an entry to copy from. This can be inefficient.
  • Useful STL extensions (e.g. slist, hash_map, shared_ptr) found in existing STL implementations such as STLPort are not portable because they don't exist in other versions of STL or aren't consistent between STL versions.
  • The STL lacks useful extensions that game programmers find useful (e.g. intrusive_list) but which could be best optimized in a portable STL environment.
  • The STL has specifications that limit our ability to use it efficiently. For example, STL vectors are not guaranteed to use contiguous memory and so cannot be safely used as an array.
  • The STL puts an emphasis on correctness before performance, whereas sometimes you can get significant performance gains by making things less academically pure.
  • STL containers have private implementations that don't allow you to work with their data in a portable way, yet sometimes this is an important thing to be able to do (e.g. node pools).
  • All existing versions of STL allocate memory in empty versions of at least some of their containers. This is not ideal and prevents optimizations such as container memory resets that can greatly increase performance in some situations.
  • The STL is slow to compile, as most modern STL implementations are very large.
  • There are legal issues that make it hard for us to freely use portable STL implementations such as STLPort.
  • We have no say in the design and implementation of the STL and so are unable to change it to work for our needs.

Prime Directives

The implementation of EASTL is guided foremost by the following directives which are listed in order of importance.

  1. Efficiency (speed and memory usage)
  2. Correctness
  3. Portability
  4. Readability

Note that unlike commercial STL implementations which must put correctness above all, we put a higher value on efficiency. As a result, some functionality may have some usage limitation that is not present in other similar systems but which allows for more efficient operation, especially on the platforms of significance to us.

Portability is significant, but not critical. Yes, EASTL must compile and run on all platforms that we will ship games for. But we don't take that to mean under all compilers that could be conceivably used for such platforms. For example, Microsoft VC6 can be used to compile Windows programs, but VC6's C++ support is too weak for EASTL and so you simply cannot use EASTL under VC6.

Readability is something that EASTL achieves better than many other templated libraries, particularly Microsoft STL and STLPort. We make every attempt to make EASTL code clean and sensible. Sometimes our need to provide optimizations (particularly related to type_traits and iterator types) results in less simple code, but efficiency happens to be our prime directive and so it overrides all other considerations.

Thread Safety

It's not simple enough to simply say that EASTL is thread-safe or thread-unsafe. However, we can say that with respect to thread safety that EASTL does the right thing.

Individual EASTL containers are not thread-safe. That is, access to an instance of a container from multiple threads at the same time is unsafe if any of those accesses are modifying operations. A given container can be read from multiple threads simultaneously as well as any other standalone data structure. If a user wants to be able to have modifying access to an instance of a container from multiple threads, it is up to the user to ensure that proper thread synchronization occurs. This usually means using a mutex.

EASTL classes other than containers are the same as containers with respect to thread safety. EASTL functions (e.g. algorithms) are inherently thread-safe as they have no instance data and operate entirely on the stack. As of this writing, no EASTL function allocates memory and thus doesn't bring thread safety issues via that means.

The user may well need to be concerned about thread safety with respect to memory allocation. If the user modifies containers from multiple threads, then allocators are going to be accessed from multiple threads. If an allocator is shared across multiple container instances (of the same type of container or not), then mutexes (as discussed above) the user uses to protect access to individual instances will not suffice to provide thread safety for allocators used across multiple instances. The conventional solution here is to use a mutex within the allocator if it is expected to be used by multiple threads.

EASTL uses neither static nor global variables and thus there are no inter-instance dependencies that would make thread safety difficult for the user to implement.

Container Design

All EASTL containers follow a set of consistent conventions. Here we define the prototypical container which has the minimal functionality that all (non-adapter) containers must have. Some containers (e.g. stack) are explicitly adapter containers and thus wrap or inherit the properties of the wrapped container in a way that is implementation specific.

template <class T, class Allocator = EASTLAllocator>
class container
{
public:
    typedef container<T, Allocator>            this_type;
    typedef T                                  value_type;
    typedef T*                                 pointer;
    typedef const T*                           const_pointer;
    typedef T&                                 reference;
    typedef const T&                           const_reference;
    typedef ptrdiff_t                          difference_type;
    typedef impl_defined                       size_type;
    typedef impl-defined                       iterator;
    typedef impl-defined                       const_iterator;
    typedef reverse_iterator<iterator>         reverse_iterator;
    typedef reverse_iterator<const_iterator>   reverse_const_iterator;
    typedef Allocator                          allocator_type;

public:
    container(
const allocator_type& allocator = allocator_type());
    container(const
this_type& x);

    
this_type& operator=(this_type& x);
    void swap(
this_type& x);
    void reset();

    allocator_type& get_allocator();
    void            set_allocator(allocator_type& allocator);

    iterator       begin();
    const_iterator begin() const;
    iterator       end();
    const_iterator end() const;

    bool validate() const;
    int  validate_iterator(const_iterator i) const;

protected:
    allocator_type mAllocator;
};

template <class T,
class Allocator>
bool operator==(const container<T, Allocator>& a, const container<T,
Allocator>& b);

template <class T,
class Allocator>
bool operator!=(const container<T,
Allocator>& a, const container<T, Allocator>& b);

Notes:
  • Swapped containers do not swap their allocators.
  • Newly constructed empty containers do no memory allocation. Some STL and other container libraries allocate an initial node from the class memory allocator. EASTL containers by design never do this. If a container needs an initial node, that node should be made part of the container itself or be a static empty node object.
  • Empty containers (new or otherwise) contain no constructed objects, including those that might be in an 'end' node. Similarly, no user object (e.g. of type T) should be constructed unless required by the design and unless documented in the container/algorithm contract.
  • The reset function is a special extension function which unilaterally resets the container to an empty state without freeing the memory of the contained objects. This is useful for very quickly tearing down a container built into scratch memory. No memory is allocated by reset, and the container has no allocated memory after the reset is executed.
  • The validate and validate_iterator functions provide explicit container and iterator validation. EASTL provides an option to do implicit automatic iterator and container validation, but full validation (which can be potentially extensive) has too much of a performance cost to execute implicitly, even in a debug build. So EASTL provides these explicit functions which can be called by the user at the appropriate time and in optimized builds as well as debug builds.

Allocator Design

The most significant difference between EASTL and standard C++ STL is that standard STL containers are templated on an allocator class with the interface defined in std::allocator. std::allocator is defined in the C++ standard as this:

// Standard C++ allocator

template <class T>
class allocator

{
public:
    typedef size_t    size_type;
    typedef ptrdiff_t difference_type;
    typedef T*        pointer;
    typedef const T*  const_pointer;
    typedef T&        reference;
    typedef const T&  const_reference;
    typedef T         value_type;

    template <class U>
    struct rebind { typedef allocator<U> other; };


    allocator() throw();
    allocator(const allocator&) throw();
    template <class U>
    allocator(const allocator<U>&) throw();

   ~allocator() throw();

    pointer       address(reference x) const;
    const_pointer address(const_reference x) const;
    pointer       allocate(size_type, typename allocator<void>::const_pointer hint = 0);
    void          deallocate(pointer p, size_type n);
    size_type     max_size() const throw();
    void          construct(pointer p, const T& val);
    void          destroy(pointer p);
};

Each STL container needs to have an allocator templated on container type T associated with it. The problem with this is that allocators for containers are defined at the class level and not the instance level. This makes it painful to define custom allocators for containers and adds to code bloat. Also, it turns out that the containers don't actually use allocator<T> but instead use allocator<T>::rebind<U>::other. Lastly, you cannot access this allocator after the container is constructed. There are some good academic reasons why the C++ standard works this way, but it results in a lot of unnecessary pain and makes concepts like memory tracking much harder to implement.

What EASTL does is use a more familiar memory allocation pattern whereby there is only one allocator class interface and it is used by all containers. Additionally EASTL containers let you access their allocators and query them, name them, change them, etc.

EASTL has chosen to make allocators not be copied between containers during container swap and assign operations. This means that if container A swaps its contents with container B, both containers retain their original allocators. Similarly, assigning container A to container B causes container B to retain its original allocator. Allocators that are equivalent should report so via operator==; EASTL will do a smart swap if allocators are equal, and a brute-force swap otherwise.

// EASTL allocator

class allocator
{
public:
    allocator(const char* pName = NULL);

    void* allocate(size_t n, int flags = 0);
    void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0);
    void  deallocate(void* p, size_t n);

    const char* get_name() const;
    void        set_name(const char* pName);
};

allocator* GetDefaultAllocator();

Fixed Size Container Design

EASTL supplies a set of fixed-size containers that the user can use, though the user can also implement their own versions. So in addition to class list there is class fixed_list. The fixed_list class implements a linked list via a fixed-size pool of contiguous memory which has no space overhead (unlike with a regular heap), doesn't cause fragmentation, and allocates very quickly.

EASTL implements fixed containers via subclasses of regular containers which set the regular container's allocator to point to themselves. Thus the implementation for fixed_list is very tiny and consists of little more than constructor and allocator functions. This design has some advantages but has one small disadvantage. The primary advantages are that code bloat is reduced and that the implementation is simple and the user can easily extend it. The primary disadvantage is that the parent list class ends up with a pointer to itself and thus has 4 bytes that could arguably be saved if the system were designed differently. That different design would be to make the list class have a policy template parameter which specifies that it is a fixed pool container. EASTL chose not to follow the policy design because it would complicate the implementation, make it harder for the user to extend the container, and would potentially waste more memory due to code bloat than it would save due to the 4 byte savings it achieves in container instances.

Algorithm Design

EASTL algorithms very much follow the philosophy of standard C++ algorithms, as this philosophy is sound and efficient. One of the primary aspects of algorithms is that they work on iterators and not containers. You will note for example that the find algorithm takes a first and last iterator as arguments and not a container. This has two primary benefits: it allows the user to specify a subrange of the container to search within and it allows the user to apply the find algorithm to sequences that aren't containers (e.g. a C array).

EASTL algorithms are optimized at least as well as the best STL algorithms found in commercial libraries and are significantly optimized over the algorithms that come with the first-party STLs that come with compilers. Most significantly, EASTL algorithms take advantage of type traits of contained classes and take advantage of iterator types to optimize code generation. For example, if you resize an array of integers (or other "pod" type), EASTL will detect that this can be done with a memcpy instead of a slow object-by-object move as would Microsoft STL.

The optimizations found in EASTL algorithms and the supporting code in EASTL type traits consist of some fairly tricky advanced C++ and while it is fairly easy to read, it requires a C++ expert (language lawyer, really) to implement confidently. The result of this is that it takes more effort to develop and maintain EASTL than it would to maintain a simpler library. However, the performance advantages have been deemed worth the tradeoff.

Smart Pointer Design

EASTL implements the following smart pointer types:

  • shared_ptr
  • shared_array
  • weak_ptr
  • intrusive_ptr
  • scoped_ptr
  • scoped_array
  • linked_ptr
  • linked_array
All but linked_ptr/linked_array are well-known smart pointers from the Boost library. The behaviour of these smart pointers is very similar to those from Boost with two exceptions:
  • EASTL smart pointers allow you to assign an allocator to them.
  • EASTL shared_ptr implements deletion via a templated parameter instead of a dynamically allocated virtual member object interface.

With respect to assigning an allocator, this gives EASTL more control over memory allocation and tracking, as Boost smart pointers unilaterally use global operator new to allocate memory from the global heap.

With respect to shared_ptr deletion, EASTL's current design of using a templated parameter is questionable, but does have some reason. The advantage is that EASTL avoids a heap allocation, avoids virtual function calls, and avoids templated class proliferation. The disadvantage is that EASTL shared_ptr containers which hold void pointers can't call the destructors of their contained objects unless the user manually specifies a custom deleter template parameter. This is a case whereby EASTL is more efficient but less safe. We can revisit this topic in the future if it becomes an issue.

list::size is O(n)

As of this writing, EASTL has three linked list classes: list, slist, and intrusive_list. In each of these classes, the size of the list is not cached in a member size variable. The result of this is that getting the size of a list is not a fast operation, as it requires traversing the list and counting the nodes. We could make the list::size function be fast by having a member mSize variable which tracks the size as we insert and delete items. There are reasons for having such functionality and reasons for not having such functionality. We currently choose to not have a member mSize variable as it would add four bytes to the class, add a tiny amount of processing to functions such as insert and erase, and would only serve to improve the size function, but no others. In the case of intrusive_list, it would do additional harm. The alternative argument is that the C++ standard states that std::list::size should be an O(1) operation (i.e. have a member size variable), that many C++ standard library list implementations do so, that the size is but an integer which is quick to update, and that many users expect to have a fast size function. In the final analysis, we are developing a library for game development and performance is paramount, so we choose to not cache the list size. The user can always implement a size cache himself.

basic_string doesn't use copy-on-write

The primary benefit of CoW is that it allows for the sharing of string data between two string objects. Thus if you say this:

string a("hello");
string b(a);

the "hello" will be shared between a and b. If you then say this:

a = "world";

then a will release its reference to "hello" and leave b with the only reference to it. Normally this functionality is accomplished via reference counting and with atomic operations or mutexes.

The C++ standard does not say anything about basic_string and CoW. However, for a basic_string implementation to be standards-conforming, a number of issues arise which dictate some things about how one would have to implement a CoW string. The discussion of these issues will not be rehashed here, as you can read the references below for better detail than can be provided in the space we have here. However, we can say that the C++ standard is sensible and that anything we try to do here to allow for an efficient CoW implementation would result in a generally unacceptable string interface.

The disadvantages of CoW strings are:

  • A reference count needs to exist with the string, which increases string memory usage.
  • With thread safety, atomic operations and mutex locks are expensive, especially on weaker memory systems such as console gaming platforms.
  • All non-const string accessor functions need to do a sharing check, and the first such check needs to detach the string. Similarly, all string assignments need to do a sharing check as well. If you access the string before doing an assignment, the assignment doesn't result in a shared string, because the string has already been detached.
  • String sharing doesn't happen the large majority of the time. In some cases, the total sum of the reference count memory can exceed any memory savings gained by the strings that share representations. 

The addition of a cow_string class is under consideration for EASTL. There are conceivably some systems which have string usage patterns which would benefit from CoW sharing. Such functionality is best saved for a separate string implementation so that the other string uses aren't penalized.

This is a good starting HTML reference on the topic:

http://www.gotw.ca/publications/optimizations.htm

Here is a well-known Usenet discussion on the topic:

http://groups-beta.google.com/group/comp.lang.c++.moderated/browse_thread/thread/3dc6af5198d0bf7/886c8642cb06e03d


End of document


























================================================ FILE: doc/html/EASTL FAQ.html ================================================ EASTL FAQ

EASTL FAQ

We provide a FAQ (frequently asked questions) list here for a number of commonly asked questions about EASTL and STL in general. Feel free to suggest new FAQ additions based on your own experience.

Information

1 What is EASTL?
2 What uses are EASTL suitable for?
3 How does EASTL differ from standard C++ STL?
4 Is EASTL thread-safe?
5 What platforms/compilers does EASTL support?
6 Why is there EASTL when there is the STL?
7 Can I mix EASTL with standard C++ STL?
8 Where can I learn more about STL and EASTL?
9 What is the legal status of EASTL?
10 Does EASTL deal with compiler exception handling settings?
11 What C++ language features does EASTL use (e.g. virtual functions)?
12 What compiler warning levels does EASTL support?
13 Is EASTL compatible with Lint?
14 What compiler settings do I need to compile EASTL?
15 How hard is it to incorporate EASTL into my project?
16 Should I use EASTL instead of std STL or instead of my custom library?
17 I think I've found a bug. What do I do?
18 Can EASTL be used by third party EA developers?

Performance

1 How efficient is EASTL compared to standard C++ STL implementations?
2 How efficient is EASTL in general?
3 Strings don't appear to use the "copy-on-write" optimization. Why not?
4 Does EASTL cause code bloat, given that it uses templates?
5 Don't STL and EASTL containers fragment memory?
6 I don't see container optimizations for equivalent scalar types such as pointer types. Why?
7 I've seen some STL's provide a default quick "node allocator" as the default allocator. Why doesn't EASTL do this?
8 Templates sometimes seem to take a long time to compile. What do I do about that?
9 How do I assign a custom allocator to an EASTL container?
10 How well does EASTL inline?
11 How do I control function inlining?
12 C++ / EASTL seems to bloat my .obj files much more than C does.
13 What are the best compiler settings for EASTL?

Problems

1 I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong?
2 I am getting compiler warnings (e.g. C4244, C4242 or C4267) that make no sense. Why?
3 I am getting compiler warning C4530, which complains about exception handling and "unwind semantics." What gives?
4 Why are tree-based containers hard to read with a debugger?
5 The EASTL source code is sometimes rather complicated looking. Why is that?
6 When I get compilation errors, they are very long and complicated looking. What do I do?
7 Templates sometimes seem to take a long time to compile. What do I do about that?
8 I get the compiler error: "template instantiation depth exceeds maximum of 17. use -ftemplate-depth-NN to increase the maximum"
9 I'm getting errors about min and max while compiling.
10 C++ / EASTL seems to bloat my .obj files much more than C does.
11 I'm getting compiler errors regarding operator new being previously defined.
12 I'm getting errors related to wchar_t string  functions such as wcslen.
13 I'm getting compiler warning C4619: there is no warning number Cxxxx (e.g. C4217).
14 My stack-based fixed_vector is not respecting the object alignment requirements.
15 I am getting compiler errors when using GCC under XCode (Macintosh/iphone).
16 I am getting linker errors about Vsnprintf8 or Vsnprintf16.
17 I am getting compiler errors about UINT64_C or UINT32_C.
18 I am getting a crash with a global EASTL container.
19 Why doesn't EASTL support passing NULL to functions with pointer arguments?

Debug

1 How do I get VC++ mouse-overs to view templated data?
2 How do I view containers if the visualizer/tooltip support is not present?
3 The EASTL source code is sometimes rather complicated looking. Why is that?
4 When I get compilation errors, they are very long and complicated looking. What do I do?
5 How do I measure hash table balancing?

Containers

1 Why do some containers have "fixed" versions (e.g. fixed_list) but others (e.g. deque) don't have fixed versions?
2 Can I mix EASTL with standard C++ STL?
3 Why are there so many containers?
4 Don't STL and EASTL containers fragment memory?
5 I don't see container optimizations for equivalent scalar types such as pointer types. Why?
6 What about alternative container and algorithm implementations (e.g. treaps, skip lists, avl trees)?
7 Why are containers hard to read with a debugger?
8 How do I assign a custom allocator to an EASTL container?
9 How do I set the VC++ debugger to display EASTL container data with tooltips?
10 How do I use a memory pool with a container?
11 How do I write a comparison (operator<()) for a struct that contains two or more members?
12 Why doesn't container X have member function Y?
13 How do I search a hash_map of strings via a char pointer efficiently? If I use map.find("hello") it creates a temporary string, which is inefficient.
14 Why are set and hash_set iterators const (i.e. const_iterator)?
15 How do I prevent my hash container from re-hashing?
16 Which uses less memory, a map or a hash_map?
17 How do I write a custom hash function?
18 How do I write a custom compare function for a map or set?
19 How do I force my vector or string capacity down to the size of the container?
20 How do I iterate a container while (selectively) removing items from it?
21 How do I store a pointer in a container?
22 How do I make a union of two containers? difference? intersection?
23 How do I override the default global allocator?
24 How do I do trick X with the string class?
25 How do EASTL smart pointers compare to Boost smart pointers?
26 How do you forward-declare an EASTL container?
27 How do I make two containers share a memory pool?
28 Can I use a std (STL) allocator with EASTL?
29 What are the requirements of classes stored in containers?

Algorithms

1 I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong?
2 How do I write a comparison (operator<()) for a struct that contains two or more members?
3 How do I sort something in reverse order?
4 I'm getting errors about min and max while compiling.
5 Why don't algorithms take a container as an argument instead of iterators? A container would be more convenient.
6 Given a container of pointers, how do I find an element by value (instead of by pointer)?
7 When do stored objects need to support operator < vs. when do they need to support operator ==?
8 How do I sort via pointers or array indexes instead of objects directly?

Iterators

1 What's the difference between iterator, const iterator, and const_iterator?
2 How do I tell from an iterator what type of thing it is iterating?
3 How do I iterate a container while (selectively) removing items from it?
4 What is an insert_iterator?


Information

Info.1 What is EASTL?

EASTL refers to "EA Standard Template Library." It is a C++ template library that is analogous to the template facilities of the C++ standard library, which are often referred to as the STL. EASTL consists of the following systems:

  • Containers
  • Iterators
  • Algorithms
  • Utilities
  • Smart pointers
  • Type traits

EASTL provides extensions and optimizations over the equivalents in standard C++ STL.

EASTL is a professional-level implementation which outperforms commercial implementations (where functionality overlaps) and is significantly easier to read and debug.

Info.2 What uses are EASTL suitable for?

EASTL is suitable for any place where templated containers and algorithms would be appropriate. Thus any C++ tools could use it and many C++ game runtimes could use it, especially 2005+ generation game platforms. EASTL has optimizations that make it more suited to the CPUs and memory systems found on console platforms. Additionally, EASTL has some type-traits and iterator-traits-derived template optimizations that make it generally more efficient than home-brew templated containers.

Info.3 How does EASTL differ from standard C++ STL?

There are three kinds of ways that EASTL differs from standard STL:

  1. EASTL equivalents to STL sometimes differ.
  2. EASTL implementations sometimes differ from STL implementations of the same thing.
  3. EASTL has functionality that doesn't exist in STL.

With respect to item #1, the changes are such that they benefit game development and are not the type that could silently hurt you if you were more familiar with STL interfaces.

With respect to item #2, where EASTL implementations differ from STL implementations it is almost always due to improvements being made in the EASTL versions or tradeoffs being made which are considered better for game development.

With respect to item #3, there are a number of facilities that EASTL has that STL doesn't have, such as intrusive_list and slist containers, smart pointers, and type traits. All of these are facilities that assist in making more efficient game code and data.

Ways in which EASTL is better than standard STL:

  • Has higher performance in release builds, sometimes dramatically so.
  • Has significantly higher performance in debug builds, due to less call overhead.
  • Has extended per-container functionality, particularly for game development.
  • Has additional containers that are useful for high performance game development.
  • Is easier to read, trace, and debug.
  • Memory allocation is much simpler and more controllable.
  • Has higher portability, as there is a single implementation for all platforms.
  • Has support of object alignment, whereas such functionality is not natively supported by STL.
  • We have control over it, so we can modify it as we like.
  • Has stricter standards for container design and behavior, particularly as this benefits game development.

Ways in which EASTL is worse than standard STL:

  • Standard STL implementations are currently very reliable and weather-worn, whereas EASTL is less tested.
  • Standard STL is automatically available with just about every C++ compiler vendor's library.
  • Standard STL is supported by the compiler vendor and somewhat by the Internet community.

EASTL coverage of std STL

  • list
  • vector
  • deque
  • string
  • set
  • multiset
  • map
  • multimap
  • bitset
  • queue
  • stack
  • priority_queue
  • memory
  • numeric
  • algorithm (all but inplace_merge, prev_permutation, next_permutation, nth_element, includes, unique_copy)
  • utility
  • functional
  • iterator
  • string_view
  • variant
  • any
  • optional

EASTL additions/amendments to std STL

  • allocators work in a simpler way.
  • exception handling can be disabled.
  • all containers expose/declare their node size, so you can make a node allocator for them.
  • all containers have reset_lose_memory(), which unilaterally forgets their contents.
  • all containers have validate() and validate_iterator() functions.
  • all containers understand and respect object alignment requirements.
  • all containers guarantee no memory allocation upon being newly created as empty.
  • all containers and their iterators can be viewed in a debugger (no other STL does this, believe it or not).
  • linear containers guarantee linear memory.
  • vector has push_back(void).
  • vector has a data() function.
  • vector<bool> is actually a vector of type bool.
  • vector and string have set_capacity().
  • string has sprintf(), append_sprintf(), trim(), compare_i(), make_lower(), make_upper().
  • deque allows you to specify the subarray size.
  • list has push_front(void) and push_back(void) functions.
  • hash_map, hash_set, etc. have find_as().

EASTL coverage of TR1 (tr1 refers to proposed additions for the next C++ standard library, ~2008)

  • array
  • type_traits (there are about 30 of these)
  • unordered_set (EASTL calls it hash_set)
  • unordered_multiset
  • unordered_map
  • unordered_multimap
  • shared_ptr, shared_array, weak_ptr, scoped_ptr, scoped_array, intrusive_ptr

EASTL additional functionality (not found elsewhere)

  • fixed_list
  • fixed_slist
  • fixed_vector
  • fixed_string
  • fixed_substring
  • fixed_set
  • fixed_multiset
  • fixed_map
  • fixed_multimap
  • fixed_hash_set
  • fixed_hash_multiset
  • fixed_hash_map
  • fixed_hash_multimap
  • fixed_function
  • vector_set
  • vector_multiset
  • vector_map
  • vector_multimap
  • intrusive_list
  • intrusive_slist
  • intrusive_sdlist
  • intrusive_hash_set
  • intrusive_hash_multiset
  • intrusive_hash_map
  • intrusive_hash_multimap
  • slist (STLPort's STL has this)
  • heap
  • linked_ptr, linked_array
  • sparse_matrix (this is not complete as of this writing)
  • ring_buffer
  • compressed_pair
  • call_traits
  • binary_search_i, change_heap, find_first_not_of, find_last_of, find_last_not_of, identical
  • comb_sort, bubble_sort, selection_sort, shaker_sort, bucket_sort
  • equal_to_2, not_equal_to_2, str_equal_to, str_equal_to_i

Info.4 Is EASTL thread-safe?

It's not simple enough to simply say that EASTL is thread-safe or thread-unsafe. However, we can say that with respect to thread safety that EASTL does the right thing.

Individual EASTL containers are not thread-safe. That is, access to an instance of a container from multiple threads at the same time is unsafe if any of those accesses are modifying operations. A given container can be read from multiple threads simultaneously as well as any other standalone data structure. If a user wants to be able to have modifying access to an instance of a container from multiple threads, it is up to the user to ensure that proper thread synchronization occurs. This usually means using a mutex.

EASTL classes other than containers are the same as containers with respect to thread safety. EASTL functions (e.g. algorithms) are inherently thread-safe as they have no instance data and operate entirely on the stack. As of this writing, no EASTL function allocates memory and thus doesn't bring thread safety issues via that means.

The user may well need to be concerned about thread safety with respect to memory allocation. If the user modifies containers from multiple threads, then allocators are going to be accessed from multiple threads. If an allocator is shared across multiple container instances (of the same type of container or not), then mutexes (as discussed above) the user uses to protect access to individual instances will not suffice to provide thread safety for allocators used across multiple instances. The conventional solution here is to use a mutex within the allocator if it is expected to be used by multiple threads.

EASTL uses neither static nor global variables and thus there are no inter-instance dependencies that would make thread safety difficult for the user to implement.

Info.5 What platforms/compilers does EASTL support?

EASTL's support depends entirely on the compiler and not on the platform. EASTL works on any C++ compiler that completely conforms to the C++ language standard. Additionally, EASTL is 32 bit and 64 bit compatible. Since EASTL does not use the C or C++ standard library (with a couple small exceptions), it doesn't matter what kind of libraries are provided (or not provided) by the compiler vendor. However, given that we need to work with some compilers that aren't 100% conforming to the language standard, it will be useful to make a list here of those that are supported and those that are not:

Compiler Status Notes
GCC 3.x+ Not Supported Not officially supported due to migration to Clang.
MSVC 12.0+ Supported This compiler is used by the Windows based platforms
Clang 4.0+ Supported This compiler is used by the Linux based platforms

Info.6 Why is there EASTL when there is the STL?

The STL is largely a fine library for general purpose C++. However, we can improve upon it for our uses and gain other advantages as well. The primary motivations for the existence of EASTL are the following:

  • Some STL implementations (especially Microsoft STL) have inferior performance characteristics that make them unsuitable for game development. EASTL is faster than all existing STL implementations.
  • The STL is sometimes hard to debug, as most STL implementations use cryptic variable names and unusual data structures.
  • STL allocators are sometimes painful to work with, as they have many requirements and cannot be modified once bound to a container.
  • The STL includes excess functionality that can lead to larger code than desirable. It's not very easy to tell programmers they shouldn't use that functionality.
  • The STL is implemented with very deep function calls. This results in unacceptable performance in non-optimized builds and sometimes in optimized builds as well.
  • The STL doesn't support alignment of contained objects.
  • STL containers won't let you insert an entry into a container without supplying an entry to copy from. This can be inefficient.
  • Useful STL extensions (e.g. slist, hash_map, shared_ptr) found in existing STL implementations such as STLPort are not portable because they don't exist in other versions of STL or aren't consistent between STL versions.
  • The STL lacks useful extensions that game programmers find useful (e.g. intrusive_list) but which could be best optimized in a portable STL environment.
  • The STL puts an emphasis on correctness before performance, whereas sometimes you can get significant performance gains by making things less academically pure.
  • STL containers have private implementations that don't allow you to work with their data in a portable way, yet sometimes this is an important thing to be able to do (e.g. node pools).
  • All existing versions of STL allocate memory in empty versions of at least some of their containers. This is not ideal and prevents optimizations such as container memory resets that can greatly increase performance in some situations.
  • The STL is slow to compile, as most modern STL implementations are very large.
  • There are legal issues that make it hard for us to freely use portable STL implementations such as STLPort.
  • We have no say in the design and implementation of the STL and so are unable to change it to work for our needs.

Note that there isn't actually anything in the C++ standard called "STL." STL is a term that merely refers to the templated portion of the C++ standard library.

Info.7 Can I mix EASTL with standard C++ STL?

This is possible to some degree, though the extent depends on the implementation of C++ STL. One of the things that limits interoperability is something called iterator categories. Containers and algorithms recognize iterator types via their category and STL iterator categories are not recognized by EASTL and vice versa.

Things that you definitely can do:

  • #include both EASTL and standard STL headers from the same .cpp file.
  • Use EASTL containers to hold STL containers.
  • Construct an STL reverse_iterator from an EASTL iterator.
  • Construct an EASTL reverse_iterator from an STL iterator.

Things that you probably will be able to do, though a given std STL implementation may prevent it:

  • Use STL containers in EASTL algorithms.
  • Use EASTL containers in STL algorithms.
  • Construct or assign to an STL container via iterators into an EASTL container.
  • Construct or assign to an EASTL container via iterators into an STL container.

Things that you would be able to do if the given std STL implementation is bug-free:

  • Use STL containers to hold EASTL containers. Unfortunately, VC7.x STL has a confirmed bug that prevents this. Similarly, STLPort versions prior to v5 have a similar bug.

Things that you definitely can't do:

  • Use an STL allocator directly with an EASTL container (though you can use one indirectly).
  • Use an EASTL allocator directly with an STL container (though you can use one indirectly).

Info.8 Where can I learn more about STL and EASTL?

EASTL is close enough in philosophy and functionality to standard C++ STL that most of what you read about STL applies to EASTL. This is particularly useful with respect to container specifications. It would take a lot of work to document EASTL containers and algorithms in fine detail, whereas most standard STL documentation applies as-is to EASTL. We won't cover the differences here, as that's found in another FAQ entry.

That being said, we provide a list of sources for STL documentation that may be useful to you, especially if you are less familiar with the concepts of STL and template programming in general.

  • The SGI STL web site. Includes a good STL reference.
  • CodeProject STL introduction.
  • Scott Meyers Effective STL book.
  • The Microsoft online STL documentation. Microsoft links go bad every couple months, so try searching for STL at the Microsoft MSDN site.
  • The Dinkumware online STL documentation. 
  • The C++ standard, which is fairly readable. You can buy an electronic version for about $18 and in the meantime you can make do with draft revisions of it off the Internet by searching for "c++ draft standard".
  • STL performance tips, by Pete Isensee
  • STL algorithms vs. hand-written loops, by Scott Meyers.
  • cppreference.com
  • isocpp.org

Info.9 What is the legal status of EASTL?

EASTL is usable for all uses within Electronic Arts, both for internal usage and for shipping products for all platforms. Any externally derived code would be explicitly stated as such and approved by the legal department if such code ever gets introduced. As of EASTL v1.0, the red_black_tree.cpp file contains two functions derived from the original HP STL, which have received EA legal approval for usage in any product.

Info.10 Does EASTL deal with compiler exception handling settings?

EASTL has automatic knowledge of the compiler's enabling/disabling of exceptions. If your compiler is set to disable exceptions, EASTL automatically detects so and executes without them. Also, you can force-enable or force-disable that setting to override the automatic behavior by #defining EASTL_EXCEPTIONS_ENABLED to 0 or 1. See EASTL's config.h for more information.

Info.11 What C++ language features does EASTL use (e.g. virtual functions)?

EASTL uses the following C++ language features:

  • Template functions, classes, member functions.
  • Multiple inheritance.
  • Namespaces.
  • Operator overloading.

EASTL does not use the following C++ language features:

  • Virtual functions / interfaces.
  • RTTI (dynamic_cast).
  • Global and static variables. There are a couple class static const variables, but they act much like enums.
  • Volatile declarations
  • Template export.
  • Virtual inheritance.

EASTL may use the following C++ language features:

  • Try/catch. This is an option that the user can enable and it defaults to whatever the compiler is set to use.
  • Floating point math. Hash containers have one floating point calculation, but otherwise floating point is not used.

Notes:

  • EASTL uses rather little of the standard C or C++ library and uses none of the C++ template library (STL) and iostream library. The memcpy family of functions is one example of EASTL's standard library usage.
  • EASTL never uses global new / delete / malloc / free. All allocations are done via user-specified allocators, though a default allocator definition is available.

Info.12 What compiler warning levels does EASTL support?

For VC++ EASTL should compile without warnings on level 4, and should compile without warnings for "warnings disabled by default" except C4242, C4514, C4710, C4786, and C4820. These latter warnings are somewhat draconian and most EA projects have little choice but to leave them disabled.

For GCC, EASTL should compile without warnings with -Wall. Extensive testing beyond that hasn't been done.

However, due to the nature of templated code generation and due to the way compilers compile templates, unforeseen warnings may occur in user code that may or may not be addressable by modifying EASTL.

Info.13 Is EASTL compatible with Lint?

As of EASTL 1.0, minimal lint testing has occurred. Testing with the November 2005 release of Lint (8.00t) demonstrated bugs in Lint that made its analysis not very useful. For example, Lint seems to get confused about the C++ typename keyword and spews many errors with code that uses it. We will work with the makers of Lint to get this resolved so that Lint can provide useful information about EASTL.

Info.14 What compiler settings do I need to compile EASTL?

EASTL consists mostly of header files with templated C++ code, but there are also a few .cpp files that need to be compiled and linked in order to use some of the modules. EASTL will compile in just about any environment. As mentioned elsewhere in this FAQ, EASTL can be compiled at the highest warning level of most compilers, transparently deals with compiler exception handling settings, is savvy to most or all compilation language options (e.g. wchar_t is built-in or not, for loop variables are local or not), and has almost no platform-specific or compiler-specific code. For the most part, you can just drop it in and it will work. The primary thing that needs to be in place is that EASTL .cpp files need to be compiled with the same struct padding/alignment settings as other code in the project. This of course is the same for just about any C++ source code library.

See the Performance section of this FAQ for a discussion of the optimal compiler settings for EASTL performance.

Info.15 How hard is it to incorporate EASTL into my project?

It's probably trivial.

EASTL has only one dependency: EABase. And EASTL auto-configures itself for most compiler environments and for the most typical configuration choices. Since it is fairly highly warning-free, you won't likely need to modify your compiler warning settings, even if they're pretty strict. EASTL has a few .cpp files which need to be compiled if you want to use the modules associated with those files. You can just compile those files with your regular compiler settings. Alternatively, you can use one of the EASTL project files.

In its default configuration, the only thing you need to provide to make EASTL work is to define implementations of the following operator new functions:

#include <new>
void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line); void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line);
The flags and debugFlags arguments correspond to PPMalloc/RenderWare GeneralAllocator/GeneralAllocatorDebug Malloc equivalents.

Info.16 Should I use EASTL instead of std STL or instead of my custom library?

There are reasons you may want to use EASTL; there are reasons you may not want to use it. Ditto for std STL or any other library. Here we present a list of reasons (+ and -) for why you might want to use one or another. However, it should be noted that while EASTL contains functionality found in std STL, it has another ~40% of functionality not found in std STL, so EASTL and std STL (and whatever other template library you may have) are not mutually exclusive.

EASTL

+ Has higher performance than any commercial STL, especially on console platforms.
+ Has extended functionality tailored for game development.
+ Is highly configurable, and we own it so it can be amended at will. Std STL is owned by a third party committee.
+ Is much easier to read and debug than other similar libraries, especially std STL.

- Is highly unit tested, but does not have the same level of testing as std STL.
- Is more complicated than many users' lite template libraries, and may put off some beginners.
- EASTL  

Std STL

+ Is highly portable; your STL code will likely compile and run anywhere.
+ Works without the need to install or download any package to use it. It just works.
+ Is highly reliable and supported by the compiler vendor. You can have confidence in it.
+ Some std STL versions (e.g. STLPort, VC8 STL) have better runtime debug checking than EASTL.

- Has (sometimes greatly) variable implementations, behavior, and performance between implementations.
- Is usually hard to read and debug.
- Doesn't support some of the needs of game development, such as aligned allocations, named allocations, intrusive containers, etc.
- Is not as efficient as EASTL, especially on console platforms.

Your own library

(please forgive us for implying there may be weaknesses in your libraries)

+ You have control over it and can make it work however you want.
+ You can fix bugs in it on the spot and have the fix in your codebase immediately.
+ Your own library can be highly integrated into your application code or development environment.

- Many custom libraries don't have the same level of testing as libraries such as std STL or EASTL.
- Many custom libraries don't have the same breadth or depth as std STL or especially EASTL.
- Many custom libraries don't have the level of performance tuning that std STL or especially EASTL has.

Info.17 I think I've found a bug. What do I do?

Verify that you indeed have a bug
There are various levels of bugs that can occur, which include the following:

  1. Compiler warnings generated by EASTL.
  2. Compiler errors generated by EASTL (failure to compile well-formed code).
  3. Runtime misbehavior by EASTL (function does the wrong thing).
  4. Runtime crash or data corruption by EASTL.
  5. Mismatch between EASTL documentation and behavior.
  6. Mismatch between EASTL behavior and user's expectations (mis-design).

Any of the above items can be the fault of EASTL. However, the first four can also be the fault of the user. Your primary goal in verifying a potential bug is to determine if it is an EASTL bug or a user bug. Template errors can sometimes be hard to diagnose. It's probably best if you first show the problem to somebody you know to make sure you are not missing something obvious. Creating a reproducible case may be useful in helping convince yourself, but as is mentioned below, this is not required in order to report the bug.

Report the bug
The first place to try is the standard EA centralized tech support site. As of this writing (10/2005), that tech site is http://eatech/. Due to the frequent technology churn that seems to occur within Electronic Arts, the bug reporting system in place when you read this may not be the one that was in place when this FAQ entry was written. If the tech site route fails, consider directly contacting the maintainer of the EASTL package.

In reporting a bug, it is nice if there is a simple reproducible case that can be presented. However, such a case requires time to create, and so you are welcome to initially simply state what you think the bug is without producing a simple reproducible case. It may be that this is a known bug or it may be possible to diagnose the bug without a reproducible case. If more information is needed then the step of trying to produce a reproducible case may be necessary.

Info.18 Can EASTL be used by third party EA developers?

EASTL and other core technologies authored by EA (and not licensed from other companies) can be used in source and binary form by designated 3rd parties. The primary case where there is an issue is if the library contains platform specific code for a platform that the 3rd party is not licensed for. In that case the platform-specific code would need to be removed. This doesn’t apply to EASTL, nor many of the other core tech packages.

Performance

Perf.1 How efficient is EASTL compared to standard C++ STL implementations?

With respect to the functionality that is equivalent between EASTL and standard STL, the short answer to this is that EASTL is at least as efficient as other STL implementations and in a number of aspects is more so. EASTL has functionality such as intrusive_list and linked_ptr that don't exist in standard STL but are explicitly present to provide significant optimizations over standard STL.

The medium length answer is that EASTL is significantly more efficient than Dinkumware STL, and Microsoft Windows STL. EASTL is generally more efficient than Metrowerks STL, but Metrowerks has a few tricks up its sleeve which EASTL doesn't currently implement. EASTL is roughly equal in efficiency to STLPort and GCC 3.x+ STL, though EASTL has some optimizations that these do not.

The long answer requires a breakdown of the functionality between various versions of the STL.

Perf.2 How efficient is EASTL in general?

This question is related to the question, "How efficient are templates?" If you understand the effects of templates then you can more or less see the answer for EASTL. Templates are more efficient than the alternative when they are used appropriately, but can be less efficient than the alternative when used under circumstances that don't call for them. The strength of templates is that the compiler sees all the code and data types at compile time and can often reduce statements to smaller and faster code than with conventional non-templated code. The weakness of templates is that they sometimes produce more code and can result in what is often called "code bloat". However, it's important to note that unused template functions result in no generated nor linked code, so if you have a templated class with 100 functions but you only use one, only that one function will be compiled.

EASTL is a rather efficient implementation of a template library and pulls many tricks of the trade in terms of squeezing optimal performance out of the compiler. The only way to beat it is to write custom code for the data types you are working with, and even then people are sometimes surprised to find that their hand-implemented algorithm works no better or even worse than the EASTL equivalent. But certainly there are ways to beat templates, especially if you resort to assembly language programming and some kinds of other non-generic tricks.

Perf.3 Strings don't appear to use the "copy-on-write" (CoW) optimization. Why not?

Short answer
CoW provides a benefit for a small percentage of uses but provides a disadvantage for the large majority of uses.

Long answer
The primary benefit of CoW is that it allows for the sharing of string data between two string objects. Thus if you say this:

string a("hello");
string b(a);
the "hello" will be shared between a and b. If you then say this:
a = "world";
then a will release its reference to "hello" and leave b with the only reference to it. Normally this functionality is accomplished via reference counting and with atomic operations or mutexes.

The C++ standard does not say anything about basic_string and CoW. However, for a basic_string implementation to be standards-conforming, a number of issues arise which dictate some things about how one would have to implement a CoW string. The discussion of these issues will not be rehashed here, as you can read the references below for better detail than can be provided in the space we have here. However, we can say that the C++ standard is sensible and that anything we try to do here to allow for an efficient CoW implementation would result in a generally unacceptable string interface.

The disadvantages of CoW strings are:

  • A reference count needs to exist with the string, which increases string memory usage.
  • With thread safety, atomic operations and mutex locks are expensive, especially on weaker memory systems such as console gaming platforms.
  • All non-const string accessor functions need to do a sharing check and the first such check needs to detach the string. Similarly, all string assignments need to do a sharing check as well. If you access the string before doing an assignment, the assignment doesn't result in a shared string, because the string has already been detached.
  • String sharing doesn't happen the large majority of the time. In some cases, the total sum of the reference count memory can exceed any memory savings gained by the strings that share representations.

The addition of a cow_string class is under consideration for EASTL. There are conceivably some systems which have string usage patterns which would benefit from CoW sharing. Such functionality is best saved for a separate string implementation so that the other string uses aren't penalized.

References

This is a good starting HTML reference on the topic:
    http://www.gotw.ca/publications/optimizations.htm

Here is a well-known Usenet discussion on the topic:
    http://groups-beta.google.com/group/comp.lang.c++.moderated/browse_thread/thread/3dc6af5198d0bf7/886c8642cb06e03d

Perf.4 Does EASTL cause code bloat, given that it uses templates?

The reason that templated functions and classes might cause an increase in code size is that each template instantiation theoretically creates a unique piece of code. For example, when you compile this code:

template <typename T>
const T min(const T a, const T b)
    { return b < a ? b : a; }

int    i = min<int>(3, 4);
double d = min<double>(3.0, 4.0);

the compiler treats it as if you wrote this:

int min(const int a, const int b)
    { return b < a ? b : a; }
double min(const double a, const double b)     { return b < a ? b : a; }

Imagine this same effect happening with containers such as list and map and you can see how it is that templates can cause code proliferation.

A couple things offset the possibility of code proliferation: inlining and folding. In practice the above 'min' function would be converted to inlined functions by the compiler which occupy only a few CPU instructions. In many of the simplest cases the inlined version actually occupies less code than the code required to push parameters on the stack and execute a function call. And they will execute much faster as well.

Code folding (a.k.a. "COMDAT folding", "duplicate stripping", "ICF" / "identical code folding") is a compiler optimization whereby the compiler realizes that two independent functions have compiled to the same code and thus can be reduced to a single function. The Microsoft VC++ compiler (Since VS2005), and GCC (v 4.5+) can do these kinds of optimizations on all platforms. This can result, for example, in all templated containers of pointers (e.g. vector<char*>, vector<Widget*>, etc.) to be linked as a single implementation. This folding occurs at a function level and so individual member functions can be folded while other member functions are not. A side effect of this optimization is that you aren't likely to gain much by declaring containers of void* instead of the pointer type actually contained.

The above two features reduce the extent of code proliferation, but certainly don't eliminate it. What you need to think about is how much code might be generated vs. what your alternatives are. Containers like vector can often inline completely away, whereas more complicated containers such as map can only partially be inlined. In the case of map, if you need such a container for your Widgets, what alternatives do you have that would be more efficient than instantiating a map? This is up to you to answer.

It's important to note that C++ compilers will throw away any templated functions that aren't used, including unused member functions of templated classes. However, some argue that by having many functions available to the user that users will choose to use that larger function set rather than stick with a more restricted set.

Also, don't be confused by syntax bloat vs. code bloat. In looking at templated libraries such as EASTL you will notice that there is sometimes a lot of text in the definition of a template implementation. But the actual underlying code is what you need to be concerned about.

There is a good Usenet discussion on this topic at: http://groups.google.com/group/comp.lang.c++.moderated/browse_frm/thread/2b00649a935997f5

Perf.5 Don't STL and EASTL containers fragment memory?

They only fragment memory if you use them in a way that does so. This is no different from any other type of container used in a dynamic way. There are various solutions to this problem, and EASTL provides additional help as well:

  • For vectors, use the reserve function (or the equivalent constructor) to set aside a block of memory for the container. The container will not reallocate memory unless you try to grow beyond the capacity you reserve.
  • EASTL has "fixed" variations of containers which allow you to specify a fixed block of memory which the container uses for its memory. The container will not allocate any memory with these types of containers and all memory will be cache-friendly due to its locality.
  • You can assign custom allocators to containers instead of using the default global allocator. You would typically use an allocator that has its own private pool of memory.
  • Where possible, add all a container's elements to it at once up front instead of adding them over time. This avoids memory fragmentation and increases cache coherency.

Perf.6 I don't see container optimizations for equivalent scalar types such as pointer types. Why?

Metrowerks (and no other, as of this writing) STL has some container specializations for type T* which maps them to type void*. The idea is that a user who declares a list of Widget* and a list of Gadget* will generate only one container: a list of void*. As a result, code generation will be smaller. Often this is done only in optimized builds, as such containers are harder to view in debug builds due to type information being lost.

The addition of this optimization is under consideration for EASTL, though it might be noted that optimizing compilers such as VC++ are already capable of recognizing duplicate generated code and folding it automatically as part of link-time code generation (LTCG) (a.k.a. "whole program optimization"). This has been verified with VC++, as the following code and resulting disassembly demonstrate:

eastl::list<int*>        intPtrList;
eastl::list<TestObject*> toPtrList;

eastl_size_t n1 = intPtrList.size();
eastl_size_t n2 = toPtrList.size();

0042D288  lea         edx,[esp+14h]
0042D28C  call        eastl::list<TestObject>::size (414180h)
0042D291  push        eax 
0042D292  lea         edx,[esp+24h]
0042D296  call        eastl::list<TestObject>::size (414180h)

Note that in the above case the compiler folded the two implementations of size() into a single implementation.

Perf.7 I've seen some STL's provide a default quick "node allocator" as the default allocator. Why doesn't EASTL do this?

Short answer
This is a bad, misguided idea.

Long answer
These node allocators implement a heap for all of STL with buckets for various sizes of allocations and implement fixed-size pools for each of these buckets. These pools are attractive at first because they do well in STL comparison benchmarks, especially when thread safety is disabled. Such benchmarks make it impossible to truly compare STL implementations because you have two different allocators in use and in some cases allocator performance can dominate the benchmark. However, the real problem with these node allocators is that they badly fragment and waste memory. The technical discussion of this topic is outside the scope of this FAQ, but you can learn more about it by researching memory management on the Internet. Unfortunately, the people who implement STL libraries are generally not experts on the topic of memory management. A better approach, especially for game development, is for the user to decide when fixed-size pools are appropriate and use them via custom allocator assignment to containers.

Perf.8 Templates sometimes seem to take a long time to compile. What do I do about that?

C++ compilers are generally slower than C compilers, and C++ templates are generally slower to compile than regular C++ code. EASTL has some extra functionality (such as type_traits and algorithm specializations) that is not found in most other template libraries and significantly improves performance and usefulness but adds to the amount of code that needs to be compiled. Ironically, we have a case where more source code generates faster and smaller object code.

The best solution to the problem is to use pre-compiled headers, which are available on all modern (~2002+) compilers, such as VC6.0+, GCC 3.2+, and Metrowerks 7.0+. In terms of platforms this means all 2002+ platforms.

Some users have been speeding up build times by creating project files that put all the source code in one large .cpp file. This has an effect similar to pre-compiled headers. It can go even faster than pre-compiled headers but has downsides in the way of convenience and portability.

Perf.10 How well does EASTL inline?

EASTL is written in such a way as to be easier to inline than typical templated libraries such as STL. How is this so? It is so because EASTL reduces the inlining depth of many functions, particularly the simple ones. In doing so it makes the implementation less "academic" but entirely correct. An example of this is the vector operator[] function, which is implemented like so with Microsoft STL:

reference operator[](size_type n) {
   return *(begin() + n);
}
EASTL implements the function directly, like so:
reference operator[](size_type n) {
    return *(mpBegin + n);
}
Both implementations are correct, but the EASTL implementation will run faster in debug builds, be easier to debug, and will be more likely to be inlined when the usage of this function is within a hierarchy of other functions being inlined. It is not so simple to say that the Microsoft version will always inline in an optimized build, as it could be part of a chain and cause the max depth to be exceeded.

That being said, EASTL appears to inline fairly well under most circumstances, including with GCC, which is the poorest of the compilers in its ability to inline well.

Perf.11 How do I control function inlining?

Inlining is an important topic for templated code, as such code often relies on the compiler being able to do good function inlining for maximum performance. GCC, VC++, and Metrowerks are discussed here. We discuss compilation-level inlining and function-level inlining here, though the latter is likely to be of more use to the user of EASTL, as it can externally control how EASTL is inlined. A related topic is GCC's template expansion depth, discussed elsewhere in this FAQ. We provide descriptions of inlining options here but don't currently have any advice on how to best use these with EASTL.

Compilation-Level Inlining -- VC++

VC++ has some basic functionality to control inlining, and the compiler is pretty good at doing aggressive inlining when optimization is on, for all platforms.

#pragma inline_depth( [0... 255] )

Controls the number of times inline expansion can occur by controlling the number of times that a series of function calls can be expanded (from 0 to 255 times). This pragma controls the inlining of functions marked inline and __inline, or inlined automatically under the /Ob2 option. The inline_depth pragma controls the number of times a series of function calls can be expanded. For example, if the inline depth is 4, and if A calls B and B then calls C, all three calls will be expanded inline. However, if the closest inline expansion is 2, only A and B are expanded, and C remains as a function call.

#pragma inline_recursion( [{on | off}] )

Controls the inline expansion of direct or mutually recursive function calls. Use this pragma to control functions marked as inline and/or functions that the compiler automatically expands under the /Ob2 option. Use of this pragma requires an /Ob compiler option setting of either 1 or 2. The default state for inline_recursion is off. The inline_recursion pragma controls how recursive functions are expanded. If inline_recursion is off, and if an inline function calls itself (either directly or indirectly), the function is expanded only once. If inline_recursion is on, the function is expanded multiple times until it reaches the value set by inline_depth, the default value of 8, or a capacity limit.

Compilation-Level Inlining -- GCC

GCC has a large set of options to control function inlining. Some options are available only  in GCC 3.0 and later and thus not present on older platforms.

-fno-default-inline Do not make member functions inline by default merely because they are defined inside the class scope (C++ only). Otherwise, when you specify -O, member functions defined inside class scope are compiled inline by default; i.e., you don't need to add `inline' in front of the member function name.
-fno-inline Don't pay attention to the inline keyword. Normally this option is used to keep the compiler from expanding any functions inline. Note that if you are not optimizing, no functions can be expanded inline.
-finline-functions Integrate all simple functions into their callers. The compiler heuristically decides which functions are simple enough to be worth integrating in this way. If all calls to a given function are integrated, and the function is declared static, then the function is normally not output as assembler code in its own right. Enabled at level -O3.
-finline-limit=n By default, GCC limits the size of functions that can be inlined. This flag allows the control of this limit for functions that are explicitly marked as inline (i.e., marked with the inline keyword or defined within the class definition in c++). n is the size of functions that can be inlined in number of pseudo instructions (not counting parameter handling). pseudo-instructions are an internal representation of function size. The default value of n is 600. Increasing this value can result in more inlined code at the cost of compilation time and memory consumption. Decreasing usually makes the compilation faster and less code will be inlined (which presumably means slower programs). This option is particularly useful for programs that use inlining heavily such as those based on recursive templates with C++.

Inlining is actually controlled by a number of parameters, which may be specified individually by using --param name=value. The -finline-limit=n option sets some of these parameters as follows:

max-inline-insns-single
    is set to n/2.
max-inline-insns-auto
    is set to n/2.
min-inline-insns
    is set to 130 or n/4, whichever is smaller.
max-inline-insns-rtl
    is set to n.

See --param below for a documentation of the individual parameters controlling inlining.
-fkeep-inline-functions Emit all inline functions into the object file, even if they are inlined where used.
--param name=value In some places, GCC uses various constants to control the amount of optimization that is done. For example, GCC will not inline functions that contain more than a certain number of instructions. You can control some of these constants on the command-line using the --param option.

max-inline-insns-single
Several parameters control the tree inliner used in gcc. This number sets the maximum number of instructions (counted in GCC's internal representation) in a single function that the tree inliner will consider for inlining. This only affects functions declared inline and methods implemented in a class declaration (C++). The default value is 450.

max-inline-insns-auto
When you use -finline-functions (included in -O3), a lot of functions that would otherwise not be considered for inlining by the compiler will be investigated. To those functions, a different (more restrictive) limit compared to functions declared inline can be applied. The default value is 90.

large-function-insns
The limit specifying really large functions. For functions larger than this limit after inlining, inlining is constrained by --param large-function-growth. This parameter is useful primarily to avoid extreme compilation time caused by non-linear algorithms used by the backend. This parameter is ignored when -funit-at-a-time is not used. The default value is 2700.

large-function-growth
Specifies maximal growth of large function caused by inlining in percents. This parameter is ignored when -funit-at-a-time is not used. The default value is 100 which limits large function growth to 2.0 times the original size.

inline-unit-growth
Specifies maximal overall growth of the compilation unit caused by inlining. This parameter is ignored when -funit-at-a-time is not used. The default value is 50 which limits unit growth to 1.5 times the original size.

max-inline-insns-recursive
max-inline-insns-recursive-auto
Specifies maximum number of instructions out-of-line copy of self recursive inline function can grow into by performing recursive inlining. For functions declared inline --param max-inline-insns-recursive is taken into account. For functions not declared inline, recursive inlining happens only when -finline-functions (included in -O3) is enabled and --param max-inline-insns-recursive-auto is used. The default value is 450.

max-inline-recursive-depth
max-inline-recursive-depth-auto
Specifies maximum recursion depth used by the recursive inlining. For functions declared inline --param max-inline-recursive-depth is taken into account. For functions not declared inline, recursive inlining happens only when -finline-functions (included in -O3) is enabled and --param max-inline-recursive-depth-auto is used. The default value is 450.

inline-call-cost
Specify cost of call instruction relative to simple arithmetics operations (having cost of 1). Increasing this cost disqualify inlining of non-leaf functions and at same time increase size of leaf function that is believed to reduce function size by being inlined. In effect it increase amount of inlining for code having large abstraction penalty (many functions that just pass the arguments to other functions) and decrease inlining for code with low abstraction penalty. Default value is 16.
-finline-limit=n By default, GCC limits the size of functions that can be inlined. This flag allows the control of this limit for functions that are explicitly marked as inline (i.e., marked with the inline keyword or defined within the class definition in c++). n is the size of functions that can be inlined in number of pseudo instructions (not counting parameter handling). The default value of n is 600. Increasing this value can result in more inlined code at the cost of compilation time and memory consumption. Decreasing usually makes the compilation faster and less code will be inlined (which presumably means slower programs). This option is particularly useful for programs that use inlining heavily such as those based on recursive templates with C++.

Inlining is actually controlled by a number of parameters, which may be specified individually by using --param name=value. The -finline-limit=n option sets some of these parameters as follows:

max-inline-insns-single
is set to n/2.
max-inline-insns-auto
is set to n/2.
min-inline-insns
is set to 130 or n/4, whichever is smaller.
max-inline-insns-rtl
is set to n.

See below for a documentation of the individual parameters controlling inlining.

Note: a pseudo instruction represents, in this particular context, an abstract measurement of a function's size. In no way does it represent a count of assembly instructions, and as such its exact meaning might change from one release to another.

GCC additionally has the -Winline compiler warning, which emits a warning whenever a function declared as inline was not inlined.

Compilation-Level Inlining -- Metrowerks

Metrowerks has a number of pragmas (and corresponding compiler settings) to control inlining. These include always_inline, inline_depth, inline_max_size, and inline_max_total_size.

#pragma always_inline on | off | reset

Controls the use of inlined functions. If you enable this pragma, the compiler ignores all inlining limits and attempts to inline all functions where it is legal to do so. This pragma is deprecated. Use the inline_depth pragma instead.

#pragma inline_depth(n)
#pragma inline_depth(smart)

Controls how many passes are used to expand inline function. Sets the number of passes used to expand inline function calls. The number n is an integer from 0 to 1024 or the smart specifier. It also represents the distance allowed in the call chain from the last function up. For example, if d is the total depth of a call chain, then functions below (d-n) are inlined if they do not exceed the inline_max_size and inline_max_total_size settings which are discussed directly below.

#pragma inline_max_size(n);
#pragma inline_max_total_size(n);

The first pragma sets the maximum function size to be considered for inlining; the second sets the maximum size to which a function is allowed to grow after the functions it calls are inlined. Here, n is the number of statements, operands, and operators in the function, which
turns out to be roughly twice the number of instructions generated by the function. However, this number can vary from function to function. For the inline_max_size pragma, the default value of n is 256; for the inline_max_total_size pragma, the default value of n is 10000. The smart specifier is the default mode, with four passes where the passes 2-4 are limited to small inline functions. All inlineable functions are expanded if inline_depth is set to 1-1024.

Function-Level Inlining -- VC++

To force inline usage under VC++, you use this:

    __forceinline void foo(){ ... }

It should be noted that __forceinline has no effect if the compiler is set to disable inlining. It merely tells the compiler that when inlining is enabled that it shouldn't use its judgment to decide if the function should be inlined but instead to always inline it.

To disable inline usage under VC++, you need to use this:

    #pragma inline_depth(0) // Disable inlining.
    void foo() { ... }
    #pragma inline_depth()  // Restore default.

The above is essentially specifying compiler-level inlining control within the code for a specific function.

Function-Level Inlining -- GCC / Metrowerks

To force inline usage under GCC 3.1+, you use this:

    inline void foo() __attribute__((always_inline)) { ... }
       
or
    inline __attribute__((always_inline)) void foo() { ... }

To disable inline usage under GCC 3+, you use this:

    void foo() __attribute__((noinline)) { ... }
        or
    inline __attribute__((noinline)) void foo() { ... }

EABase has some wrappers for this, such as EA_FORCE_INLINE.

Perf.12 C++ / EASTL seems to bloat my .obj files much more than C does.

There is no need to worry. The way most C++ compilers compile templates, they compile all seen template code into the current .obj module, which results in larger .obj files and duplicated template code in multiple .obj files. However, the linker will (and in fact must) select only a single version of any given function for the application, and these linked functions will usually be located contiguously.

Additionally, the debug information for template definitions is usually larger than that for non-templated C++ definitions, which itself is sometimes larger than C definitions due to name decoration.

Perf.13 What are the best compiler settings for EASTL?

We will discuss various aspects of this topic here. As of this writing, more EASTL research on this topic has been done on Microsoft compiler platforms (e.g. Win32) than GCC platforms. Thus currently this discussion focuses on VC++ optimization. Some of the concepts are applicable to GCC, though. EASTL has been successfully compiled and tested (the EASTL unit test) on our major development platforms with the highest optimization settings enabled, including GCC's infamous -O3 level.

Optimization Topics

  • Function inlining.
  • Optimization for speed vs. optimization for size.
  • Link-time code generation (LTCG).
  • Profile-guided optimization (PGO).

Function inlining
EASTL is a template library and inlining is important for optimal speed. Compilers have various options for enabling inlining and those options are discussed in this FAQ in detail. Most users will want to enable some form of inlining when compiling EASTL and other templated libraries. Users that are most concerned about the compiler's inlining increasing code size may want to try the 'inline only functions marked as inline' compiler option. Here is a table of normalized results from the benchmark project (Win32 platform):

Inlining Disabled Inline only 'inline' Inline any
Application size 100K 86K 86K
Execution time 100 75 75


The above execution times are highly simplified versions of the actual benchmark data but convey a sense of the general average behaviour that can be expected. In practice, simple functions such as vector::operator[] will execute much faster with inlining enabled but complex functions such as map::insert may execute no faster with inlining enabled.

Optimization for Speed / Size
Optimization for speed results in the compiler inlining more code than it would otherwise. This results in the inlined code executing faster than if it was not inlined. As mentioned above, basic function inlining can result in smaller code as well as faster code, but after a certain point highly inlined code becomes greater in size than less inlined code and the performance advantages of inlining start to lessen. The EASTL Benchmark project is a medium sized application that is about 80% templated and thus acts as a decent measure of the practical tradeoff between speed and size. Here is a table of normalized results from the benchmark project (Windows platform):

Size Speed Speed + LTCG Speed + LTCG + PGO
Application size 80K 100K 98K 98K
Execution time 100 90 83 75


What the above table is saying is that if you are willing to have your EASTL code be 20% larger, it will be 10% faster. Note that it doesn't mean that your app will be 20% larger, only the templated code in it like EASTL will be 20% larger.

Link-time code generation (LTCG)
LTCG is a mechanism whereby the compiler compiles the application as if it was all in one big .cpp file instead of separate .cpp files that don't see each other. Enabling LTCG optimizations is done by simply setting some compiler and linker settings and results in slower link times. The benchmark results are presented above and for the EASTL Benchmark project show some worthwhile improvement.

Profile-guided optimization (PGO)
PGO is a mechanism whereby the compiler uses profiling information from one or more runs to optimize the compilation and linking of an application. Enabling PGO optimizations is done by setting some linker settings and doing some test runs of the application, then linking the app with the test run results. Doing PGO optimizations is a somewhat time-consuming task but the benchmark results above demonstrate that for the EASTL Benchmark project PGO is worth the effort.

Problems

Prob.1 I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong?

It may be possible that you are seeing floating point roundoff problems. Many STL algorithms require object comparisons to act consistently. However, floating point values sometimes compare differently between uses because in one situation a value might be in 32 bit form in system memory, whereas in another situation that value might be in an FPU register with a different precision. These are difficult problems to track down and aren't the fault of EASTL or whatever similar library you might be using. There are various solutions to the problem, but the important thing is to find a way to force the comparisons to be consistent.

The code below was an example of this happening, whereby the object pA->mPos was stored in system memory while pB->mPos was stored in a register and comparisons were inconsistent and a crash ensued.

class SortByDistance : public binary_function<WorldTreeObject*, WorldTreeObject*, bool>
{
private:
    Vector3 mOrigin;

public:
    SortByDistance(Vector3 origin) {
        mOrigin = origin;
    }

    bool operator()(WorldTreeObject* pA, WorldTreeObject* pB) const {
         return ((WorldObject*)pA)->mPos - mOrigin).GetLength()
              < ((WorldObject*)pB)->mPos - mOrigin).GetLength();
    }
};

Another thing to watch out for is the following mistake:

struct ValuePair
{
    uint32_t a;
    uint32_t b;
};

// Improve speed by casting the struct to uint64_t
bool operator<(const ValuePair& vp1, const ValuePair& vp2)
    { return *(uint64_t*)&vp1 < *(uint64_t*)&vp2; }

The problem is that the ValuePair struct has 32 bit alignment but the comparison assumes 64 bit alignment. The code above has been observed to crash on the PowerPC 64-based machines. The resolution is to declare ValuePair as having 64 bit alignment.

Prob.2 I am getting compiler warnings (e.g. C4244, C4242 or C4267) that make no sense. Why?

One cause of this occurs with VC++ when you have code compiled with the /Wp64 (detect 64 bit portability issues) option. This causes pointer types to have a hidden flag called __w64 attached to them by the compiler. So 'ptrdiff_t' is actually known by the compiler as '__w64 int', while 'int' is known by the compiler as simply 'int'. A problem occurs here when you use templates. For example, let's say we have this templated function:
template <typename T>
T min(const T a, const T b) {
    return b < a ? b : a;
}
If you compile this code:
ptrdiff_t a = min(ptrdiff_t(0), ptrdiff_t(1));
int       b = min((int)0, (int)1);
You will get the following warning for the second line, which is somewhat nonsensical:
warning C4244: 'initializing' : conversion from 'const ptrdiff_t' to 'int', possible loss of data

This could probably be considered a VC++ bug, but in the meantime you have little choice but to ignore the warning or disable it.

Prob.3 I am getting compiler warning C4530, which complains about exception handling and "unwind semantics." What gives?

VC++ has a compiler option (/EHsc) that allows you to enable/disable exception handling stack unwinding but still enable try/catch. This is useful because it can save a lot in the way of code generation for your application. Disabling stack unwinding will decrease the size of your executable on at least the Win32 platform by 10-12%.

If you have stack unwinding disabled, but you have try/catch statements, VC++ will generate the following warning:

warning C4530: C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc

As of EASTL v1.0, this warning has been disabled within EASTL for EASTL code. However, non-EASTL code such as std STL code may still cause this warning to be triggered. In this case there is not much you can do about this other than to disable the warning.

Prob.4 Why are tree-based EASTL containers hard to read with a debugger?

Short answer
Maximum performance and design mandates.

Long answer
You may notice that when you have a tree-based container (e.g. set, map)  in the debugger that it isn't automatically able to recognize the tree nodes as containing instances of your contained object. You can get the debugger to do what you want with casting statements in the debug watch window, but this is not an ideal solution. The reason this is happening is that node-based containers always use an anonymous node type as the base class for container nodes. This is primarily done for performance, as it allows the node manipulation code to exist as a single non-templated library of functions and it saves memory because containers will have one or two base nodes as container 'anchors' and you don't want to allocate a node of the size of the user data when you can just use a base node. See list.h for an example of this and some additional in-code documentation on this.

Additionally, EASTL has the design mandate that an empty container constructs no user objects. This is both for performance reasons and because doing so would skew the user's tracking of object counts and might possibly break some expectation the user has about object lifetimes.

Currently this debug issue exists only with tree-based containers. Other node-based containers such as list and slist use a trick to get around this problem in debug builds.

See Debug.2 for more.

Prob.5 The EASTL source code is sometimes rather complicated looking. Why is that?

Short answer
Maximum performance.

Long answer
EASTL uses templates, type_traits, iterator categories, redundancy reduction, and branch reduction in order to achieve optimal performance. A side effect of this is that there are sometimes a lot of template parameters and multiple levels of function calls due to template specialization. The ironic thing about this is that this makes the code (an optimized build, at least) go faster, not slower. In an optimized build the compiler will see through the calls and template parameters and generate a direct optimized inline version.

As an example of this, take a look at the implementation of the copy algorithm in algorithm.h. If you are copying an array of scalar values or other trivially copyable values, the compiler will see how the code directs this to the memcpy function and will generate nothing but a memcpy in the final code. For non-memcpyable data types the compiler will automatically understand that and do the right thing.

EASTL's primary objective is maximal performance, and it has been deemed worthwhile to make the code a little less obvious in order to achieve this goal. Every case where EASTL does something in an indirect way is by design and usually this is for the purpose of achieving the highest possible performance.

Prob.6 When I get compilation errors, they are very long and complicated looking. What do I do?

Assuming the bugs are all worked out of EASTL, these errors really do indicate that you have something wrong. EASTL is intentionally very strict about types, as it tries to minimize the chance of user errors. Unfortunately, there is no simple resolution to the problem of long compiler errors other than to deal with them. On the other hand, once you've dealt with them a few times, you tend to realize that most of the time they are the same kinds of errors.

Top five approaches to dealing with long compilation errors:

  1. Look at the line where the compilation error occurred and ignore the text of the error and just look at obvious things that might be wrong.
  2. Consider the most common typical causes of templated compilation errors and consider if any of these might be your problem. Usually one of them is.
  3. Either read through the error (it's not as hard as it may look on the surface) or copy the error to a text file and remove the extraneous text.
  4. Compile the code under GCC instead of MSVC, as GCC warnings and errors tend to be more helpful than MSVC's. Possibly also consider compiling an isolated version under Comeau C++'s free online compiler at www.comeaucomputing.com or the Dinkumware online compiler at http://dinkumware.com/exam/. 
  5. Try using an STL filter (http://www.bdsoft.com/tools/stlfilt.html) which automatically boils down template errors to simpler forms. We haven't tried this yet with EASTL. Also there is the more generic TextFilt (http://textfilt.sourceforge.net/).

Top five causes of EASTL compilation errors:

  1. const-correctness. Perhaps a quarter of container template errors are due to the user not specifying const correctly.
  2. Missing hash function. hash_map, hash_set, etc. require that you either specify a hash function or one exists for your class. See functional.h for examples of declarations of hash functions for common data types.
  3. Missing operators. Various containers and algorithms require that certain operators exist for your contained classes. For example, list requires that you can test contained objects for equivalence (i.e. operator==), while map requires that you can test contained objects for "less-ness" (operator <). If you define a Widget class and don't have a way to compare two Widgets, you will get errors when trying to put them into a map.
  4. Specifying the wrong data type. For example, it is a common mistake to forget that when you insert into a map, you need to insert a pair of objects and not just your key or value type.
  5. Incorrect template parameters. When declaring a template instantiation (e.g. map<int, int, less<int> >) you simply need to get the template parameters correct. Also note that when you have ">>" next to each other that you need to separate them by one space (e.g. "> >").

Prob.7 Templates sometimes seem to take a long time to compile. What do I do about that?

C++ compilers are generally slower than C compilers, and C++ templates are generally slower to compile than regular C++ code. EASTL has some extra functionality (such as type_traits and algorithm specializations) that is not found in most other template libraries and significantly improves performance and usefulness but adds to the amount of code that needs to be compiled. Ironically, we have a case where more source code generates faster and smaller object code.

The best solution to the problem is to use pre-compiled headers, which are available on all modern (~2002+) compilers, such as VC6.0+, GCC 3.2+, and Metrowerks 7.0+. In terms of platforms this means all 2002+ platforms.

Some users have been speeding up build times by creating project files that put all the source code in one large .cpp file. This has an effect similar to pre-compiled headers. It can go even faster than pre-compiled headers but has downsides in the way of convenience and portability.

Prob.8 I get the compiler error: "template instantiation depth exceeds maximum of 17. use -ftemplate-depth-NN to increase the maximum". 

This is a GCC error that occurs when a templated function calls a templated function which calls a templated function, etc. past a depth of 17. You can use the GCC command line argument -ftemplate-depth-40 (or some other high number) to get around this. As noted below, the syntax starting with GCC 4.5 has changed slightly.

The primary reason you would encounter this with EASTL is type traits that are used by algorithms. The type traits library is a (necessarily) highly templated set of types and functions which adds at most about nine levels of inlining. The copy and copy_backward algorithms have optimized pathways that add about four levels of inlining. If you have just a few more layers on top of that in container or user code then the default limit of 17 can be exceeded. We are investigating ways to reduce the template depth in the type traits library, but only so much can be done, as most compilers don't support type traits natively. Metrowerks is the current exception.

From the GCC documentation:

-ftemplate-depth-n

Set the maximum instantiation depth for template classes to n. 
A limit on the template instantiation depth is needed to detect 
endless recursions during template class instantiation. ANSI/ISO 
C++ conforming programs must not rely on a maximum depth greater than 17.

Note that starting with GCC 4.5 the syntax is -ftemplate-depth=N instead of -ftemplate-depth-n.

Prob.9 I'm getting errors about min and max while compiling.

You need to define NOMINMAX under VC++ when this occurs, as it otherwise defines min and max macros that interfere. There may be equivalent issues with other compilers. Also, VC++ has a specific <minmax.h> header file which defines min and max macros but which doesn't pay attention to NOMINMAX and so in that case there is nothing to do but not include that file or to undefine min and max. minmax.h is not a standard file and its min and max macros are not standard C or C++ macros or functions.

Prob.10 C++ / EASTL seems to bloat my .obj files much more than C does.

There is no need to worry. The way most C++ compilers compile templates, they compile all seen template code into the current .obj module, which results in larger .obj files and duplicated template code in multiple .obj files. However, the linker will (and must) select only a single version of any given function for the application, and these linked functions will usually be located contiguously.

Prob.11 I'm getting compiler errors regarding placement operator new being previously defined.

This can happen if you are attempting to define your own versions of placement new/delete. The C++ language standard does not allow the user to override these functions. Section 18.4.3 of the standard states:

     Placement forms
     1. These functions are reserved, a C++ program may not define functions that displace the versions in the Standard C++ library.

You may find that #defining __PLACEMENT_NEW_INLINE seems to fix your problems under VC++, but it can fail under some circumstances and is not portable and fails with other compilers, which don't have an equivalent workaround.

Prob.12 I'm getting errors related to wchar_t string  functions such as wcslen().

EASTL requires that the following EABase-related items be true. If not, then EASTL gets confused about what types it can pass to wchar_t related functions.

  • The #define EA_WCHAR_SIZE is equal to sizeof(wchar_t).
  • If sizeof(wchar_t) == 2, then char16_t is typedef'd to wchar_t.
  • If sizeof(wchar_t) == 4, then char32_t is typedef'd to wchar_t.

EABase v2.08 and later automatically does this for most current generation and all next generation platforms. With GCC 2.x, the user may need to predefine EA_WCHAR_SIZE to the appropriate value, due to limitations with the GCC compiler. Note that GCC defaults to sizeof(wchar_t) == 4, but it can be changed to 2 with the -fshort-wchar compiler command line argument. If you are using EASTL without EABase, you will need to make sure the above items are correctly defined.

Prob.13 I'm getting compiler warning C4619: there is no warning number Cxxxx (e.g. C4217).

Compiler warning C4619 is a VC++ warning which is saying that the user is attempting to enable or disable a warning which the compiler doesn't recognize. This warning only occurs if the user has the compiler set to enable warnings that are normally disabled, regardless of the warning level. The problem, however, is that there is no easy way for user code to tell what compiler warnings any given compiler version will recognize. That's why Microsoft normally disables this warning.

The only practical solution we have for this is for the user to disable warning 4619 globally or on a case-by-case basis. EA build systems such as nant/framework 2's eaconfig will usually disable 4619. In general, global enabling of 'warnings that are disabled by default' often results in quandaries such as this.

Prob.14 My stack-based fixed_vector is not respecting the object alignment requirements.

EASTL fixed_* containers rely on the compiler-supplied alignment directives, such as that implemented by EA_PREFIX_ALIGN. This is normally a good thing because it allows the memory to be local with the container. However, as documented by Microsoft at http://msdn2.microsoft.com/en-us/library/83ythb65(VS.71).aspx, this doesn't work for stack variables. The two primary means of working around this are:

  • Use something like AlignedObject<> from the EAStdC package's EAAllocator.h file.
  • Use eastl::vector with a custom allocator and have it provide aligned memory. EASTL automatically recognizes that the objects are aligned and will call the aligned version of your allocator allocate() function. You can get this aligned memory from the stack, if you need it, somewhat like how AlignedObject<> works.

Prob.15 I am getting compiler errors when using GCC under XCode (Macintosh/iphone).

The XCode environment has a compiler option which causes it to evaluate include directories recursively. So if you specify /a/b/c as an include directory, it will consider all directories underneath c to also be include directories. This option is enabled by default, though many XCode users disable it, as it is a somewhat dangerous option. The result of enabling this option with EASTL is that <EASTL/string.h> is used by the compiler when you say #include <string.h>. The solution is to disable this compiler option. It's probably a good idea to disable this option anyway, as it typically causes problems for users yet provides minimal benefits.

Prob.16 I am getting linker errors about Vsnprintf8 or Vsnprintf16.

EASTL requires the user to provide a function called Vsnprintf8 if the string::sprintf function is used. vsnprintf is not a standard C function, but most C standard libraries provide some form of it, though in some ways their implementations differ, especially in what the return value means. Also, most implementations of vsnprintf are slow, mostly due to mutexes related to locale functionality. And you can't really use vendor vsnprintf on an SPU due to the heavy standard library size. EASTL is stuck because it doesn't want to depend on something with these problems. EAStdC provides a single consistent fast lightweight, yet standards-conforming, implementation in the form of Vsnprintf(char8_t*, ...), but EASTL can't have a dependency on EAStdC. So the user must provide an implementation, even if all it does is call EAStdC's Vsnprintf or the vendor vsnprintf for that matter.

Example of providing Vsnprintf8 via EAStdC:

#include <EAStdC/EASprintf.h>
   
// User-supplied Vsnprintf8: forwards every argument to EA::StdC::Vsnprintf and returns its result unchanged.
int Vsnprintf8(char8_t* pDestination, size_t n, const char8_t* pFormat, va_list arguments)
{
    return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments);
}

// User-supplied Vsnprintf16: forwards every argument to EA::StdC::Vsnprintf and returns its result unchanged.
int Vsnprintf16(char16_t* pDestination, size_t n, const char16_t* pFormat, va_list arguments)
{
    return EA::StdC::Vsnprintf(pDestination, n, pFormat, arguments);
}

Example of providing Vsnprintf8 via C libraries:

#include <stdio.h>
   
// User-supplied Vsnprintf8 built on the C library. The call used is chosen at
// compile time: vsnprintf_s when _MSC_VER is defined, vsnprintf otherwise.
int Vsnprintf8(char8_t* p, size_t n, const char8_t* pFormat, va_list arguments)
{
    #ifdef _MSC_VER
        return vsnprintf_s(p, n, _TRUNCATE, pFormat, arguments);
    #else
        return vsnprintf(p, n, pFormat, arguments);
    #endif
}

// User-supplied Vsnprintf16 built on the C library wide-character functions.
// The call used is chosen at compile time by _MSC_VER.
// NOTE(review): Microsoft documents this function under the name _vsnwprintf_s, and the
// wide functions take wchar_t* rather than char16_t* — confirm both on your toolchain.
int Vsnprintf16(char16_t* p, size_t n, const char16_t* pFormat, va_list arguments)
{
    #ifdef _MSC_VER
        return vsnwprintf_s(p, n, _TRUNCATE, pFormat, arguments);
    #else
        return vsnwprintf(p, n, pFormat, arguments); // Won't work on Unix because its libraries implement wchar_t as int32_t.
    #endif
}

Prob.17 I am getting compiler errors about UINT64_C or UINT32_C.

This is usually an order-of-include problem that comes about due to the implementation of __STDC_CONSTANT_MACROS in C++ Standard libraries. The C++ <stdint.h> header file defines UINT64_C only if __STDC_CONSTANT_MACROS has been defined by the user or the build system; the compiler doesn't automatically define it. The failure you are seeing occurs because user code is #including a system header before #including EABase and without defining __STDC_CONSTANT_MACROS itself or globally. EABase defines __STDC_CONSTANT_MACROS and #includes the appropriate system header. But if the system header was already previously #included and __STDC_CONSTANT_MACROS was not defined, then UINT64_C doesn't get defined by anybody.

The real solution that the C++ compiler and standard library wants is for the app to globally define __STDC_CONSTANT_MACROS itself in the build.

Prob.18 I am getting a crash with a global EASTL container.

This is usually due to the compiler's lack of support for global (and static) C++ class instances. The crash is happening because the global variable exists but its constructor was not called on application startup and its member data is zeroed bytes. To handle this you need to manually initialize such variables. There are two primary ways:

Failing code:

// Demonstrates the failure: the global's constructor is never run at startup on the affected platform.
eastl::list<int> gIntList; // Global variable.
   
void DoSomething()
{
    gIntList.push_back(1); // Crash. gIntList was never constructed.
}

Declaring a pointer solution:

// The global is only a pointer, so nothing needs to be constructed at startup.
eastl::list<int>* gIntList = NULL;
   
void DoSomething()
{
    // Create the list the first time it is needed.
    if(!gIntList) // Or move this to an init function.
        gIntList = new eastl::list<int>;

    gIntList->push_back(1); // Success
}

Manual constructor call solution:

eastl::list<int> gIntList;
   
// Constructs the list in gIntList's own storage via placement new,
// standing in for the constructor call that startup did not make.
void InitSystem()
{
    new(&gIntList) eastl::list<int>;
}

// presumably InitSystem() has already been called by this point — the example does not show the call site.
void DoSomething()
{
    gIntList.push_back(1); // Success
}

Prob.19 Why doesn't EASTL support passing NULL to string functions?

The primary argument is to make functions safer for use. Why crash on NULL pointer access when you can make the code safe? That's a good argument. The counter argument, which EASTL currently makes, is:

  • It breaks consistency with the C++ STL library and C libraries, which require strings to be valid.
  • It makes the code slower and bigger for all users, though few need NULL checks.
  • The specification for how to handle NULL is simple for some cases but not simple for others. Operator < is a case where the proper handling of it in a consistent way is not simple, as all comparison code (<, >, ==, !=, >=, <=) in EASTL must universally and consistently handle the case where either or both sides are NULL. A NULL string seems similar to an empty string, but doesn't always work out so simply.
  • What about other invalid string pointers? NULL is merely one invalid value of many, with its only distinction being that sometimes it's intentionally NULL (as opposed to being NULL due to not being initialized).
  • How and where to implement the NULL checks in such a way as to do it efficiently is not always simple, given that public functions call public functions.
  • It's arguable (and in fact the intent of the C++ standard library) that using pointers that are NULL is a user/app mistake. If we really want to be safe then we should be using string objects for everything. You may not entirely buy this argument in practice, but on the other hand one might ask why is the caller of EASTL using a NULL pointer in the first place? The answer of course is that somebody gave it to him.

Debug

Debug.1 How do I set the VC++ debugger to display EASTL container data with tooltips?

See Cont.9

Debug.2 How do I view containers if the visualizer/tooltip support is not present?

Here is a table of answers about how to manually inspect containers in the debugger.

 Container Approach
slist
fixed_slist
slist is a singly-linked list. Look at the slist mNode variable. You can walk the list by looking at mNode.mpNext, etc.
list
fixed_list
list is a doubly-linked list. Look at the list mNode variable. You can walk the list forward by looking at mNode.mpNext, etc. and backward by looking at mpPrev, etc.
intrusive_list
intrusive_slist
Look at the list mAnchor node. This lets you walk forward and backward in the list via mpNext and mpPrev.
array View the array mValue member in the debugger. It's simply a C style array.
vector
fixed_vector
View the vector mpBegin value in the debugger. If the vector is long, use ", N" to limit the view length, as with someVector.mpBegin, 32
vector_set
vector_multiset
vector_map
vector_multimap
These are containers that are implemented as a sorted vector, deque, or array. They are searched via a standard binary search. You can view them the same way you view a vector or deque.
deque
deque is implemented as an array of arrays, where the arrays implement successive equally-sized segments of the deque. The mItBegin deque member points to the deque begin() position.
bitvector Look at the bitvector mContainer variable. If it's a vector, then see vector above.
bitset Look at the bitset mWord variable. The bitset is nothing but one or more uint32_t mWord items.
set
multiset
fixed_set
fixed_multiset
The set containers are implemented as a tree of elements. The set mAnchor.mpNodeParent points to the top of the tree; the mAnchor.mpNodeLeft points to the far left node of the tree (set begin()); the mAnchor.mpNodeRight points to the right of the tree (set end()).
map
multimap
fixed_map
fixed_multimap
The map containers are implemented as a tree of pairs, where pair.first is the map key and pair.second is the map value. The map mAnchor.mpNodeParent points to the top of the tree; the mAnchor.mpNodeLeft points to the far left node of the tree (map begin()); the mAnchor.mpNodeRight points to the right of the tree (map end()).
hash_map
hash_multimap
fixed_hash_map
fixed_hash_multimap
hash tables in EASTL are implemented as an array of singly-linked lists. The array is the mpBucketArray member. Each element in the list is a pair, where the first element of the pair is the map key and the second is the map value.
intrusive_hash_map
intrusive_hash_multimap
intrusive_hash_set
intrusive_hash_multiset
intrusive hash tables in EASTL are implemented very similarly to regular hash tables. See the hash_map and hash_set entries for more info.
hash_set
hash_multiset
fixed_hash_set
fixed_hash_multiset
hash tables in EASTL are implemented as an array of singly-linked lists. The array is the mpBucketArray member.
basic_string
fixed_string
fixed_substring
View the string mpBegin value in the debugger. If the string is long, use ", N" to limit the view length, as with someString.mpBegin, 32
heap
A heap is an array of data (e.g. EASTL vector) which is organized in a tree whereby the highest priority item is array[0]. The next two highest priority items are array[1] and [2]. Underneath [1] in priority are items [3] and [4], and underneath item [2] in priority are items [5] and [6]. etc.
stack
View the stack member c value in the debugger. That member will typically be a list or deque.
queue
View the queue member c value in the debugger. That member will typically be a list or deque.
priority_queue
View the priority_queue member c value in the debugger. That member will typically be a vector or deque which is organized as a heap. See the heap section above for how to view a heap.
smart_ptr View the mpValue member.

Debug.3 The EASTL source code is sometimes rather complicated looking. Why is that?

Short answer
Maximum performance.

Long answer
EASTL uses templates, type_traits, iterator categories, redundancy reduction, and branch reduction in order to achieve optimal performance. A side effect of this is that there are sometimes a lot of template parameters and multiple levels of function calls due to template specialization. The ironic thing about this is that this makes the code (an optimized build, at least) go faster, not slower. In an optimized build the compiler will see through the calls and template parameters and generate a direct optimized inline version.

As an example of this, take a look at the implementation of the copy algorithm in algorithm.h. If you are copying an array of scalar values or other trivially copyable values, the compiler will see how the code directs this to the memcpy function and will generate nothing but a memcpy in the final code. For non-memcpyable data types the compiler will automatically understand that and do the right thing.

EASTL's primary objective is maximal performance, and it has been deemed worthwhile to make the code a little less obvious in order to achieve this goal. Every case where EASTL does something in an indirect way is by design and usually this is for the purpose of achieving the highest possible performance.

Debug.4 When I get compilation errors, they are very long and complicated looking. What do I do?

Assuming the bugs are all worked out of EASTL, these errors really do indicate that you have something wrong. EASTL is intentionally very strict about types, as it tries to minimize the chance of user errors. Unfortunately, there is no simple resolution to the problem of long compiler errors other than to deal with them. On the other hand, once you've dealt with them a few times, you tend to realize that most of the time they are the same kinds of errors.

Top five approaches to dealing with long compilation errors:

  1. Look at the line where the compilation error occurred and ignore the text of the error and just look at obvious things that might be wrong.
  2. Consider the most common typical causes of templated compilation errors and consider if any of these might be your problem. Usually one of them is.
  3. Either read through the error (it's not as hard as it may look on the surface) or copy the error to a text file and remove the extraneous text.
  4. Compile the code under GCC instead of MSVC, as GCC warnings and errors tend to be more helpful than MSVC's. Possibly also consider compiling an isolated version under Comeau C++'s free online compiler at www.comeaucomputing.com or the Dinkumware online compiler at http://dinkumware.com/exam/. 
  5. Try using an STL filter (http://www.bdsoft.com/tools/stlfilt.html) which automatically boils down template errors to simpler forms. We haven't tried this yet with EASTL. Also there is the more generic TextFilt (http://textfilt.sourceforge.net/).

Top five causes of EASTL compilation errors:

  1. const-correctness. Perhaps a quarter of container template errors are due to the user not specifying const correctly.
  2. Missing hash function. hash_map, hash_set, etc. require that you either specify a hash function or one exists for your class. See functional.h for examples of declarations of hash functions for common data types.
  3. Missing operators. Various containers and algorithms require that certain operators exist for your contained classes. For example, list requires that you can test contained objects for equivalence (i.e. operator==), while map requires that you can test contained objects for "less-ness" (operator <). If you define a Widget class and don't have a way to compare two Widgets, you will get errors when trying to put them into a map.
  4. Specifying the wrong data type. For example, it is a common mistake to forget that when you insert into a map, you need to insert a pair of objects and not just your key or value type.
  5. Incorrect template parameters. When declaring a template instantiation (e.g. map<int, int, less<int> >) you simply need to get the template parameters correct. Also note that when you have ">>" next to each other that you need to separate them by one space (e.g. "> >").

Debug.5 How do I measure hash table balancing?

The following functionality lets you spelunk hash container layout.

  • There is the load_factor function which tells you the overall hashtable load, but doesn't tell you if a load is unevenly distributed.
  • You can control the load factor and thus the automated bucket redistribution with set_load_factor.
  • The local_iterator begin(size_type n) and local_iterator end(size_type) functions let you iterate each bucket individually. You can use this to examine the elements in a bucket.
  • You can use the above to get the size of any bucket, but there is also simply the bucket_size(size_type n) function.
  • The bucket_count function tells you the count of buckets. So with this you can completely visualize the layout of the hash table.
  • There is also iterator find_by_hash(hash_code_t c), for what it's worth.

The following function draws an ASCII bar graph of the hash table for easy visualization of bucket distribution:

#include <EASTL/hash_map.h>
#include <EASTL/algorithm.h>
#include <stdio.h>

template <typename HashTable>
void VisualizeHashTableBuckets(const HashTable& h)
{
    eastl_size_t bucketCount       = h.bucket_count();
    eastl_size_t largestBucketSize = 0;

    for(eastl_size_t i = 0; i < bucketCount; i++)
        largestBucketSize = eastl::max_alt(largestBucketSize, h.bucket_size(i));

    YourPrintFunction("\n --------------------------------------------------------------------------------\n");

    for(eastl_size_t i = 0; i < bucketCount; i++)
    {
        const eastl_size_t k = h.bucket_size(i) * 80 / largestBucketSize;

        char buffer[16];
        sprintf(buffer, "%3u|", (unsigned)i);
        YourPrintFunction(buffer);

        for(eastl_size_t j = 0; j < k; j++)
            YourPrintFunction("*");

        YourPrintFunction("\n");
    }

    YourPrintFunction(" --------------------------------------------------------------------------------\n");
}

This results in a graph that looks like the following (with one horizontal bar per bucket). This hashtable has a large number of collisions in each of its 11 buckets.

   ------------------------------------------------------
 0|********************************************
 1|************************************************
 2|***************************************
 3|********************************************
 4|*****************************************************
 5|*************************************************
 6|****************************************
 7|***********************************************
 8|********************************************
 9|**************************************
10|********************************************
   -----------------------------------------------------

Containers

Cont.1 Why do some containers have "fixed" versions (e.g. fixed_list) but others (e.g. deque) don't have fixed versions?

Recall that fixed containers are those that are implemented via a single contiguous block of memory and don't use a general purpose heap to allocate memory from. For example, fixed_list is a list container that implements its list by a user-configurable fixed block of memory. Such containers have an upper limit to how many items they can hold, but have the advantage of being more efficient with memory use and memory access coherency.

The reason why some containers don't have fixed versions is that such functionality doesn't make sense with these containers. Containers which don't have fixed versions include:

array, deque, bitset, stack, queue, priority_queue,
intrusive_list, intrusive_hash_map, intrusive_hash_set,
intrusive_hash_multimap, intrusive_hash_multiset,
vector_map, vector_multimap, vector_set, vector_multiset.

Some of these containers are adapters which wrap other containers and thus there is no need for a fixed version because you can just wrap a fixed container. In the case of intrusive containers, the user is doing the allocation and so there are no memory allocations. In the case of array, the container is a primitive type which doesn't allocate memory. In the case of deque, its primary purpose for being is to dynamically resize and thus the user would likely be better off using a fixed_vector.

Cont.2 Can I mix EASTL with standard C++ STL?

This is possible to some degree, though the extent depends on the implementation of C++ STL. One of the things that complicates interoperability is something called iterator categories. Containers and algorithms recognize iterator types via their category and STL iterator categories are not recognized by EASTL and vice versa.

Things that you definitely can do:

  • #include both EASTL and standard STL headers from the same .cpp file.
  • Use EASTL containers to hold STL containers.
  • Construct an STL reverse_iterator from an EASTL iterator.
  • Construct an EASTL reverse_iterator from an STL iterator.

Things that you probably will be able to do, though a given std STL implementation may prevent it:

  • Use STL containers in EASTL algorithms.
  • Use EASTL containers in STL algorithms.
  • Construct or assign to an STL container via iterators into an EASTL container.
  • Construct or assign to an EASTL container via iterators into an STL container.

Things that you would be able to do if the given std STL implementation is bug-free:

  • Use STL containers to hold EASTL containers. Unfortunately, VC7.x STL has a confirmed bug that prevents this. Similarly, STLPort versions prior to v5 have a similar bug.

Things that you definitely can't do:

  • Use an STL allocator directly with an EASTL container (though you can use one indirectly).
  • Use an EASTL allocator directly with an STL container (though you can use one indirectly).

Cont.3 Why are there so many containers?

EASTL has a large number of container types (e.g. vector, list, set) and often has a number of variations of given types (list, slist, intrusive_list, fixed_list). The reason for this is that each container is tuned to a specific need and there is no single container that works for all needs. The more the user is concerned about squeezing the most performance out of their system, the more the individual container variations become significant. It's important to note that having additional container types generally does not mean generating additional code or code bloat. Templates result in generated code regardless of what templated class they come from, and so for the most part you get optimal performance by choosing the optimal container for your needs.

Cont.4 Don't STL and EASTL containers fragment memory?

They only fragment memory if you use them in a way that does so. This is no different from any other type of container used in a dynamic way. There are various solutions to this problem, and EASTL provides additional help as well:

  • For vectors, use the reserve function (or the equivalent constructor) to set aside a block of memory for the container. The container will not reallocate memory unless you try to grow beyond the capacity you reserve.
  • EASTL has "fixed" variations of containers which allow you to specify a fixed block of memory which the container uses for its memory. The container will not allocate any memory with these types of containers and all memory will be cache-friendly due to its locality.
  • You can assign custom allocators to containers instead of using the default global allocator. You would typically use an allocator that has its own private pool of memory.
  • Where possible, add all a container's elements to it at once up front instead of adding them over time. This avoids memory fragmentation and increases cache coherency.

Cont.5 I don't see container optimizations for equivalent scalar types such as pointer types. Why?

Metrowerks (and no other, as of this writing) STL has some container specializations for type T* which maps them to type void*. The idea is that a user who declares a list of Widget* and a list of Gadget* will generate only one container: a list of void*. As a result, code generation will be smaller. Often this is done only in optimized builds, as such containers are harder to view in debug builds due to type information being lost.

The addition of this optimization is under consideration for EASTL, though it might be noted that optimizing compilers such as VC++ are already capable of recognizing duplicate generated code and folding it automatically as part of link-time code generation (LTCG) (a.k.a. "whole program optimization"). This has been verified with VC++, as the following code and resulting disassembly demonstrate:

eastl::list<int*>        intPtrList;
eastl::list<TestObject*> toPtrList;

eastl_size_t n1 = intPtrList.size();
eastl_size_t n2 = toPtrList.size();

0042D288  lea         edx,[esp+14h]
0042D28C  call        eastl::list<TestObject>::size (414180h)
0042D291  push        eax 
0042D292  lea         edx,[esp+24h]
0042D296  call        eastl::list<TestObject>::size (414180h)
Note that in the above case the compiler folded the two implementations of size() into a single implementation.

Cont.6 What about alternative container and algorithm implementations (e.g. treaps, skip lists, avl trees)?

EASTL chooses to implement some alternative containers and algorithms and not others. It's a matter of whether or not the alternative provides truly complementary or improved functionality over existing containers. The following is a list of some implemented and non-implemented alternatives and the rationale behind each:

Implemented:

  • intrusive_list, etc. -- Saves memory and improves cache locality.
  • vector_map, etc. -- Saves memory and improves cache locality.
  • ring_buffer -- Useful for some types of operations and has no alternative.
  • shell_sort -- Useful sorting algorithm.
  • sparse_matrix -- Useful for some types of operations and has no alternative.

Not implemented:

  • skip lists (alternative to red-black tree) -- These use more memory and usually perform worse than rbtrees.
  • treap (alternative to red-black tree) -- These are easier and smaller than rbtrees, but perform worse.
  • avl tree (alternative to red-black tree) -- These have slightly better search performance than rbtrees, but significantly worse insert/remove performance.
  • btree (alternative to red-black tree) --  These are no better than rbtrees.

If you have an idea of something that should be implemented, please suggest it or even provide at least a prototypical implementation.

Cont.7 Why are tree-based EASTL containers hard to read with a debugger?

Short answer
Maximum performance and design mandates.

Long answer
You may notice that when you have a tree-based container (e.g. set, map)  in the debugger that it isn't automatically able to recognize the tree nodes as containing instances of your contained object. You can get the debugger to do what you want with casting statements in the debug watch window, but this is not an ideal solution. The reason this is happening is that node-based containers always use an anonymous node type as the base class for container nodes. This is primarily done for performance, as it allows the node manipulation code to exist as a single non-templated library of functions and it saves memory because containers will have one or two base nodes as container 'anchors' and you don't want to allocate a node of the size of the user data when you can just use a base node. See list.h for an example of this and some additional in-code documentation on this.

Additionally, EASTL has the design mandate that an empty container constructs no user objects. This is both for performance reasons and because doing so would skew the user's tracking of object counts and might possibly break some expectation the user has about object lifetimes.

Currently this debug issue exists only with tree-based containers. Other node-based containers such as list and slist use a trick to get around this problem in debug builds.

Cont.8 How do I assign a custom allocator to an EASTL container?

There are two ways of doing this:

  1. Use the set_allocator function that is present in each container.
  2. Specify a new allocator type via the Allocator template parameter that is present in each container.

For item #1, EASTL expects that you provide an instance of an allocator of the type that EASTL recognizes. This is simple but has the disadvantage that all such allocators must be of the same class. The class would need to have C++ virtual functions in order to allow a given instance to act differently from another instance.

For item #2, you specify that the container use your own allocator class. The advantage of this is that your class can be implemented any way you want and doesn't require virtual functions for differentiation from other instances. Due to the way C++ works your class would necessarily have to use the same member function names as the default allocator class type. In order to make things easier, we provide a skeleton allocator here which you can copy and fill in with your own implementation.

class custom_allocator
{
public:
    custom_allocator(const char* pName = EASTL_NAME_VAL("custom allocator"))
    {
        #if EASTL_NAME_ENABLED
            mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME;
        #endif

        // Possibly do something here.
    }

    custom_allocator(const allocator& x, const char* pName = EASTL_NAME_VAL("custom allocator"));
    {
        #if EASTL_NAME_ENABLED
            mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME;
        #endif

        // Possibly copy from x here.
    }

    ~custom_allocator();
    {
        // Possibly do something here.
    }

    custom_allocator& operator=(const custom_allocator& x)
    {
        // Possibly copy from x here.
        return *this;
    }

    void* allocate(size_t n, int flags = 0)
    {
        // Implement the allocation here.
    }

    void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0)
    {
        // Implement the allocation here.
    }

    void deallocate(void* p, size_t n)
    {
        // Implement the deallocation here.
    }

    const char* get_name() const
    {
        #if EASTL_NAME_ENABLED
            return mpName;
        #else
            return "custom allocator";
        #endif
    }

    void set_name(const char* pName)
    {
        #if EASTL_NAME_ENABLED
            mpName = pName;
        #endif
    }

protected:
    // Possibly place instance data here.

    #if EASTL_NAME_ENABLED
        const char* mpName; // Debug name, used to track memory.
    #endif
};


inline bool operator==(const allocator& a, const allocator& b)
{
    // Provide a comparison here.
}

inline bool operator!=(const allocator& a, const allocator& b)
{
    // Provide a negative comparison here.
}

Here's an example of how to use the above custom allocator:

// Declare a Widget list and have it default construct.
list<Widget, custom_allocator> widgetList;

// Declare a Widget list and have it construct with a copy of some global allocator.
list<Widget, custom_allocator> widgetList2(gSomeGlobalAllocator);

// Declare a Widget list and have it default construct, but assign
// an underlying implementation after construction.
list<Widget, custom_allocator> widgetList;
widgetList.get_allocator().mpIAllocator = new WidgetAllocatorImpl;

Cont.9 How do I set the VC++ debugger to display EASTL container data with tooltips?

Visual Studio supports this via the AutoExp.dat file, an example of which is present with this documentation.

Sometimes the AutoExp.dat doesn't seem to work. Avery Lee's explanation:

If I had to take a guess, the problem is most likely in the cast to the concrete node type. These are always tricky because, for some strange reason, the debugger is whitespace sensitive with regard to specifying template types. You might try manually checking one of the routines of the specific map instantiation and checking that the placement of whitespace and const within the template expression still matches exactly. In some cases the compiler uses different whitespace rules depending on the value type which makes it impossible to correctly specify a single visualizer – this was the case for eastl::list<>, for which I was forced to include sections for both cases. The downside is that you have a bunch of (error) entries either way.

Cont.10 How do I use a memory pool with a container?

Using custom memory pools is a common technique for decreasing memory fragmentation and increasing memory cache locality. EASTL gives you the flexibility of defining your own memory pool systems for containers. There are two primary ways of doing this:

  • Assign a custom allocator to a container. eastl::fixed_pool provides an implementation.
  • Use one of the EASTL fixed containers, such as fixed_list.

Custom Allocator
In the custom allocator case, you will want to create a memory pool and assign it to the container. For purely node-based containers such as list, slist, map, set, multimap, and multiset, your pool simply needs to be able to allocate list nodes. Each of these containers has a member typedef called node_type which defines the type of node allocated by the container. So if you have a memory pool that has a constructor that takes the size of pool items and the count of pool items, you would do this (assuming that MemoryPool implements the Allocator interface):

typedef list<Widget, MemoryPool> WidgetList;           // Declare your WidgetList type.

MemoryPool myPool(sizeof(WidgetList::node_type), 100); // Make a pool of 100 Widget nodes.
WidgetList myList(&myPool);                            // Create a list that uses the pool.

In the case of containers that are array-based, such as vector and basic_string, memory pools don't work very well as these containers work on a realloc-basis instead of by adding incremental nodes. What we want to do with these containers is assign a sufficient block of memory to them and reserve() the container's capacity to the size of the memory.

In the case of mixed containers which are partly array-based and partly node-based, such as hash containers and deque, you can use a memory pool for the nodes but will need a single array block to supply for the buckets (hash containers and deque both use a bucket-like system).

You might consider using eastl::fixed_pool as such an allocator, as it provides such functionality and allows the user to provide the actual memory used for the pool. Here is some example code:

char buffer[256];

list<Widget, fixed_pool> myList;
myList.get_allocator().init(buffer, 256);

Fixed Container
In the fixed container case, the container does all the work for you. To use a list which implements a private pool of memory, just declare it like so:

fixed_list<Widget, 100> fixedList; // Declare a fixed_list that can hold 100 Widgets

Cont.11 How do I write a comparison (operator<()) for a struct that contains two or more members? 

See Algo.2

Cont.12 Why doesn't container X have member function Y?

Why don't the list or vector containers have a find() function? Why doesn't the vector container have a sort() function? Why doesn't the string container have a mid() function? These are common examples of such questions.

The answer usually boils down to two reasons:

  • The functionality exists in a more centralized location elsewhere, such as the algorithms.
  • The functionality can be had by using other member functions.

In the case of find and sort functions not being part of containers, the find algorithm and sort algorithm are centralized versions that apply to any container. Additionally, the algorithms allow you to specify a sub-range of the container on which to apply the algorithm. So in order to find an element in a list, you would do this:

list<int>::iterator i = find(list.begin(), list.end(), 3);

And in order to sort a vector, you would do this:

quick_sort(v.begin(), v.end());   // Sort the entire array.
quick_sort(&v[3], &v[8]);         // Sort the items at the indexes in the range of [3, 8).

In the case of functionality that can be had by using other member functions, note that EASTL follows the philosophy that duplicated functionality should not exist in a container, with exceptions being made for cases where mistakes and unsafe practices commonly happen if the given function isn't present. In the case of string not having a mid function, this is because there is a string constructor that takes a sub-range of another string. So to make a string out of the middle of another, you would do this:

string strMid(str, 3, 5); // Make a new string of the characters from the source range of [3, 3+5).

It might be noted that the EASTL string class is unique among EASTL containers in that it sometimes violates the minimum functionality rule. This is so because the std C++ string class similarly does so and EASTL aims to be compatible.

Cont.13 How do I search a hash_map of strings via a char pointer efficiently? If I use map.find("hello") it creates a temporary string, which is inefficient.

The problem is illustrated with this example:

map<string, Widget> swMap;
  ...
map<string, Widget>::iterator it = swMap.find("blue"); // A temporary string object is created here.

In this example, the find function expects a string object and not a string literal and so (silently!) creates a temporary string object for the duration of the find. There are two solutions to this problem:

  • Make the map a map of char pointers instead of string objects. Don't forget to write a custom compare or else the default comparison function will compare pointer values instead of string contents.
  • Use the EASTL hash_map::find_as function, which allows you to find an item in a hash container via an alternative key than the one the hash table uses.

Cont.14 Why are set and hash_set iterators const (i.e. const_iterator)?

The situation is illustrated with this example:

set<int> intSet;

intSet.insert(1);
set<int>::iterator i = intSet.begin();
*i = 2; // Error: iterator i is const.

In this example, the iterator is a regular iterator and not a const_iterator, yet the compiler gives an error when trying to change the iterator value. The reason this is so is that a set is an ordered container and changing the value would make it out of order. Thus, set and multiset iterators are always const_iterators. If you need to change the value and are sure the change will not alter the container order, use const_cast or declare mutable member variables for your contained object. This resolution is the one blessed by the C++ standardization committee.

Cont.15 How do I prevent my hash container from re-hashing?

If you want to make a hashtable never re-hash (i.e. increase/reallocate its bucket count), call set_max_load_factor with a very high value such as 100000.f.

Similarly, you can control the bucket growth factor with the rehash_policy function. By default, when buckets reallocate, they reallocate to about twice their previous count. You can control that value as with the example code here:

hash_set<int> hashSet;
hashSet.rehash_policy().mfGrowthFactor = 1.5f; // Grow the bucket count by about 1.5x (instead of about 2x) on each reallocation.

Cont.16 Which uses less memory, a map or a hash_map?

A hash_map will virtually always use less memory. A hash_map will use an average of two pointers per stored element, while a map uses three pointers per stored element.

Cont.17 How do I write a custom hash function?

You can look at the existing hash functions in functional.h, but we provide a couple examples here.

To write a specific hash function for a Widget class, you would do this:

struct WidgetHash {
    size_t operator()(const Widget& w) const
        { return w.id; }
};

hash_set<Widget, WidgetHash> widgetHashSet;

To write a generic (templated) hash function for a set of similar classes (in this case that have an id member), you would do this:

// Generic hash functor for any type T that exposes an 'id' member;
// the id is used directly as the hash value.
template <typename T>
struct GeneralHash
{
    size_t operator()(const T& object) const
    {
        const size_t hashValue = object.id;
        return hashValue;
    }
};

hash_set<Widget, GeneralHash<Widget> > widgetHashSet;
hash_set<Dogget, GeneralHash<Dogget> > doggetHashSet;

Cont.18 How do I write a custom compare function for a map or set?

The sorted containers require that an operator< exist for the stored values or that the user provide a suitable custom comparison function. A custom comparison function can be implemented like so:

struct WidgetLess {     bool operator()(const Widget& w1, const Widget& w2) const         { return w.id < w2.id; } }; set<Widget, WidgetLess> wSet;

It's important that your comparison function must be consistent in its behaviour, else the container will either be unsorted or a crash will occur. This concept is called "strict weak ordering."

Cont.19 How do I force my vector or string capacity down to the size of the container?

You can simply use the set_capacity() member function which is present in both vector and string. This is a function that is not present in std STL vector and string functions.

eastl::vector<Widget> x;
x.set_capacity();   // Shrink x's capacity to be equal to its size.

eastl::vector<Widget> x;
x.set_capacity(0);  // Completely clear x.

To compact your vector or string in a way that would also work with std STL you need to do the following.

How to shrink a vector's capacity to be equal to its size:

std::vector<Widget> x;
std::vector<Widget>(x).swap(x); // Shrink x's capacity.
How to completely clear a std::vector (size = 0, capacity = 0, no allocation):
std::vector<Widget> x;
std::vector<Widget>().swap(x); // Completely clear x.

Cont.20 How do I iterate a container while (selectively) removing items from it?

All EASTL containers have an erase function which takes an iterator as an argument and returns an iterator to the next item. Thus, you can erase items from a container while iterating it like so:

set<int> intSet;
set<int>::iterator i = intSet.begin();

while(i != intSet.end())
{
    if(*i & 1) // Erase all odd integers from the container.
        i = intSet.erase(i); // erase returns an iterator to the next item, so i stays valid.
    else
        ++i;
}

Cont.21 How do I store a pointer in a container?

The problem with storing pointers in containers is that clearing the container will not free the pointers automatically. There are two conventional resolutions to this problem:

  • Manually free pointers when removing them from containers. 
  • Store the pointer as a smart pointer instead of a "raw" pointer.

The advantage of the former is that it makes the user's intent obvious and prevents the possibility of smart pointer "thrashing" with some containers. The disadvantage of the former is that it is more tedious and error-prone.

The advantage of the latter is that your code will be cleaner and will always be error-free. The disadvantage is that it is perhaps slightly obfuscating and with some uses of some containers it can cause smart pointer thrashing, whereby a resize of a linear container (e.g. vector) can cause shared pointers to be repeatedly incremented and decremented with no net effect.

It's important that you use a shared smart pointer and not an unshared one such as C++ auto_ptr, as the latter will result in crashes upon linear container resizes. Here we provide an example of how to create a list of smart pointers:

list< shared_ptr<Widget> > wList;

wList.push_back(shared_ptr<Widget>(new Widget));
wList.pop_back(); // The Widget will be freed.

Cont.22 How do I make a union of two containers? difference? intersection?

The best way to accomplish this is to sort your container (or use a sorted container such as set) and then apply the set_union, set_difference, or set_intersection algorithms.

Cont.23 How do I override the default global allocator? 

There are multiple ways to accomplish this. The allocation mechanism is defined in EASTL/internal/config.h and in allocator.h/cpp. Overriding the default global allocator means overriding these files, overriding what these files refer to, or changing these files outright. Here is a list of things you can do, starting with the simplest:

  • Simply provide the following versions of operator new (which EASTL requires, actually):
        void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line);
        void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line);
  • Predefine the config.h macros for EASTLAlloc, EASTLFree, etc. See config.h for this.
  • Override config.h entirely via EASTL_USER_CONFIG_HEADER. See config.h for this.
  • Provide your own version of allocator.h/cpp
  • Provide your own version of config.h. 

If you redefine the allocator class, you can make it work however you want.

Note that config.h defines EASTLAllocatorDefault, which returns the default allocator instance. As documented in config.h, this is not a global allocator which implements all container allocations but is the allocator that is used when EASTL needs to allocate memory internally. There are very few cases where EASTL allocates memory internally, and in each of these it is for a sensible reason that is documented to behave as such.

Cont.24 How do I do trick X with the string container?

There seem to be many things users want to do with strings. Perhaps the most commonly requested EASTL container extensions are string class shortcut functions. While some of these requests are being considered, we provide some shortcut functions here.

find_and_replace

template <typename String>
void find_and_replace(String& s, const typename String::value_type* pFind, const typename String::value_type* pReplace)    
{
    for(size_t i; (i = source.find(pFind)) != T::npos; )
        s.replace(i, eastl::CharStrlen(pFind), pReplace);
}

Example:
    find_and_replace(s, "hello", "hola");

trim front (multiple chars)

// Removes from the front of s every leading character that appears in pValues.
template <typename String>
void trim_front(String& s, const typename String::value_type* pValues)
{
    // Index of the first character to keep.
    const typename String::size_type firstKept = s.find_first_not_of(pValues);
    s.erase(0, firstKept);
}

Example:
    trim_front(s, " \t\n\r");

trim back (multiple chars)

// Removes from the back of s every trailing character that appears in pValues.
template <typename String>
void trim_back(String& s, const typename String::value_type* pValues)
{
    // find_last_not_of returns npos when every character is in pValues; npos + 1 wraps
    // to 0, so the string is then resized to empty.
    s.resize(s.find_last_not_of(pValues) + 1);
}

Example:
    trim_back(s, " \t\n\r");

prepend

// Inserts the null-terminated string p at the beginning of s.
template <typename String>
void prepend(String& s, const typename String::value_type* p)
{
    const typename String::size_type frontIndex = 0;
    s.insert(frontIndex, p);
}

Example:
    prepend(s, "log: ");

begins_with

template <typename String>
bool begins_with(const String& s, const typename String::value_type* p)
{
    return s.compare(0, eastl::CharStrlen(p), p) == 0;
}

Example:
    if(begins_with(s, "log: ")) ...

ends_with

template <typename String>
bool ends_with(const String& s, const typename String::value_type* p)
{
    const typename String::size_type n1 = s.size();
    const typename String::size_type n2 = eastl::CharStrlen(p);
    return ((n1 >= n2) && s.compare(n1 - n2, n2, p) == 0);
}

Example:
    if(ends_with(s, "test.")) ...

tokenize
Here is a simple tokenization function that acts very much like the C strtok function. 

// Splits s into tokens separated by any of the characters in pDelimiters.
//   s               - the string to split.
//   pDelimiters     - null-terminated set of delimiter characters.
//   resultArray     - receives the tokens, in order of appearance.
//   resultArraySize - capacity of resultArray; no more than this many tokens are written.
// Returns the number of tokens written to resultArray.
template <typename String>
size_t tokenize(const String& s, const typename String::value_type* pDelimiters,
                String* resultArray, size_t resultArraySize)
{
    size_t n = 0;
    typename String::size_type lastPos = s.find_first_not_of(pDelimiters, 0);       // Start of the current token.
    typename String::size_type pos     = s.find_first_of(pDelimiters, lastPos);     // One past the end of the current token.

    // The capacity check must guard both "more tokens" conditions. Without the inner
    // parentheses, && binds tighter than || and the loop writes past resultArraySize.
    while((n < resultArraySize) && ((pos != String::npos) || (lastPos != String::npos)))
    {
        resultArray[n++].assign(s, lastPos, pos - lastPos);
        lastPos = s.find_first_not_of(pDelimiters, pos);
        pos     = s.find_first_of(pDelimiters, lastPos);
    }

    return n;
}

Example:
   string resultArray[32];
   tokenize(s, " \t", resultArray, 32); // Returns the number of tokens written to resultArray.

Cont.25 How do EASTL smart pointers compare to Boost smart pointers? 

EASTL's smart pointers are nearly identical to Boost (including all that crazy member template and dynamic cast functionality in shared_ptr), but are not using the Boost source code. EA legal has already stated that it is fine to have smart pointer classes with the same names and functionality as those present in Boost. EA legal specifically looked at the smart pointer classes in EASTL for this. There are two differences between EASTL smart pointers and Boost smart pointers:

  • EASTL smart pointers don't have thread safety built-in. It was deemed that this is too much overhead and that thread safety is something best done at a higher level. By coincidence the C++ library proposal to add shared_ptr also omits the thread safety feature. FWIW, I put a thread-safe shared_ptr in EAThread, though it doesn't attempt to do all the fancy member template things that Boost shared_ptr does. Maybe I'll add that some day if people care.
  • EASTL shared_ptr object deletion goes through a deletion object instead of through a virtual function interface. 95% of the time this makes no difference (aside from being more efficient), but the primary case where it matters is when you have shared_ptr<void> and assign to it something like "new Widget". The problem is that shared_ptr<void> doesn't know what destructor to call and so doesn't call a destructor unless you specify a custom destructor object as part of the template specification. I don't know what to say about this one, as it is less safe, but forcing everybody to have the overhead of additional templated classes and virtual destruction functions doesn't seem to be in the spirit of high performance or lean game development.

There is the possibility of making a shared_ptr_boost which is completely identical to Boost shared_ptr. So perhaps that will be done some day.

Cont.26 How do you forward-declare an EASTL container?

Here are some examples of how to do this:

namespace eastl
{
    class allocator; // Forward declare the allocator type used by the typedefs below.

    template <typename T, typename Allocator> class basic_string;
    typedef basic_string<char, allocator> string8;   // Forward declare EASTL's string8 type.

    template <typename T, typename Allocator> class vector;
    typedef vector<char, allocator> CharArray;       // Forward declare a vector of char.

    template <typename Value, typename Hash, typename Predicate, typename Allocator, bool bCacheHashCode> class hash_set;

    template <typename Key, typename T, typename Compare, typename Allocator> class map;
}

The forward declaration can be used to declare a pointer or reference to such a class. It cannot be used to declare an instance of a class or refer to class data, static or otherwise. Nevertheless, forward declarations for pointers and references are useful for reducing the number of header files a header file needs to include.

Cont.27 How do I make two containers share a memory pool?

EASTL (and std STL) allocators are specified by value semantics and not reference semantics. Value semantics is more powerful (because a value can also be a reference, but not the other way around), but is not always what people expect if they're used to writing things the other way.

Here is some example code:

struct fixed_pool_reference
{
public:
fixed_pool_reference()
{
mpFixedPool = NULL;
}

fixed_pool_reference(eastl::fixed_pool& fixedPool)
{
mpFixedPool = &fixedPool;
}

fixed_pool_reference(const fixed_pool_reference& x)
{
mpFixedPool = x.mpFixedPool;
}

fixed_pool_reference& operator=(const fixed_pool_reference& x)
{
mpFixedPool = x.mpFixedPool;
return *this;
}

void* allocate(size_t /*n*/, int /*flags*/ = 0)
{
return mpFixedPool->allocate();
}

void* allocate(size_t /*n*/, size_t /*alignment*/, size_t /*offset*/, int /*flags*/ = 0)
{
return mpFixedPool->allocate();
}

void deallocate(void* p, size_t /*n*/)
{
return mpFixedPool->deallocate(p);
}

const char* get_name() const
{
return "fixed_pool_reference";
}

void set_name(const char* /*pName*/)
{
}

protected:
friend bool operator==(const fixed_pool_reference& a, const fixed_pool_reference& b);
friend bool operator!=(const fixed_pool_reference& a, const fixed_pool_reference& b);

eastl::fixed_pool* mpFixedPool;
}; inline bool operator==(const fixed_pool_reference& a, const fixed_pool_reference& b) { return (a.mpFixedPool == b.mpFixedPool);
} inline bool operator!=(const fixed_pool_reference& a, const fixed_pool_reference& b) { return (a.mpFixedPool != b.mpFixedPool); }

Example usage of the above:

typedef eastl::list<int, fixed_pool_reference> IntList;

IntList::node_type buffer[2];                                                      // Storage for two list nodes.
eastl::fixed_pool  myPool(buffer, sizeof(buffer), sizeof(IntList::node_type), 2);

IntList myList1(myPool); // Both lists draw their nodes from myPool.
IntList myList2(myPool);

myList1.push_back(37);
myList2.push_back(39);

Cont.28 Can I use a std (STL) allocator with EASTL?

No. EASTL allocators are similar in interface to std STL allocators, but not 100% compatible. If it was possible to make them compatible with std STL allocators but also match the design of EASTL then compatibility would exist. The primary reasons for lack of compatibility are:

  • EASTL allocators have a different allocate function signature.
  • EASTL allocators have as many as four extra required functions: ctor(name), get_name(), set_name(), allocate(size, align, offset).
  • EASTL allocators have an additional allocate function specifically for aligned allocations, as listed directly above.

What are the requirements of classes stored in containers?

Class types stored in containers must have:

  • a public copy constructor
  • a public assignment operator
  • a public destructor
  • an operator < that compares two such classes (sorted containers only).
  • an operator == that compares two such classes (hash containers only).

Recall that the compiler generates basic versions of these functions for you when you don't implement them yourself, so you can omit any of the above if the compiler-generated version is sufficient.

For example, the following code will act incorrectly, because the user forgot to implement an assignment operator. The compiler-generated assignment operator will assign the refCount value, which the user doesn't want, and which will be called by the vector during resizing.

struct NotAPod
{
   NotAPod(const NotAPod&) {} // Intentionally don't copy the refCount 
 int refCount; // refCounts should not be copied between NotAPod instances. }; eastl::vector<NotAPod> v;

Algorithms

Algo.1 I'm getting screwy behavior in sorting algorithms or sorted containers. What's wrong?

It may be possible that you are seeing floating point roundoff problems. Many STL algorithms require object comparisons to act consistently. However, floating point values sometimes compare differently between uses because in one situation a value might be in 32 bit form in system memory, whereas in another situation that value might be in an FPU register with a different precision. These are difficult problems to track down and aren't the fault of EASTL or whatever similar library you might be using. There are various solutions to the problem, but the important thing is to find a way to force the comparisons to be consistent.

The code below was an example of this happening, whereby the object pA->mPos was stored in system memory while pB->mPos was stored in a register and comparisons were inconsistent and a crash ensued.

// Comparison functor that orders world objects by their distance from a fixed origin.
// The distances are floating point values computed anew for each comparison, which is
// what exposes this functor to the precision inconsistency described above.
class SortByDistance : public binary_function<WorldTreeObject*, WorldTreeObject*, bool>
{
private:
    Vector3 mOrigin; // The point that distances are measured from.

public:
    SortByDistance(Vector3 origin) {
        mOrigin = origin;
    }

    // Returns true if pA is closer to mOrigin than pB is.
    bool operator()(WorldTreeObject* pA, WorldTreeObject* pB) const {
        return (((WorldObject*)pA)->mPos - mOrigin).GetLength()
             < (((WorldObject*)pB)->mPos - mOrigin).GetLength();
    }
};

Algo.2 How do I write a comparison (operator<()) for a struct that contains two or more members? 

For a struct with two members such as the following:

struct X {
    Blah m1;
    Blah m2;
};

You would write the comparison function like this:

// Orders by m1 first; m2 breaks the tie when the m1 values are equal.
bool operator<(const X& a, const X& b) {
    if(a.m1 == b.m1)
        return (a.m2 < b.m2);
    return (a.m1 < b.m1);
}

or, using only operator < but more instructions:

// Orders by m1 first and then by m2, using nothing but operator < on the members.
bool operator<(const X& a, const X& b) {
    if(a.m1 < b.m1)
        return true;
    if(b.m1 < a.m1)
        return false;
    return (a.m2 < b.m2); // Neither m1 is less than the other, so m2 decides.
}

For a struct with three members, you would have:

// Orders by m1, then m2, then m3: each later member is consulted only
// when all earlier members compare equal.
bool operator<(const X& a, const X& b) {
    if(a.m1 != b.m1)
        return (a.m1 < b.m1);
    if(a.m2 != b.m2)
        return (a.m2 < b.m2);
    return (a.m3 < b.m3);
}

And a somewhat messy implementation if you wanted to use only operator <.

Note also that you can use the above technique to implement operator < for spatial types such as vectors, points, and rectangles. You would simply treat the members of the struct as an array of values and ignore the fact that they have spatial meaning. All operator < cares about is that things order consistently.

// Orders points by x first; y breaks the tie when the x values are equal.
bool operator<(const Point2D& a, const Point2D& b) {
    if(a.x == b.x)
        return (a.y < b.y);
    return (a.x < b.x);
}

Algo.3 How do I sort something in reverse order?

Normally sorting puts the lowest value items first in the sorted range. You can change this by simply reversing the comparison. For example:

sort(intVector.begin(), intVector.end(), greater<int>());

It's important that you use operator > instead of >=. The comparison function must return false for every case where values are equal.

Algo.4 I'm getting errors about min and max while compiling.

You need to define NOMINMAX under VC++ when this occurs, as it otherwise defines min and max macros that interfere. There may be equivalent issues with other compilers. Also, VC++ has a specific <minmax.h> header file which defines min and max macros but which doesn't pay attention to NOMINMAX and so in that case there is nothing to do but not include that file or to undefine min and max. minmax.h is not a standard file and its min and max macros are not standard C or C++ macros or functions.

Algo.5 Why don't algorithms take a container as an argument instead of iterators? A container would be more convenient.

Having algorithms that use containers instead of iterators would reduce functionality with no increase in performance. This is because the use of iterators allows for the application of algorithms to sub-ranges of containers and allows for the application of algorithms to containers that aren't formal C++ objects, such as C-style arrays.

Providing additional algorithms that use containers would introduce redundancy with respect to the existing algorithms that use iterators.

Algo.6 Given a container of pointers, how do I find an element by value (instead of by pointer)?

Functions such as find_if help you find a T element in a container of Ts. But if you have a container of pointers such as vector<Widget*>, these functions will enable you to find an element that matches a given Widget* pointer, but they don't let you find an element that matches a given Widget object.

You can write your own iterating 'for' loop and compare values, or you can use a generic function object to do the work if this is a common task:

// Unary predicate for containers of pointers: returns true for a pointer
// whose pointee compares equal to a given value.
template<typename T>
struct dereferenced_equal
{
    const T& mValue; // Held by reference, so the value must outlive this predicate.

    dereferenced_equal(const T& value) : mValue(value) { }

    // find_if invokes its predicate through the function-call operator.
    bool operator()(const T* pValue) const { return *pValue == mValue; }
};

...

find_if(container.begin(), container.end(), dereferenced_equal<Widget>(someWidget));

Algo.7 When do stored objects need to support operator < vs. when do they need to support operator ==?

Any object which is sorted needs to have operator < defined for it, implicitly via operator < or explicitly via a user-supplied Compare function. Sets and map containers require operator <, while sort, binary search, and min/max algorithms require operator <.

Any object which is compared for equality needs to have operator == defined for it, implicitly via operator == or explicitly via a user-supplied BinaryPredicate function. Hash containers require operator ==, while many of the algorithms other than those mentioned above for operator < require operator ==.

Some algorithms and containers require neither < nor ==. Interestingly, no algorithm or container requires both < and ==.

Algo.8 How do I sort via pointers or array indexes instead of objects directly?

Pointers

vector<TestObject>  toArray;
vector<TestObject*> topArray;

for(eastl_size_t i = 0; i < 32; i++)
   toArray.push_back(TestObject(rng.RandLimit(20)));
for(eastl_size_t i = 0; i < 32; i++) // This needs to be a second loop because the addresses might change in the first loop due to container resizing.
   topArray.push_back(&toArray[i]);

struct TestObjectPtrCompare
{
    bool operator()(TestObject* a, TestObject* b)
        { return a->mX < a->mX; }
};

quick_sort(topArray.begin(), topArray.end(), TestObjectPtrCompare());

Array indexes

vector<TestObject>   toArray;
vector<eastl_size_t> toiArray;

for(eastl_size_t i = 0; i < 32; i++)
{
    toArray.push_back(TestObject(rng.RandLimit(20)));
    toiArray.push_back(i);
}

// Orders array indexes by the TestObjects found at those indexes in *mpArray.
struct TestObjectIndexCompare
{
    vector<TestObject>* mpArray; // The array that the indexes refer to; not owned.

    TestObjectIndexCompare(vector<TestObject>* pArray) : mpArray(pArray) { }
    TestObjectIndexCompare(const TestObjectIndexCompare& x) : mpArray(x.mpArray){ }
    TestObjectIndexCompare& operator=(const TestObjectIndexCompare& x) { mpArray = x.mpArray; return *this; }

    // Returns true if the object at index a sorts before the object at index b.
    bool operator()(eastl_size_t a, eastl_size_t b) const
       { return (*mpArray)[a] < (*mpArray)[b]; }
};

quick_sort(toiArray.begin(), toiArray.end(), TestObjectIndexCompare(&toArray));

Array indexes (simpler version using toArray as a global variable)

vector<TestObject>   toArray;
vector<eastl_size_t> toiArray;

for(eastl_size_t i = 0; i < 32; i++)
{
    toArray.push_back(TestObject(rng.RandLimit(20)));
    toiArray.push_back(i);
}

// Orders array indexes by the TestObjects found at those indexes in the global toArray.
struct TestObjectIndexCompare
{
    // Returns true if the object at index a sorts before the object at index b.
    bool operator()(eastl_size_t a, eastl_size_t b) const
       { return toArray[a] < toArray[b]; }
};

quick_sort(toiArray.begin(), toiArray.end(), TestObjectIndexCompare()); // The functor reads the global toArray, so it takes no constructor argument.

Iterators

Iter.1 What's the difference between iterator, const iterator, and const_iterator?

An iterator can be modified and the item it points to can be modified.
A const iterator cannot be modified, but the items it points to can be modified.
A const_iterator can be modified, but the items it points to cannot be modified.
A const const_iterator cannot be modified, nor can the items it points to.

This situation is much like with char pointers:

Iterator type Pointer equivalent
iterator char*
const iterator char* const
const_iterator const char*
const const_iterator const char* const

Iter.2 How do I tell from an iterator what type of thing it is iterating?

Use the value_type typedef from iterator_traits, as in this example

template <typename Iterator>
void DoSomething(Iterator first, Iterator last)
{
    typedef typename iterator_traits<Iterator>::value_type;

    // use value_type
}

Iter.3 How do I iterate a container while (selectively) removing items from it?

All EASTL containers have an erase function which takes an iterator as an argument and returns an iterator to the next item. Thus, you can erase items from a container while iterating it like so:

set<int> intSet;
set<int>::iterator i = intSet.begin();

while(i != intSet.end())
{
    if(*i & 1) // Erase all odd integers from the container.
        i = intSet.erase(i);
    else
        ++i;
}

Iter.4 What is an insert_iterator?

An insert_iterator is a utility class which is like an iterator except that when you assign a value to it, the insert_iterator inserts the value into the container (via insert()) and increments the iterator. Similarly, there are front_insert_iterator and back_insert_iterator, which are similar to insert_iterator except that assigning a value to them causes them to call push_front and push_back, respectively, on the container. These utilities may seem slightly abstract, but they have uses in generic programming.


End of document




================================================ FILE: doc/html/EASTL Glossary.html ================================================ EASTL Glossary

EASTL Glossary

This document provides definitions to various terms related to EASTL. Items that are capitalized are items that are used as template parameters.

adapter An adapter is something that encapsulates a component to provide another interface, such as a C++ class which makes a stack out of a list.
algorithm
Algorithms are standalone functions which manipulate data which usually but not necessarily comes from a container. Some algorithms change the data while others don't. Examples are reverse, sort, find, and remove.
associative container An associative container is a variable-sized container that supports efficient retrieval of elements (values) based on keys. It supports insertion and removal of elements, but differs from a sequence in that it does not provide a mechanism for inserting an element at a specific position. Associative containers include map, multimap, set, multiset, hash_map, hash_multimap, hash_set, hash_multiset.
array An array is a C++ container which directly implements a C-style fixed array but which adds STL container semantics to it.
basic_string A templated string class which is usually used to store char or wchar_t strings.
begin The function used by all conventional containers to return the first item in the container.
BidirectionalIterator An input iterator which is like ForwardIterator except it can be read in a backward direction as well.
BinaryOperation  A function which takes two arguments and returns a value (which will usually be assigned to a third object).
BinaryPredicate A function which takes two arguments and returns true if some criteria is met (e.g. they are equal).
binder1st, binder2nd These are function objects which convert one function object into another.  In particular, they implement a binary function whereby you can specify one of the arguments. This is a somewhat abstract concept but has its uses.
bit vector A specialized container that acts like vector<bool> but is implemented via one bit per entry. STL vector<bool> is usually implemented as a bit vector but EASTL avoids this in favor of a specific bit vector container.
bitset An extensible yet efficient implementation of bit flags. Not strictly a conventional STL container and not the same thing as vector<bool> or a bit_vector, both of which are formal iterate-able containers.
capacity Refers to the amount of total storage available in an array-based container such as vector, string, and array. Capacity is always >= container size and is often > size in order to provide extra space for a container to grow into.
const_iterator An iterator whose iterated items cannot be modified. A const_iterator is akin to a const pointer such as 'const char*'.
container A container is an object that stores other objects (its elements), and that has methods for accessing its elements. In particular, every type that is a model of container has an associated iterator type that can be used to iterate through the container's elements.
copy constructor A constructor for a type which takes another object of that type as its argument. For a hypothetical Widget class, the copy constructor is of the form Widget(const Widget& src);
Compare A function which takes two arguments and returns true if the first is less than (i.e. should be ordered before) the second.
deque The name deque is pronounced "deck" and stands for "double-ended queue."

A deque is very much like a vector: like vector, it is a sequence that supports random access to elements, constant time insertion and removal of elements at the end of the sequence, and linear time insertion and removal of elements in the middle.

The main way in which deque differs from vector is that deque also supports constant time insertion and removal of elements at the beginning of the sequence. Additionally, deque does not have any member functions analogous to vector's capacity() and reserve(), and does not provide the guarantees on iterator validity that are associated with those member functions.
difference_type The typedef'd type used by all conventional containers and iterators to define the distance between two iterators. It is usually the same thing as the C/C++ ptrdiff_t data type.
empty The function used by all conventional containers to tell if a container has a size of zero. In many cases empty is more efficient than checking for size() == 0.
element An element refers to a member of a container.
end The function used by all conventional containers to return one-past the last item in the container.
equal_range equal_range is a version of binary search: it attempts to find the element value in an ordered range [first, last). The value returned by equal_range is essentially a combination of the values returned by lower_bound and upper_bound: it returns a pair of iterators i and j such that i is the first position where value could be inserted without violating the ordering and j is the last position where value could be inserted without violating the ordering. It follows that every element in the range [i, j) is equivalent to value, and that [i, j) is the largest subrange of [first, last) that has this property.
explicit instantiation Explicit instantiation lets you create an instantiation of a templated class or function without actually using it in your code. This is useful when you are creating library files that use templates for distribution, since uninstantiated template definitions are not put into object files. An example of the syntax for explicit instantiation is:
    template class vector<char>;
    template void min<int>(int, int);
    template void min(int, int);
ForwardIterator An input iterator which is like InputIterator except it can be reset back to the beginning.
Function A function which takes one argument and applies some operation to the target.
function object, functor A function object or functor is a class that has the function-call operator (operator()) defined.
Generator A function which takes no arguments and returns a value (which will usually be assigned to an object).
hash_map, hash_multimap, hash_set, hash_multiset The hash containers are implementations of map, multimap, set, and multiset via a hashtable instead of via a tree. Searches are O(1) (fast) but the container is not sorted.
heap A heap is a data structure which is not necessarily sorted but is organized such that the highest priority item is at the top. A heap is synonymous with a priority queue and has numerous applications in computer science.
InputIterator An input iterator (iterator you read from) which allows reading each element only once and only in a forward direction.
intrusive_list, intrusive_hash_map, etc. Intrusive containers are containers which don't allocate memory but instead use their contained object to manage the container's memory. While list allocates nodes (with mpPrev/mpNext pointers) that contain the list items, intrusive_list doesn't allocate nodes but instead the container items have the mpPrev/mpNext pointers.
intrusive_ptr intrusive_ptr is a smart pointer which doesn't allocate memory but instead uses the contained object to manage lifetime via addref and release functions.
iterator An iterator is the fundamental entity of reading and enumerating values in a container. Much like a pointer can be used to walk through a character array, an iterator is used to walk through a linked list.
iterator category An iterator category defines the functionality the iterator provides. The conventional iterator categories are InputIterator, ForwardIterator, BidirectionalIterator, RandomAccessIterator, and OutputIterator. See the definitions of each of these for more information. Iterator category is synonymous with iterator_tag.
iterator_tag See iterator category.
key_type, Key A Key or key_type is the identifier used by associative (a.k.a. dictionary) containers (e.g. map, hash_map) to identify the type used to index the mapped_type. If you have a dictionary of strings that you access by an integer id, the ids are the keys and the strings are the mapped types.
lexicographical compare A lexicographical compare is a comparison of two containers that compares them element by element, much like the C strcmp function compares two strings.
linked_ptr A linked_ptr is a shared smart pointer which implements object lifetime via a linked list of all linked_ptrs that are referencing the object. linked_ptr, like intrusive_ptr, is a non-memory-allocating alternative to shared_ptr.
list A list is a doubly linked list. It is a sequence that supports both forward and backward traversal, and (amortized) constant time insertion and removal of elements at the beginning or the end, or in the middle. Lists have the important property that insertion and splicing do not invalidate iterators to list elements, and that even removal invalidates only the iterators that point to the elements that are removed. The ordering of iterators may be changed (that is, list<T>::iterator might have a different predecessor or successor after a list operation than it did before), but the iterators themselves will not be invalidated or made to point to different elements unless that invalidation or mutation is explicit.
lower_bound lower_bound is a version of binary search: it attempts to find the element value in an ordered range [first, last). Specifically, it returns the first position where value could be inserted without violating the ordering.
map Map is a sorted associative container that associates objects of type Key with objects of type T. Map is a pair associative container, meaning that its value type is pair<const Key, T>. It is also a unique associative container, meaning that no two elements have the same key. It is implemented with a tree structure.
mapped_type A mapped_type is a typedef used by associative containers to identify the container object which is accessed by a key. If you have a dictionary of strings that you access by an integer id, the ids are the keys and the strings are the mapped types.
member template A member template is a templated function of a templated class. Thus with a member template function there are two levels of templating -- the class and the function.
multimap Multimap is a sorted associative container that associates objects of type Key with objects of type T. multimap is a pair associative container, meaning that its value type is pair<const Key, T>. It is also a multiple associative container, meaning that there is no limit on the number of elements with the same key. It is implemented with a tree structure.
multiset Multiset is a sorted associative container that stores objects of type Key. Its value type, as well as its key type, is Key. It is also a multiple associative container, meaning that two or more elements may be identical. It is implemented with a tree structure.
node A node is a little holder class used by many containers to hold the contained items. A linked-list, for example, defines a node which has three members: mpPrev, mpNext, and T (the contained object).
npos npos is used by the string class to identify a non-existent index. Some string functions return npos to indicate that the function failed.
rel_ops rel_ops refers to "relational operators" and is a set of templated functions which provide operator!= for classes that  have only operator== and provide operator > for classes that have only operator <, etc. Unfortunately, rel_ops have a habit of polluting the global operator space and creating conflicts. They must be used with discretion.
reverse_iterator A reverse_iterator is an iterator which wraps a bidirectional or random access iterator and allows the iterator to be read in reverse direction. The difference between using reverse_iterators and just decrementing regular iterators is that reverse_iterators use operator++ to move backwards and thus work in any algorithm that calls ++ to move through a container.
OutputIterator An output iterator (iterator you write to) which allows writing each element only once and only in a forward direction.
POD POD means Plain Old Data. It refers to C++ classes which act like built-in types and C structs. These are useful to distinguish because some algorithms can be made more efficient when they can detect that they are working with PODs instead of regular classes. 
Predicate A function which takes one argument and returns true if the argument meets some criteria.
priority_queue A priority_queue is an adapter container which implements a heap via a random access container such as vector or deque.
queue A queue is an adapter container which implements a FIFO (first-in, first-out) container with which you can add items to the back and get items from the front.
RandomAccessIterator An input iterator which can be addressed like an array. It is a superset of all other input iterators.
red-black tree A red-black tree is a binary tree which has the property of being always balanced. The colors red and black are somewhat arbitrarily named monikers for nodes used to measure the balance of the tree. Red-black trees are considered the best all-around data structure for sorted containers.
scalar A scalar is a data type which is implemented via a numerical value. In C++ this means integers, floating point values, enumerations, and pointers. 
scoped_ptr A scoped_ptr is a smart pointer which is the same as C++ auto_ptr except that it cannot be copied.
set Set is a sorted associative container that stores objects of type Key. Its value type, as well as its key type, is Key. It is also a unique associative container, meaning that no two elements are the same. It is implemented with a tree structure.
sequence A sequence is a variable-sized container whose elements are arranged in a strict linear (though not necessarily contiguous) order. It supports insertion and removal of elements. Sequence containers include vector, deque, array, list, slist.
size All conventional containers have a size member function which returns the count of elements in the container. The efficiency of the size function differs between containers.
size_type The type that a container uses to define its size and counts. This is similar to the C/C++ size_t type but may be specialized for the container. It defaults to size_t, but it is possible to force it to be 4 bytes for 64 bit machines by defining EASTL_SIZE_T_32BIT.
skip list A skip-list is a type of container which is an alternative to a binary tree for finding data.
shared_ptr A shared_ptr is a smart pointer which allows multiple references (via multiple shared_ptrs) to the same object. When the last shared_ptr goes away, the pointer is freed. shared_ptr is implemented via a shared count between all instances.
slist An slist is like a list but is singly-linked instead of doubly-linked. It can only be iterated in a forward-direction.
smart pointer Smart pointer is a term that identifies a family of utility classes which store pointers and free them when the class instance goes out of scope. Examples of smart pointers are shared_ptr, linked_ptr, intrusive_ptr, and scoped_ptr.
splice Splicing refers to the moving of a subsequence of one Sequence into another Sequence.
stack A stack is an adapter container which implements LIFO (last-in, first-out) access via another container such as a list or deque.
STL Standard Template Library. 
StrictWeakOrdering A BinaryPredicate that compares two objects, returning true if the first precedes the second. Like Compare but has additional requirements. Used for sorting routines.

This predicate must satisfy the standard mathematical definition of a strict weak ordering. A StrictWeakOrdering has to behave the way that "less than" behaves: if a is less than b then b is not less than a, if a is less than b and b is less than c then a is less than c, and so on.
string See basic_string.
T T is the template parameter name used by most containers to identify the contained element type. 
template parameter A template parameter is the templated type used to define a template function or class. In the declaration 'template <typename T> class vector{ },'  T is a template parameter.
template specialization A template specialization is a custom version of a template which overrides the default version and provides alternative functionality, often for the purpose of providing improved or specialized functionality.
treap A tree-like structure implemented via a heap. This is an alternative to a binary tree (e.g. red-black tree), skip-list, and sorted array as a mechanism for a fast-access sorted container.
type traits Type traits are properties of types. If you have a templated type T and you want to know if it is a pointer, you would use the is_pointer type trait. If you want to know if the type is a POD, you would use the is_pod type trait. Type traits are very useful for allowing the implementation of optimized generic algorithms and for asserting that types have properties expected by the function or class contract. For example, you can use type_traits to tell if a type can be copied via memcpy instead of a slower element-by-element copy.
typename Typename is a C++ keyword used in templated function implementations which identifies to the compiler that the following expression is a type and not a value. It is used extensively in EASTL, particularly in the algorithms.
UnaryOperation A function which takes one argument and returns a value (which will usually be assigned to a second object).
upper_bound upper_bound is a version of binary search: it attempts to find the element value in an ordered range [first, last). Specifically, it returns the last position where value could be inserted without violating the ordering.
value_type, Value A value_type is a typedef used by all containers to identify the elements they contain. In most cases value_type is simply the same thing as the user-supplied T template parameter. The primary exception is the associative containers whereby value_type is the pair of key_type and mapped_type.
vector A vector is a Sequence that supports random access to elements, constant time insertion and removal of elements at the end, and linear time insertion and removal of elements at the beginning or in the middle. The number of elements in a vector may vary dynamically; memory management is automatic. Vector is the simplest of the container classes, and in many cases the most efficient.
vector_map, vector_multimap, vector_set, vector_multiset These are containers that implement the functionality of map, multimap, set, and multiset via a vector or deque instead of a tree. They use less memory and find items faster, but are slower to modify and modification invalidates iterators.
weak_ptr A weak_ptr is an adjunct to shared_ptr which doesn't increment the reference on the contained object but can safely tell you if the object still exists and access it if so. It has uses in preventing circular references in shared_ptrs.


End of document








================================================ FILE: doc/html/EASTL Gotchas.html ================================================ EASTL Gotchas

EASTL Gotchas

There are some cases where the EASTL design results in "gotchas" or behavior that isn't necessarily what the new user would expect. These are all situations in which this behavior may be undesirable. One might ask, "Why not change EASTL to make these gotchas go away?" The answer is that in each case making the gotchas go away would either be impossible or would compromise the functionality of the library.

Summary

The descriptions here are intentionally terse; this is to make them easier to visually scan.

1 map::operator[] can create elements.
2 char* converts to string silently.
3 char* is compared by ptr and not by contents.
4 Iterators can be invalidated by container mutations.
5 Vector resizing may cause ctor/dtor cascades.
6 Vector and string insert/push_back/resize can reallocate.
7 Deriving from containers may not work.
8 set::iterator is const_iterator.
9 Inserting elements means copying by value.
10 Containers of pointers can leak if you aren't careful.
11 Containers of auto_ptrs can crash.
12 Remove algorithms don't actually remove elements.
13 list::size() is O(n).
14 vector and deque::size() may incur integer division.
15 Be careful making custom Compare functions.
16 Comparisons involving floating point are dangerous.
17 Writing beyond string::size and vector::size is dangerous.
18 Container operator=() doesn't copy allocators.

Detail

1 map::operator[] can create elements.

By design, map operator[] creates a value for you if it isn't already present. The reason for this is that the alternative behavior would be to throw an exception, and such behavior isn't desirable. The resolution is to simply use the map::find function instead of operator[].

2 char* converts to string silently.

The string class has a non-explicit constructor that takes char* as an argument. Thus if you pass char* to a function that takes a string object, a temporary string will be created. In some cases this is undesirable behavior but the user may not notice it right away, as the compiler gives no warnings. The reason that the string constructor from char* is not declared explicit is that doing so would prevent the user from expressions such as: string s = "hello". In this example, no temporary string object is created, but the syntax is not possible if the char* constructor is declared explicit. Thus a decision to make the string char* constructor explicit involves tradeoffs.

There is an EASTL configuration option called EASTL_STRING_EXPLICIT which makes the string char* ctor explicit and avoids the behaviour described above.

3 char* is compared by ptr and not by contents.

If you have a set of strings declared as set<char*>, the find function will compare via the pointer value and not the string contents. The workaround is to make a set of string objects or, better, to supply a custom string comparison function to the set. The workaround is not to declare a global operator< for type char*, as that could cause other systems to break.

4 Iterators can be invalidated by container mutations

With some containers, modifications of them may invalidate iterators into them. With other containers, modifications of them only invalidate an iterator if the modification involves the element that iterator refers to. Containers in the former category include vector, deque, basic_string (string), vector_map, vector_multimap, vector_set, and vector_multiset. Containers in the latter category include list, slist, map, multimap, multiset, all hash containers, and all intrusive containers.

5 Vector resizing may cause ctor/dtor cascades.

If elements are inserted into a vector in middle of the sequence, the elements from the insertion point to the end will be copied upward. This will necessarily cause a series of element constructions and destructions as the elements are copied upward. Similarly, if an element is appended to a vector but the vector capacity is exhausted and needs to be reallocated, the entire vector will undergo a construction and destruction pass as the values are copied to the new storage. This issue exists for deque as well, though to a lesser degree. For vector, the resolution is to reserve enough space in your vector to prevent such reallocation. For deque the resolution is to set its subarray size to enough to prevent such reallocation. Another solution that can often be used is to take advantage of the has_trivial_relocate type trait, which can cause such moves to happen via memcpy instead of via ctor/dtor calls. If your class can be safely memcpy'd, you can use EASTL_DECLARE_TRIVIAL_RELOCATE to tell the compiler it can be memcpy'd. Note that built-in scalars (e.g. int) already are automatically memcpy'd by EASTL.

6 Vector and string insert/push_back/resize can reallocate.

If you create an empty vector and use push_back to insert 100 elements, the vector will reallocate itself at least three or four times during the operation. This can be an undesirable thing. The best thing to do if possible is to reserve the size you will need up front in the vector constructor or before you add any elements.

7 Deriving from containers may not work.

EASTL containers are not designed with the guarantee that they can be arbitrarily subclassed. This is by design and is done for performance reasons, as such guarantees would likely involve making containers use virtual functions. However, some types of subclassing can be successful and EASTL does such subclassing internally to its advantage. The primary problem with subclassing results when a parent class function calls a function that the user wants to override. The parent class cannot see the overridden function and silent unpredictable behavior will likely occur. If your derived container acts strictly as a wrapper for the container then you will likely be able to successfully subclass it.

8 set::iterator is const_iterator.

The reason this is so is that a set is an ordered container and changing the value referred to by an iterator could make the set be out of order. Thus, set and multiset iterators are always const_iterators. If you need to change the value and are sure the change will not alter the container order, use const_cast or declare mutable member variables for your contained object. This resolution is the one blessed by the C++ standardization committee. This issue is addressed in more detail in the EASTL FAQ.

9 Inserting elements means copying by value.

When you insert an element into a (non-intrusive) container, the container makes a copy of the element. There is no provision to take over ownership of an object from the user. The exception to this is of course when you use a container of pointers instead of a container of values. See the entry below regarding containers of pointers. Intrusive containers (e.g. intrusive_list) do in fact take over the user-provided value, and thus provide another advantage over regular containers in addition to avoiding memory allocation.

10 Containers of pointers can leak if you aren't careful.

Containers of pointers don't know or care about the possibility that the pointer may have been allocated and need to be freed. Thus if you erase such elements from a container they are not freed. The resolution is to manually free the pointers when removing them or to instead use a container of smart pointers (shared smart pointers, in particular). This issue is addressed in more detail in the EASTL FAQ and the auto_ptr-related entry below.

11 Containers of auto_ptrs can crash

We suggested above that the user can use a container of smart pointers to automatically manage contained pointers. However, you don't want to use auto_ptr, as auto_ptrs cannot be safely assigned to each other; doing so results in a stale pointer and most likely a crash.

12 Remove algorithms don't actually remove elements.

Algorithms such as remove, remove_if, remove_heap, and unique do not erase elements from the sequences they work on. Instead, they return an iterator to the new end of the sequence and the user must call erase with that iterator in order to actually remove the elements from the container. This behavior exists because algorithms work on sequences via iterators and don't know how to work with containers. Only the container can know how to best erase its own elements. In each case, the documentation for the algorithm reminds the user of this behavior. Similarly, the copy algorithm copies elements from one sequence to another and doesn't modify the size of the destination sequence. So the destination must hold at least as many items as the source, and if it holds more items, you may want to erase the items at the end after the copy.

13 list::size() is O(n).

By this we mean that calling size() on a list will iterate the list and add the size as it goes. Thus, getting the size of a list is not a fast operation, as it requires traversing the list and counting the nodes. We could make list::size() be fast by having a member mSize variable. There are reasons for having such functionality and reasons for not having such functionality. We currently choose to not have a member mSize variable as it would add four bytes to the class, add processing to functions such as insert and erase, and would only serve to improve the size function, but no other function. The alternative argument is that the C++ standard states that std::list::size() should be an O(1) operation (i.e. have a member size variable), most C++ standard library list implementations do so, the size is but an integer which is quick to update, and many users expect to have a fast size function. All of this applies to slist and intrusive_list as well.

Note that EASTL's config.h file has an option in it to cause list and slist to cache their size with an mSize variable and thus make size() O(1). This option is disabled by default.

14 vector and deque::size() may incur integer division.

Some containers (vector and deque in particular) calculate their size by pointer subtraction. For example, the implementation of vector::size() is 'return mpEnd - mpBegin'. This looks like a harmless subtraction, but if the size of the contained object is not an even power of two then the compiler will likely need to do an integer division to calculate the value of the subtracted pointers. One might suggest that vector use mpBegin and mnSize as member variables instead of mpBegin and mpEnd, but that would incur costs in other vector operations. The suggested workaround is to iterate a vector instead of using a for loop and operator[] and for those cases where you do use a for loop and operator[], get the size once at the beginning of the loop instead of repeatedly during the condition test.

15 Be careful making custom Compare functions.

A Compare function compares two values and returns true if the first is less than the second. This is easy to understand for integers and strings, but harder to get right for more complex structures. Many a time have people decided to come up with a fancy mechanism for comparing values and made mistakes. The FAQ has a couple entries related to this. See http://blogs.msdn.com/oldnewthing/archive/2003/10/23/55408.aspx for a story about how this can go wrong by being overly clever.

16 Comparisons involving floating point are dangerous.

Floating point comparisons between two values that are very nearly equal can result in inconsistent results. Similarly, floating point comparisons between NaN values will always generate inconsistent results, as NaNs by definition always compare as non-equal. You thus need to be careful when using comparison functions that work with floating point values. Conversions to integral values may help the problem, but not necessarily.

17 Writing beyond string::size and vector::size is dangerous.

A trick that often comes to mind when working with strings is to set the string capacity to some maximum value, strcpy data into it, and then resize the string when done. This can be done with EASTL, but only if you resize the string to the maximum value and not reserve the string to the maximum value. The reason is that when you resize a string from size (n) to size (n + count), the count characters are zeroed and overwrite the characters that you strcpyd.

The following code is broken:

string mDataDir;

mDataDir.reserve(kMaxPathLength);
strcpy(&mDataDir[0], "blah/blah/blah");
mDataDir.resize(strlen(&mDataDir[0])); // Overwrites your blah/... with 00000...

The following code is OK:

string mDataDir;

mDataDir.resize(kMaxPathLength);
strcpy(&mDataDir[0], "blah/blah/blah");
mDataDir.resize(strlen(&mDataDir[0]));

18 Container operator=() doesn't copy allocators.

EASTL container assignment (e.g. vector::operator=(const vector&)) doesn't copy the allocator. There are good and bad reasons for doing this, but that's how it acts. So you need to beware that you need to assign the allocator separately or make a container subclass which overrides operator=() and does this.



End of document




================================================ FILE: doc/html/EASTL Introduction.html ================================================ EASTL Introduction

EASTL Introduction

EASTL stands for Electronic Arts Standard Template Library. It is a C++ template library of containers, algorithms, and iterators useful for runtime and tool development across multiple platforms. It is a fairly extensive and robust implementation of such a library and has an emphasis on high performance above all other considerations.

Intended Audience

This is a short document intended to provide a basic introduction to EASTL for those new to the concept of EASTL or STL. If you are familiar with the C++ STL or have worked with other templated container/algorithm libraries, you probably don't need to read this. If you have no familiarity with C++ templates at all, then you probably will need more than this document to get you up to speed. In this case you need to understand that templates, when used properly, are powerful vehicles for the ease of creation of optimized C++ code. A description of C++ templates is outside the scope of this documentation, but there is plenty of such documentation on the Internet. See the EASTL FAQ.html document for links to information related to learning templates and STL.

EASTL Modules

EASTL consists primarily of containers, algorithms, and iterators. An example of a container is a linked list, while an example of an algorithm is a sort function; iterators are the entities of traversal for containers and algorithms. EASTL contains a fairly large number of containers and algorithms, each of which is a very clean, efficient, and unit-tested implementation. We can say with some confidence that you are not likely to find better implementations of these (commercial or otherwise), as these are the result of years of wisdom and diligent work. For a detailed list of EASTL modules, see EASTL Modules.html.

EASTL Suitability

What uses is EASTL suitable for? Essentially any situation in tools and shipping applications where the functionality of EASTL is useful. Modern compilers are capable of producing good code with templates and many people are using them in both current generation and future generation applications on multiple platforms from embedded systems to servers and mainframes.


End of document






================================================ FILE: doc/html/EASTL Maintenance.html ================================================ EASTL Maintenance

EASTL Maintenance

Introduction

The purpose of this document is to provide some necessary background for anybody who might do work on EASTL. Writing generic templated systems like EASTL can be surprisingly tricky. There are numerous details of the C++ language that you need to understand which don't usually come into play during the day-to-day C++ coding that many people do. It is easy to make a change to some function that seems proper and works for your test case but either violates the design expectations or simply breaks under other circumstances.

It may be useful to start with an example. Here we provide an implementation of the count algorithm which seems simple enough. Except it is wrong and while it will compile in some cases it won't compile in others:

template <class InputIterator, class T>
int count(InputIterator first, InputIterator last, const T& value)
{
     int result = 0;
 
     for(; first < last; ++first){
         if(*first == value)
             ++result;
     }
 
     return result;
 } 

The problem is with the comparison 'first < last'. The count algorithm takes an InputIterator and operator< is not guaranteed to exist for any given InputIterator (and indeed while operator< exists for vector::iterator, it doesn't exist for list::iterator). The comparison in the above algorithm must instead be implemented as 'first != last'. If we were working with a RandomAccessIterator then 'first < last' would be valid.

In the following sections we cover various topics of interest regarding the development and maintenance of EASTL. Unfortunately, this document can't cover every aspect of EASTL maintenance issues, but at least it should give you a sense of the kinds of issues.

C++ Language Standard

First and foremost, you need to be familiar with the C++ standard. In particular, the sections of the standard related to containers, algorithms, and iterators are of prime significance. We'll talk about some of this in more detail below. Similarly, a strong understanding of the basic data types is required. What is the difference between ptrdiff_t and intptr_t; unsigned int and size_t; char and signed char?

In addition to the C++ language standard, you'll want to be familiar with the C++ Defect Report. This is a continuously updated document which lists flaws in the original C++ language specification and the current thinking as to the resolutions of those flaws. You will notice various references to the Defect Report in EASTL source code.

Additionally, you will want to be familiar with the C++ Technical Report 1 (as of this writing there is only one). This document is the evolving addendum to the C++ standard based on both the Defect Report and based on desired additions to the C++ language and standard library.

Additionally, you will probably want to have some familiarity with Boost. It also helps to keep an eye on comp.std.c++ Usenet discussions. However, watch out for what people say on Usenet. They tend to defend GCC, Unix, std STL, and C++ to a sometimes unreasonable degree. Many discussions ignore performance implications and concentrate only on correctness and sometimes academic correctness above usability.

Language Use

Macros are (almost) not allowed in EASTL. A prime directive of EASTL is to be easier to read by users and most of the time macros are an impediment to this. So we avoid macros at all costs, even if it ends up making our development and maintenance more difficult. That being said, you will notice that the EASTL config.h file uses macros to control various options. This is an exception to the rule; when we talk about not using macros, we mean with the EASTL implementation itself.

EASTL assumes a compliant and intelligent C++ compiler, and thus all language facilities are usable. However, we nevertheless choose to stay away from some language functionality. The primary language features we avoid are:

  • RTTI (run-time-type-identification) (this is deemed too costly)
  • Template export (few compilers support this)
  • Exception specifications (most compilers ignore them)

Use of per-platform or per-compiler code should be avoided when possible but where there is a significant advantage to be gained it can and indeed should be used. An example of this is the GCC __builtin_expect feature, which allows the user to give the compiler a hint about whether an expression is true or false. This allows for the generation of code that executes faster due to more intelligent branch prediction.

Prime Directives

The implementation of EASTL is guided foremost by the following directives which are listed in order of importance.

  1. Efficiency (speed and memory usage)
  2. Correctness (doesn't have bugs)
  3. Portability (works on all required platforms with minimal specialized code)
  4. Readability (code is legible and comments are present and useful)

Note that unlike commercial STL implementations which must put correctness above all, we put a higher value on efficiency. As a result, some functionality may have some usage limitation that is not present in other similar systems but which allows for more efficient operation, especially on the platforms of significance to us.

Portability is significant, but not critical. Yes, EASTL must compile and run on all platforms that we will ship games for. But we don't take that to mean under all compilers that could be conceivably used for such platforms. For example, Microsoft VC6 can be used to compile Windows programs, but VC6's C++ support is too weak for EASTL and so you simply cannot use EASTL under VC6.

Readability is something that EASTL achieves better than many other templated libraries, particularly Microsoft STL and STLPort. We make every attempt to make EASTL code clean and sensible. Sometimes our need to provide optimizations (particularly related to type_traits and iterator types) results in less simple code, but efficiency happens to be our prime directive and so it overrides all other considerations.

Coding Conventions

Here we provide a list of coding conventions to follow when maintaining or adding to EASTL, starting with the three language use items from above:

  • No RTTI use.
  • No use of exception specifications (e.g. appending the 'throw' declarator to a function).
  • No use of exception handling itself except where explicitly required by the implementation (e.g. vector::at).
  • Exception use needs to be savvy to EASTL_EXCEPTIONS_ENABLED.
  • No use of macros (outside of config.h). Macros make things more difficult for the user.
  • No use of static or global variables.
  • No use of global new, delete, malloc, or free. All memory must be user-specifiable via an Allocator parameter (default-specified or explicitly specified).
  • Containers use protected member data and functions as opposed to private. This is because doing so allows subclasses to extend the container without the creation of intermediary functions. Recall from our prime directives above that performance and simplicity overrule all.
  • No use of multithreading primitives. 
  • No use of the export keyword.
  • We don't have a rule about C-style casts vs. C++ static_cast<>, etc. We would always use static_cast except that debuggers can't evaluate them and so in practice they can get in the way of debugging and tracing. However, if the cast is one that users don't tend to need to view in a debugger, C++ casts are preferred.
  • No external library dependencies whatsoever, including standard STL. EASTL is dependent on only EABase and the C++ compiler. 
  • All code must be const-correct. This isn't just for readability -- compilation can fail unless const-ness is used correctly everywhere. 
  • Algorithms do not refer to containers; they refer only to iterators.
  • Algorithms in general do not allocate memory. If such a situation arises, there should be a version of the algorithm which allows the user to provide the allocator.
  • No inferior implementations. No facility should be added to EASTL unless it is of professional quality.
  • The maintainer should emulate the EASTL style of code layout, regardless of the maintainer's personal preferences. When in Rome, do as the Romans do. EASTL uses 4 spaces for indents, which is how the large majority of code within EA is written.
  • No major changes should be done without consulting a peer group.

Compiler Issues

Historically, templates are the feature of C++ that has given C++ compilers the most fits. We are still working with compilers that don't completely and properly support templates. Luckily, most compilers are now good enough to handle what EASTL requires. Nevertheless, there are precautions we must take.

It turns out that the biggest problem in writing portable EASTL code is that VC++ allows you to make illegal statements which are not allowed by other compilers. For example, VC++ will allow you to neglect using the typename keyword in template references, whereas GCC (especially 3.4+) requires it.

In order to feel comfortable that your EASTL code is C++ correct and is portable, you must do at least these two things:

  • Test under at least VS2005, GCC 3.4+, GCC 4.4+, EDG, and clang.
  • Test all functions that you write, as compilers will often skip the compilation of a template function if it isn't used.

The two biggest issues to watch out for are 'typename' and a concept called "dependent names". In both cases VC++ will accept non-conforming syntax whereas most other compilers will not. Whenever you reference a templated type (and not a templated value) in a template, you need to prefix it by 'typename'. Whenever your class function refers to a base class member (data or function), you need to refer to it by "this->", "base_type::", or by placing a "using" statement in your class to declare that you will be referencing the given base class member.

Iterator Issues

The most important thing to understand about iterators is the concept of iterator types and their designated properties. In particular, we need to understand the difference between InputIterator, ForwardIterator, BidirectionalIterator, RandomAccessIterator, and OutputIterator. These differences dictate both how we implement our algorithms and how we implement our optimizations. Please read the C++ standard for a reasonably well-implemented description of these iterator types.

Here's an example from EASTL/algorithm.h which demonstrates how we use iterator types to optimize the reverse algorithm based on the kind of iterator passed to it:

template <class BidirectionalIterator>
inline void reverse_impl(BidirectionalIterator first, BidirectionalIterator last, bidirectional_iterator_tag)
{     for(; (first != last) && (first != --last); ++first) // We are not allowed to use operator <, <=, >, >= with         iter_swap(first, last);                          // a generic (bidirectional or otherwise) iterator. }
template <typename RandomAccessIterator> inline void reverse_impl(RandomAccessIterator first, RandomAccessIterator last, random_access_iterator_tag) {     for(; first < --last; ++first) // With a random access iterator, we can use operator < to more efficiently implement         iter_swap(first, last);    // this algorithm. A generic iterator doesn't necessarily have an operator < defined. }

template <class BidirectionalIterator> inline void reverse(BidirectionalIterator first, BidirectionalIterator last) {     typedef typename iterator_traits<BidirectionalIterator>::iterator_category IC;     reverse_impl(first, last, IC()); }

Exception Handling

You will notice that EASTL uses try/catch in some places (particularly in containers) and uses the EASTL_EXCEPTIONS_ENABLED define. For starters, any EASTL code that uses try/catch should always be wrapped within #if EASTL_EXCEPTIONS_ENABLED (note: #if, not #ifdef).

This is simple enough, but what you may be wondering is how it is that EASTL decides to use try/catch for some sections of code and not for others. EASTL follows the C++ standard library conventions with respect to exception handling, and you will see similar exception handling in standard STL. The code that you need to wrap in try/catch is code that can throw a C++ exception (not to be confused with CPU exception) and needs to have something unwound (or fixed) as a result. The important thing is that the container be in a valid state after encountering such exceptions. In general the kinds of things that require such try/catch are:

  • Memory allocation failures (which throw exceptions)
  • Constructor exceptions

Take a look at the cases in EASTL where try/catch is used and see what it is doing.

Type Traits

EASTL provides a facility called type_traits which is very similar to the type_traits being proposed by the C++ TR1 (see above). type_traits are useful because they tell you about properties of types at compile time. This allows you to do things such as assert that a data type is scalar or that a data type is const. The way we put them to use in EASTL is to take advantage of them to implement different pathways for functions based on types. For example, we can copy a contiguous array of scalars much faster via memcpy than we can via a for loop, though we could not safely employ memcpy for a non-trivial C++ class.

As mentioned in the General Optimizations section below, EASTL should take advantage of type_traits information to the extent possible to achieve maximum efficiency.

General Optimizations

One of the primary goals of EASTL is to achieve the highest possible efficiency. In cases where EASTL functionality overlaps standard C++ STL functionality, standard STL implementations provided by compiler vendors are a benchmark which EASTL strives to beat. Indeed EASTL is more efficient than all other current STL implementations (with some exception in the case of some Metrowerks STL facilities). Here we list some of the things to look for when considering optimization of EASTL code. These items can be considered general optimization suggestions for any code, but this particular list applies to EASTL:

  • Take advantage of type_traits to the extent possible (e.g. to use memcpy to move data instead of a for loop when possible).
  • Take advantage of iterator types to the extent possible.
  • Take advantage of the compiler's expectation that if statements are expected to evaluate as true and for loop conditions are expected to evaluate as false.
  • Make inline-friendly code. This often means avoiding temporaries to the extent possible.
  • Minimize branching (i.e. minimize 'if' statements). Where branching is used, make it so that 'if' statements execute as true.
  • Use EASTL_LIKELY/EASTL_UNLIKELY to give branch hints to the compiler when you are confident it will be beneficial.
  • Use restricted pointers (EABase's EA_RESTRICT or various compiler-specific versions of __restrict).
  • Compare unsigned values to < max instead of comparing signed values to >= 0 && < max.
  • Employ power of 2 integer math instead of math with any kind of integer.
  • Use template specialization where possible to implement improved functionality.
  • Avoid function calls when the call does something trivial. This improves debug build speed (which matters) and sometimes release build speed as well, though sometimes makes the code intent less clear. A comment next to the code saying what call it is replacing makes the intent clear without sacrificing performance.

Unit Tests

Writing robust templated containers and algorithms is difficult or impossible without a heavy unit test suite in place. EASTL has a pretty extensive set of unit tests for all containers and algorithms. While the successful automated unit testing of shipping application programs may be a difficult thing to pull off, unit testing of libraries such as this is of huge importance and cannot be overstated.

  • When making a new unit test, start by copying one of the existing unit tests and follow its conventions.
  • Test containers of both scalars and classes.
  • Test algorithms on both container iterators (e.g. vector.begin()) and pointer iterators (e.g. int*).
  • Make sure that algorithm or container member functions which take iterators work with the type of iterator they claim to (InputIterator, ForwardIterator, BidirectionalIterator, RandomAccessIterator). 
  • Test for const-correctness. If a user is allowed to modify something that is supposed to be const, silent errors can go undetected.
  • Make sure that unit tests cover all functions and all pathways of the tested code. This means that in writing the unit test you need to look at the source code to understand all the pathways.
  • Consider using a random number generator (one is provided in the test library) to do 'monkey' testing whereby unexpected input is given to a module being tested. When doing so, make sure you seed the generator in a way that problems can be reproduced.
  • While we avoid macros in EASTL user code, macros to assist in unit tests aren't considered a problem. However, consider that a number of macros could be replaced by templated functions and thus be easier to work with.
  • Unit tests don't need to be efficient; feel free to take up all the CPU power and time you need to test a module sufficiently.
  • EASTL containers are not thread-safe, by design. Thus there is no need to do multithreading tests as long as you stay away from the usage of static and global variables.
  • Unit tests must succeed with no memory leaks and of course no memory corruption. The heap system should be configured to test for this, and heap validation functions are available to the unit tests while in the middle of runs.

Things to Keep in Mind

  • When referring to EASTL functions and types from EASTL code, make sure to preface the type with the EASTL namespace. If you don't do this you can get collisions due to the compiler not knowing if it should use the EASTL namespace or the namespace of the templated type for the function or type.
  • Newly constructed empty containers do no memory allocation. Some STL and other container libraries allocate an initial node from the class memory allocator. EASTL containers by design never do this. If a container needs an initial node, that node should be made part of the container itself or be a static empty node object.
  • Empty containers (new or otherwise) contain no constructed objects, including those that might be in an 'end' node. Similarly, no user object (e.g. of type T) should be constructed unless required by the design and unless documented in the container/algorithm contract.
  • When creating a new container class, it's best to copy from an existing similar class to the extent possible. This helps keep the library consistent and resolves subtle problems that can happen in the construction of containers.
  • Be very careful about tweaking the code. It's easy to think (for example) that a > could be switched to a >= where instead it is a big deal. Just about every line of code in EASTL has been thought through and has a purpose. Unit tests may or may not currently test every bit of EASTL, so you can't necessarily rely on them to give you 100% confidence in changes. If you are not sure about something, contact the original author and he will tell you for sure.
  • Algorithm templates always work with iterators and not containers. A given container may of course implement an optimized form of an algorithm itself.
  • Make sure everything is heavily unit tested. If somebody finds a bug, fix the bug and make a unit test to make sure the bug doesn't happen again.
  • It's easy to get iterator categories confused or forgotten while implementing algorithms and containers.
  • Watch out for the strictness of GCC 3.4+. There is a bit of syntax — especially related to templates — that other compilers accept but GCC 3.4+ will not.
  • Don't forget to update the config.h EASTL_VERSION define before publishing.
  • The vector and string classes define iterator to be T*. We want to always leave this so — at least in release builds — as this gives some algorithms an advantage that optimizers cannot get around.






================================================ FILE: doc/html/EASTL Modules.html ================================================ EASTL Modules

EASTL Modules

Introduction

We provide here a list of all top-level modules present or planned for future presence in EASTL. In some cases (e.g. algorithm), the module consists of many smaller submodules which are not described in detail here. In those cases you should consult the source code for those modules or consult the detailed documentation for those modules. This document is a high level overview and not a detailed document.

Module List

 Module Description
config Configuration header. Allows for changing some compile-time options.
slist
fixed_slist
Singly-linked list.
fixed_slist is a version which is implemented via a fixed block of contiguous memory.
list
fixed_list
Doubly-linked list.
intrusive_list
intrusive_slist
List whereby the contained item provides the node implementation.
array Wrapper for a C-style array which extends it to act like an STL container.
vector
fixed_vector
Resizable array container.
vector_set
vector_multiset
Set implemented via a vector instead of a tree. Speed and memory use is improved but resizing is slower.
vector_map
vector_multimap
Map implemented via a vector instead of a tree. Speed and memory use is improved but resizing is slower.
deque
Double-ended queue, but also with random access. Acts like a vector but insertions and removals are efficient.
bit_vector Implements a vector of bool, but the actual storage is done with one bit per bool. Not the same thing as a bitset.
bitset Implements an efficient arbitrarily-sized bitfield. Note that this is not strictly the same thing as a vector of bool (bit_vector), as it is optimized to act like an arbitrary set of flags and not to be a generic container which can be iterated, inserted, removed, etc.
set
multiset
fixed_set
fixed_multiset
A set is a sorted unique collection, multiset is sorted but non-unique collection.
map
multimap
fixed_map
fixed_multimap
A map is a sorted associative collection implemented via a tree. It is also known as dictionary.
hash_map
hash_multimap
fixed_hash_map
fixed_hash_multimap
Map implemented via a hash table.
intrusive_hash_map
intrusive_hash_multimap
intrusive_hash_set
intrusive_hash_multiset
hash_map whereby the contained item provides the node implementation, much like intrusive_list.
hash_set
hash_multiset
fixed_hash_set
fixed_hash_multiset
Set implemented via a hash table.
basic_string
fixed_string
fixed_substring
basic_string is a character string/array.
fixed_substring is a string which is a reference to a range within another string or character array.
cow_string is a string which implements copy-on-write.
algorithm min/max, find, binary_search, random_shuffle, reverse, etc. 
sort
Sorting functionality, including functionality not in STL. quick_sort, heap_sort, merge_sort, shell_sort, insertion_sort, etc.
numeric Numeric algorithms: accumulate, inner_product, partial_sum, adjacent_difference, etc.
heap
Heap structure functionality: make_heap, push_heap, pop_heap, sort_heap, is_heap, remove_heap, etc.
stack
Adapts any container into a stack.
queue
Adapts any container into a queue.
priority_queue
Implements a conventional priority queue via a heap structure.
type_traits Type information, useful for writing optimized and robust code. Also used for implementing optimized containers and algorithms.
utility
pair, make_pair, rel_ops, etc.
functional
Function objects.
iterator
Iteration for containers and algorithms.
smart_ptr Smart pointers: shared_ptr, shared_array, weak_ptr, scoped_ptr, scoped_array, linked_ptr, linked_array, intrusive_ptr.

 

Module Behaviour

The overhead sizes listed here refer to an optimized release build; debug builds may add some additional overhead. Some of the overhead sizes may be off by a little bit (usually at most 4 bytes). This is because the values reported here are those that refer to when EASTL's container optimizations have been completed. These optimizations may not have been completed as you are reading this.

Container

Stores

Container Overhead (32 bit) Container Overhead (64 bit)

Node Overhead (32 bit)

Node Overhead (64 bit)

Iterator category

size() efficiency operator[] efficiency

Insert efficiency

Erase via Iterator efficiency

Find efficiency

Sort efficiency

slist T 8 16 4 8 f n - 1 1 n n+

list

T

12 24

8

16

b

n -

1

1

n

n log(n)

intrusive_slist T 4 8 4 8 f n - 1 1 n n+
intrusive_list T 8 16 8 16 b n - 1 1 n n log(n)
array T 0 0 0 0 r 1 1 - - n n log(n)
vector T 16 32 0 0 r 1 1 1 at end, else n 1 at end, else n n n log(n)
vector_set T 16 32 0 0 r 1 1 1 at end, else n 1 at end, else n log(n) 1
vector_multiset T 16 32 0 0 r 1 1 1 at end, else n 1 at end, else n log(n) 1
vector_map Key, T 16 32 0 0 r 1 1 1 at end, else n 1 at end, else n log(n) 1
vector_multimap Key, T 16 32 0 0 r 1 1 1 at end, else n 1 at end, else n log(n) 1
deque T 44 84 0 0 r 1 1 1 at begin or end,
else n / 2
1 at begin or end,
else n / 2
n n log(n)
bit_vector bool 8 16 0 0 r 1 1 1 at end, else n 1 at end, else n n n log(n)
string (all types) T 16 32 0 0 r 1 1 1 at end, else n 1 at end, else n n n log(n)
set T 24 44 16 28 b 1 - log(n) log(n) log(n) 1
multiset T 24 44 16 28 b 1 - log(n) log(n) log(n) 1
map Key, T 24 44 16 28 b 1 log(n) log(n) log(n) log(n) 1
multimap Key, T 24 44 16 28 b 1 - log(n) log(n) log(n) 1
hash_set T 16 20 4 8 b 1 - 1 1 1 -
hash_multiset T 16 20 4 8 b 1 - 1
1 1 -
hash_map Key, T 16 20 4 8 b 1 - 1 1 1 -
hash_multimap Key, T 16 20 4 8 b 1 - 1 1 1 -
intrusive_hash_set T 16 20 4 8 b 1 - 1 1 1 -
intrusive_hash_multiset T 16 20 4 8 b 1 - 1 1 1 -
intrusive_hash_map T (Key == T) 16 20 4 8 b 1 - 1 1 1 -
intrusive_hash_multimap T (Key == T)  16 20 4 8 b 1 - 1 1 1 -
  • - means that the operation does not exist.
  • 1 means amortized constant time. Also known as O(1)
  • n means time proportional to the container size. Also known as O(n)
  • log(n) means time proportional to the natural logarithm of the container size. Also known as O(log(n))
  • n log(n) means time proportional to log(n) times the size of the container. Also known as O(n log(n))
  • n+ means that the time is at least n, and possibly higher.
  • Iterator meanings are: f = forward iterator; b = bidirectional iterator; r = random access iterator.
  • Overhead indicates approximate per-element overhead memory required in bytes. Overhead doesn't include possible additional overhead that may be imposed by the memory heap used to allocate nodes. General heaps tend to have between 4 and 16 bytes of overhead per allocation, depending on the heap.
  • Some overhead values are dependent on the structure alignment characteristics in effect. The values reported here are those that would be in effect for a system that requires pointers to be aligned on boundaries of their size and allocations with a minimum of 4 bytes (thus one byte values get rounded up to 4).
  • Some overhead values are dependent on the size_type used by containers. size_type defaults to size_t, but it is possible to force it to be 4 bytes for 64 bit machines by defining EASTL_SIZE_T_32BIT.
  • Inserting at the end of a vector may cause the vector to be resized; resizing a vector is O(n). However, the amortized time complexity for vector insertions at the end is constant.
  • Sort assumes the usage of the best possible sort for a large container of random data. Some sort algorithms (e.g. quick_sort) require random access iterators and so the sorting of some containers requires a different sort algorithm. We do not include bucket or radix sorts, as they are always O(n).
  • Some containers (e.g. deque, hash*) have unusual data structures that make per-container and per-node overhead calculations not quite account for all memory.

End of document

























================================================ FILE: doc/html/EASTLDoc.css ================================================ body { font-family: Georgia, "Times New Roman", Times, serif; font-size: 12pt; } h1 { font-family: Verdana, Arial, Helvetica, sans-serif; display: block; background-color: #BBCCDD; border: 2px solid #000000; font-size: 16pt; font-weight: bold; padding: 6px; } h2 { font-size: 14pt; font-family: Verdana; border-bottom: 2px solid black; } h3 { font-family: Verdana; font-size: 13pt; font-weight: bold; } .code-example { display: block; background-color: #D1DDE9; margin-left: 3em; margin-right: 3em; margin-top: 1em; margin-bottom: 1em; padding: 8px; border: 2px solid #7993C8; font-family: "Courier New", Courier, mono; font-size: 10pt; white-space: pre; } .code-example-span { font-family: "Courier New", Courier, mono; font-size: 10pt; white-space: pre; } .code-example-comment { background-color: #e0e0f0; padding: 0px 0px; font-family: "Courier New", Courier, mono; font-size: 10pt; white-space: pre; color: #999999; margin: auto auto; } .faq-question { background-color: #D9E2EC; font-size: 12pt; font-weight: bold; margin-top: 0em; padding-left:5px; padding-right:8px; padding-top:2px; padding-bottom:3px; margin-bottom: 0.5em; } .faq-answer { display: block; margin: 4pt 1em 0.8em; } .indented { margin-left: 50px; } ================================================ FILE: include/EASTL/algorithm.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // This file implements some of the primary algorithms from the C++ STL // algorithm library. These versions are just like that STL versions and so // are redundant. 
They are provided solely for the purpose of projects that // either cannot use standard C++ STL or want algorithms that have guaranteed // identical behaviour across platforms. /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Definitions // // You will notice that we are very particular about the templated typenames // we use here. You will notice that we follow the C++ standard closely in // these respects. Each of these typenames has a specific meaning; // this is why we don't just label templated arguments with just letters // such as T, U, V, A, B. Here we provide a quick reference for the typenames // we use. See the C++ standard, section 25-8 for more details. // -------------------------------------------------------------- // typename Meaning // -------------------------------------------------------------- // T The value type. // Compare A function which takes two arguments and returns the lesser of the two. // Predicate A function which takes one argument and returns true if the argument meets some criteria. // BinaryPredicate A function which takes two arguments and returns true if some criteria is met (e.g. they are equal). // StrictWeakOrdering A BinaryPredicate that compares two objects, returning true if the first precedes the second. Like Compare but has additional requirements. Used for sorting routines. // Function A function which takes one argument and applies some operation to the target. // Size A count or size. // Generator A function which takes no arguments and returns a value (which will usually be assigned to an object). // UnaryOperation A function which takes one argument and returns a value (which will usually be assigned to second object). // BinaryOperation A function which takes two arguments and returns a value (which will usually be assigned to a third object). 
// InputIterator An input iterator (iterator you read from) which allows reading each element only once and only in a forward direction. // ForwardIterator An input iterator which is like InputIterator except it can be reset back to the beginning. // BidirectionalIterator An input iterator which is like ForwardIterator except it can be read in a backward direction as well. // RandomAccessIterator An input iterator which can be addressed like an array. It is a superset of all other input iterators. // OutputIterator An output iterator (iterator you write to) which allows writing each element only once and only in a forward direction. // // Note that with iterators, a function which takes an InputIterator will // also work with a ForwardIterator, BidirectionalIterator, or RandomAccessIterator. // The given iterator type is merely the -minimum- supported functionality the // iterator must support. /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Optimizations // // There are a number of opportunities for optimizations that we take here // in this library. The most obvious kinds are those that substitute memcpy // in the place of a conventional loop for data types with which this is // possible. The algorithms here are optimized to a higher level than currently // available C++ STL algorithms from vendors such as Microsoft. This is especially // so for game programming on console devices, as we do things such as reduce // branching relative to other STL algorithm implementations. However, the // proper implementation of these algorithm optimizations is a fairly tricky // thing. // // The various things we look to take advantage of in order to implement // optimizations include: // - Taking advantage of random access iterators. // - Taking advantage of trivially copyable data types (types for which it is safe to memcpy or memmove). 
// - Taking advantage of type_traits in general. // - Reducing branching and taking advantage of likely branch predictions. // - Taking advantage of issues related to pointer and reference aliasing. // - Improving cache coherency during memory accesses. // - Making code more likely to be inlinable by the compiler. // /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Supported Algorithms // // Algorithms that we implement are listed here. Note that these items are not // all within this header file, as we split up the header files in order to // improve compilation performance. Items marked with '+' are items that are // extensions which don't exist in the C++ standard. // // ------------------------------------------------------------------------------- // Algorithm Notes // ------------------------------------------------------------------------------- // adjacent_find // adjacent_find // all_of C++11 // any_of C++11 // none_of C++11 // binary_search // binary_search // +binary_search_i // +binary_search_i // +change_heap Found in heap.h // +change_heap Found in heap.h // clamp // copy // copy_if C++11 // copy_n C++11 // copy_backward // count // count_if // equal // equal // equal_range // equal_range // fill // fill_n // find // find_end // find_end // find_first_of // find_first_of // +find_first_not_of // +find_first_not_of // +find_last_of // +find_last_of // +find_last_not_of // +find_last_not_of // find_if // find_if_not // for_each // generate // generate_n // +identical // +identical // iter_swap // lexicographical_compare // lexicographical_compare // lexicographical_compare_three_way // lower_bound // lower_bound // make_heap Found in heap.h // make_heap Found in heap.h // min // min // max // max // +min_alt Exists to work around the problem of conflicts with min/max #defines on some systems. 
// +min_alt // +max_alt // +max_alt // +median // +median // merge Found in sort.h // merge Found in sort.h // min_element // min_element // max_element // max_element // mismatch // mismatch // move // move_backward // nth_element Found in sort.h // nth_element Found in sort.h // partial_sort Found in sort.h // partial_sort Found in sort.h // push_heap Found in heap.h // push_heap Found in heap.h // pop_heap Found in heap.h // pop_heap Found in heap.h // random_shuffle // remove // remove_if // +apply_and_remove // +apply_and_remove_if // remove_copy // remove_copy_if // +remove_heap Found in heap.h // +remove_heap Found in heap.h // replace // replace_if // replace_copy // replace_copy_if // reverse_copy // reverse // random_shuffle // rotate // rotate_copy // search // search // search_n // set_difference // set_difference // set_difference_2 // set_difference_2 // set_decomposition // set_decomposition // set_intersection // set_intersection // set_symmetric_difference // set_symmetric_difference // set_union // set_union // sort Found in sort.h // sort Found in sort.h // sort_heap Found in heap.h // sort_heap Found in heap.h // stable_sort Found in sort.h // stable_sort Found in sort.h // partition Found in sort.h // stable_partition Found in sort.h // swap // swap_ranges // transform // transform // unique // unique // upper_bound // upper_bound // includes // includes // is_permutation // is_permutation // next_permutation // next_permutation // is_partitioned // partition_point // // Algorithms from the C++ standard that we don't implement are listed here. // Most of these items are absent because they aren't used very often. // They also happen to be more complicated than other algorithms. // However, we can implement any of these functions for users that might // need them. 
// inplace_merge // inplace_merge // partial_sort_copy // partial_sort_copy // prev_permutation // prev_permutation // search_n // unique_copy // unique_copy // /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ALGORITHM_H #define EASTL_ALGORITHM_H #include #include #include #include #include #include #include #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS(); #if defined(EA_COMPILER_MSVC) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) #include #endif #include #include // memcpy, memcmp, memmove EA_RESTORE_ALL_VC_WARNINGS(); #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif /////////////////////////////////////////////////////////////////////////////// // min/max workaround // // MSVC++ has #defines for min/max which collide with the min/max algorithm // declarations. The following may still not completely resolve some kinds of // problems with MSVC++ #defines, though it deals with most cases in production // game code. // #if EASTL_NOMINMAX #ifdef min #undef min #endif #ifdef max #undef max #endif #endif namespace eastl { /// min_element /// /// min_element finds the smallest element in the range [first, last). /// It returns the first iterator i in [first, last) such that no other /// iterator in [first, last) points to a value smaller than *i. /// The return value is last if and only if [first, last) is an empty range. /// /// Returns: The first iterator i in the range [first, last) such that /// for any iterator j in the range [first, last) the following corresponding /// condition holds: !(*j < *i). /// /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the /// corresponding comparisons. 
///
template <typename ForwardIterator>
ForwardIterator min_element(ForwardIterator first, ForwardIterator last)
{
	if(first != last)
	{
		ForwardIterator currentMin = first;

		while(++first != last)
		{
			// Strict '<' means an equal later element never replaces an earlier one,
			// so the first of several equal minima is the one returned.
			if(*first < *currentMin)
				currentMin = first;
		}
		return currentMin;
	}
	return first; // Empty range: first == last here.
}


/// min_element
///
/// min_element finds the smallest element in the range [first, last).
/// It returns the first iterator i in [first, last) such that no other
/// iterator in [first, last) points to a value smaller than *i.
/// The return value is last if and only if [first, last) is an empty range.
///
/// Returns: The first iterator i in the range [first, last) such that
/// for any iterator j in the range [first, last) the following corresponding
/// conditions hold: compare(*j, *i) == false.
///
/// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the
/// corresponding comparisons.
///
template <typename ForwardIterator, typename Compare>
ForwardIterator min_element(ForwardIterator first, ForwardIterator last, Compare compare)
{
	if(first != last)
	{
		ForwardIterator currentMin = first;

		while(++first != last)
		{
			// compare(x, y) is a strict "x orders before y" test, so ties keep the earlier element.
			if(compare(*first, *currentMin))
				currentMin = first;
		}
		return currentMin;
	}
	return first; // Empty range: first == last here.
}


/// max_element
///
/// max_element finds the largest element in the range [first, last).
/// It returns the first iterator i in [first, last) such that no other
/// iterator in [first, last) points to a value greater than *i.
/// The return value is last if and only if [first, last) is an empty range.
///
/// Returns: The first iterator i in the range [first, last) such that
/// for any iterator j in the range [first, last) the following corresponding
/// condition holds: !(*i < *j).
///
/// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the
/// corresponding comparisons.
/// template ForwardIterator max_element(ForwardIterator first, ForwardIterator last) { if(first != last) { ForwardIterator currentMax = first; while(++first != last) { if(*currentMax < *first) currentMax = first; } return currentMax; } return first; } /// max_element /// /// max_element finds the largest element in the range [first, last). /// It returns the first iterator i in [first, last) such that no other /// iterator in [first, last) points to a value greater than *i. /// The return value is last if and only if [first, last) is an empty range. /// /// Returns: The first iterator i in the range [first, last) such that /// for any iterator j in the range [first, last) the following corresponding /// condition holds: compare(*i, *j) == false. /// /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the /// corresponding comparisons. /// template ForwardIterator max_element(ForwardIterator first, ForwardIterator last, Compare compare) { if(first != last) { ForwardIterator currentMax = first; while(++first != last) { if(compare(*currentMax, *first)) currentMax = first; } return currentMax; } return first; } #if EASTL_MINMAX_ENABLED /// min /// /// Min returns the lesser of its two arguments; it returns the first /// argument if neither is less than the other. The two arguments are /// compared with operator <. /// /// This min and our other min implementations are defined as returning: /// b < a ? b : a /// which for example may in practice result in something different than: /// b <= a ? b : a /// in the case where b is different from a (though they compare as equal). /// We choose the specific ordering here because that's the ordering /// done by other STL implementations. /// /// Some compilers (e.g. VS20003 - VS2013) generate poor code for the case of /// scalars returned by reference, so we provide a specialization for those cases. /// The specialization returns T by value instead of reference, which is /// not that the Standard specifies. 
The Standard allows you to use /// an expression like &max(x, y), which would be impossible in this case. /// However, we have found no actual code that uses min or max like this and /// this specialization causes no problems in practice. Microsoft has acknowledged /// the problem and may fix it for a future VS version. /// template inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type min(T a, T b) { return b < a ? b : a; } template inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type min(const T& a, const T& b) { return b < a ? b : a; } inline EA_CONSTEXPR float min(float a, float b) { return b < a ? b : a; } inline EA_CONSTEXPR double min(double a, double b) { return b < a ? b : a; } inline EA_CONSTEXPR long double min(long double a, long double b) { return b < a ? b : a; } #endif // EASTL_MINMAX_ENABLED /// min_alt /// /// This is an alternative version of min that avoids any possible /// collisions with Microsoft #defines of min and max. /// /// See min(a, b) for detailed specifications. /// template inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type min_alt(T a, T b) { return b < a ? b : a; } template inline typename eastl::enable_if::value, const T&>::type min_alt(const T& a, const T& b) { return b < a ? b : a; } inline EA_CONSTEXPR float min_alt(float a, float b) { return b < a ? b : a; } inline EA_CONSTEXPR double min_alt(double a, double b) { return b < a ? b : a; } inline EA_CONSTEXPR long double min_alt(long double a, long double b) { return b < a ? b : a; } #if EASTL_MINMAX_ENABLED /// min /// /// Min returns the lesser of its two arguments; it returns the first /// argument if neither is less than the other. The two arguments are /// compared with the Compare function (or function object), which /// takes two arguments and returns true if the first is less than /// the second. /// /// See min(a, b) for detailed specifications. 
/// /// Example usage: /// struct A{ int a; }; /// struct Struct{ bool operator()(const A& a1, const A& a2){ return a1.a < a2.a; } }; /// /// A a1, a2, a3; /// a3 = min(a1, a2, Struct()); /// /// Example usage: /// struct B{ int b; }; /// inline bool Function(const B& b1, const B& b2){ return b1.b < b2.b; } /// /// B b1, b2, b3; /// b3 = min(b1, b2, Function); /// template inline const T& min(const T& a, const T& b, Compare compare) { return compare(b, a) ? b : a; } #endif // EASTL_MINMAX_ENABLED /// min_alt /// /// This is an alternative version of min that avoids any possible /// collisions with Microsoft #defines of min and max. /// /// See min(a, b) for detailed specifications. /// template inline const T& min_alt(const T& a, const T& b, Compare compare) { return compare(b, a) ? b : a; } #if EASTL_MINMAX_ENABLED /// max /// /// Max returns the greater of its two arguments; it returns the first /// argument if neither is greater than the other. The two arguments are /// compared with operator < (and not operator >). /// /// This min and our other min implementations are defined as returning: /// a < b ? b : a /// which for example may in practice result in something different than: /// a <= b ? b : a /// in the case where b is different from a (though they compare as equal). /// We choose the specific ordering here because that's the ordering /// done by other STL implementations. /// template inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type max(T a, T b) { return a < b ? b : a; } template inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type max(const T& a, const T& b) { return a < b ? b : a; } inline EA_CONSTEXPR float max(float a, float b) { return a < b ? b : a; } inline EA_CONSTEXPR double max(double a, double b) { return a < b ? b : a; } inline EA_CONSTEXPR long double max(long double a, long double b) { return a < b ? 
b : a; } #endif // EASTL_MINMAX_ENABLED /// max_alt /// /// This is an alternative version of max that avoids any possible /// collisions with Microsoft #defines of min and max. /// template inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type max_alt(T a, T b) { return a < b ? b : a; } template inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type max_alt(const T& a, const T& b) { return a < b ? b : a; } inline EA_CONSTEXPR float max_alt(float a, float b) { return a < b ? b : a; } inline EA_CONSTEXPR double max_alt(double a, double b) { return a < b ? b : a; } inline EA_CONSTEXPR long double max_alt(long double a, long double b) { return a < b ? b : a; } #if EASTL_MINMAX_ENABLED /// max /// /// Min returns the lesser of its two arguments; it returns the first /// argument if neither is less than the other. The two arguments are /// compared with the Compare function (or function object), which /// takes two arguments and returns true if the first is less than /// the second. /// template inline const T& max(const T& a, const T& b, Compare compare) { return compare(a, b) ? b : a; } #endif /// max_alt /// /// This is an alternative version of max that avoids any possible /// collisions with Microsoft #defines of min and max. /// template inline const T& max_alt(const T& a, const T& b, Compare compare) { return compare(a, b) ? 
b : a; } /// min(std::initializer_list) /// template T min(std::initializer_list ilist) { return *eastl::min_element(ilist.begin(), ilist.end()); } /// min(std::initializer_list, Compare) /// template T min(std::initializer_list ilist, Compare compare) { return *eastl::min_element(ilist.begin(), ilist.end(), compare); } /// max(std::initializer_list) /// template T max(std::initializer_list ilist) { return *eastl::max_element(ilist.begin(), ilist.end()); } /// max(std::initializer_list, Compare) /// template T max(std::initializer_list ilist, Compare compare) { return *eastl::max_element(ilist.begin(), ilist.end(), compare); } /// minmax_element /// /// Returns: make_pair(first, first) if [first, last) is empty, otherwise make_pair(m, M), /// where m is the first iterator in [first,last) such that no iterator in the range /// refers to a smaller element, and where M is the last iterator in [first,last) such /// that no iterator in the range refers to a larger element. /// /// Complexity: At most max([(3/2)*(N - 1)], 0) applications of the corresponding predicate, /// where N is distance(first, last). 
/// template eastl::pair minmax_element(ForwardIterator first, ForwardIterator last, Compare compare) { eastl::pair result(first, first); if(!(first == last) && !(++first == last)) { if(compare(*first, *result.first)) { result.second = result.first; result.first = first; } else result.second = first; while(++first != last) { ForwardIterator i = first; if(++first == last) { if(compare(*i, *result.first)) result.first = i; else if(!compare(*i, *result.second)) result.second = i; break; } else { if(compare(*first, *i)) { if(compare(*first, *result.first)) result.first = first; if(!compare(*i, *result.second)) result.second = i; } else { if(compare(*i, *result.first)) result.first = i; if(!compare(*first, *result.second)) result.second = first; } } } } return result; } template eastl::pair minmax_element(ForwardIterator first, ForwardIterator last) { typedef typename eastl::iterator_traits::value_type value_type; return eastl::minmax_element(first, last, eastl::less()); } /// minmax /// /// Requires: Type T shall be LessThanComparable. /// Returns: pair(b, a) if b is smaller than a, and pair(a, b) otherwise. /// Remarks: Returns pair(a, b) when the arguments are equivalent. /// Complexity: Exactly one comparison. /// // The following optimization is a problem because it changes the return value in a way that would break // users unless they used auto (e.g. auto result = minmax(17, 33); ) // // template // inline EA_CONSTEXPR typename eastl::enable_if::value, eastl::pair >::type // minmax(T a, T b) // { // return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); // } // // template // inline typename eastl::enable_if::value, eastl::pair >::type // minmax(const T& a, const T& b) // { // return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); // } // It turns out that the following conforming definition of minmax generates a warning when used with VC++ up // to at least VS2012. 
The VS2012 version of minmax is a broken and non-conforming definition, and we don't // want to do that. We could do it for scalars alone, though we'd have to decide if we are going to do that // for all compilers, because it changes the return value from a pair of references to a pair of values. template inline eastl::pair minmax(const T& a, const T& b) { return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); } template eastl::pair minmax(const T& a, const T& b, Compare compare) { return compare(b, a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); } template eastl::pair minmax(std::initializer_list ilist) { typedef typename std::initializer_list::iterator iterator_type; eastl::pair iteratorPair = eastl::minmax_element(ilist.begin(), ilist.end()); return eastl::make_pair(*iteratorPair.first, *iteratorPair.second); } template eastl::pair minmax(std::initializer_list ilist, Compare compare) { typedef typename std::initializer_list::iterator iterator_type; eastl::pair iteratorPair = eastl::minmax_element(ilist.begin(), ilist.end(), compare); return eastl::make_pair(*iteratorPair.first, *iteratorPair.second); } template inline T&& median_impl(T&& a, T&& b, T&& c) { if(a < b) { if(b < c) return eastl::forward(b); else if(a < c) return eastl::forward(c); else return eastl::forward(a); } else if(a < c) return eastl::forward(a); else if(b < c) return eastl::forward(c); return eastl::forward(b); } /// median /// /// median finds which element of three (a, b, d) is in-between the other two. /// If two or more elements are equal, the first (e.g. a before b) is chosen. /// /// Complexity: Either two or three comparisons will be required, depending /// on the values. /// template inline const T& median(const T& a, const T& b, const T& c) { return median_impl(a, b, c); } /// median /// /// median finds which element of three (a, b, d) is in-between the other two. /// If two or more elements are equal, the first (e.g. a before b) is chosen. 
/// /// Complexity: Either two or three comparisons will be required, depending /// on the values. /// template inline T&& median(T&& a, T&& b, T&& c) { return eastl::forward(median_impl(eastl::forward(a), eastl::forward(b), eastl::forward(c))); } template inline T&& median_impl(T&& a, T&& b, T&& c, Compare compare) { if(compare(a, b)) { if(compare(b, c)) return eastl::forward(b); else if(compare(a, c)) return eastl::forward(c); else return eastl::forward(a); } else if(compare(a, c)) return eastl::forward(a); else if(compare(b, c)) return eastl::forward(c); return eastl::forward(b); } /// median /// /// median finds which element of three (a, b, d) is in-between the other two. /// If two or more elements are equal, the first (e.g. a before b) is chosen. /// /// Complexity: Either two or three comparisons will be required, depending /// on the values. /// template inline const T& median(const T& a, const T& b, const T& c, Compare compare) { return median_impl(a, b, c, compare); } /// median /// /// median finds which element of three (a, b, d) is in-between the other two. /// If two or more elements are equal, the first (e.g. a before b) is chosen. /// /// Complexity: Either two or three comparisons will be required, depending /// on the values. 
/// template inline T&& median(T&& a, T&& b, T&& c, Compare compare) { return eastl::forward(median_impl(eastl::forward(a), eastl::forward(b), eastl::forward(c), compare)); } /// all_of /// /// Returns: true if the unary predicate p returns true for all elements in the range [first, last) /// template inline bool all_of(InputIterator first, InputIterator last, Predicate p) { for(; first != last; ++first) { if(!p(*first)) return false; } return true; } /// any_of /// /// Returns: true if the unary predicate p returns true for any of the elements in the range [first, last) /// template inline bool any_of(InputIterator first, InputIterator last, Predicate p) { for(; first != last; ++first) { if(p(*first)) return true; } return false; } /// none_of /// /// Returns: true if the unary predicate p returns true for none of the elements in the range [first, last) /// template inline bool none_of(InputIterator first, InputIterator last, Predicate p) { for(; first != last; ++first) { if(p(*first)) return false; } return true; } /// adjacent_find /// /// Returns: The first iterator i such that both i and i + 1 are in the range /// [first, last) for which the following corresponding conditions hold: *i == *(i + 1). /// Returns last if no such iterator is found. /// /// Complexity: Exactly 'find(first, last, value) - first' applications of the corresponding predicate. /// template inline ForwardIterator adjacent_find(ForwardIterator first, ForwardIterator last) { if(first != last) { ForwardIterator i = first; for(++i; i != last; ++i) { if(*first == *i) return first; first = i; } } return last; } /// adjacent_find /// /// Returns: The first iterator i such that both i and i + 1 are in the range /// [first, last) for which the following corresponding conditions hold: predicate(*i, *(i + 1)) != false. /// Returns last if no such iterator is found. /// /// Complexity: Exactly 'find(first, last, value) - first' applications of the corresponding predicate. 
/// template inline ForwardIterator adjacent_find(ForwardIterator first, ForwardIterator last, BinaryPredicate predicate) { if(first != last) { ForwardIterator i = first; for(++i; i != last; ++i) { if(predicate(*first, *i)) return first; first = i; } } return last; } /// shuffle /// /// New for C++11 /// Randomizes a sequence of values via a user-supplied UniformRandomNumberGenerator. /// The difference between this and the original random_shuffle function is that this uses the more /// advanced and flexible UniformRandomNumberGenerator interface as opposed to the more /// limited RandomNumberGenerator interface of random_shuffle. /// /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. /// /// Complexity: Exactly '(last - first) - 1' swaps. /// /// Example usage: /// struct Rand{ eastl_size_t operator()(eastl_size_t n) { return (eastl_size_t)(rand() % n); } }; // Note: The C rand function is poor and slow. /// Rand randInstance; /// shuffle(pArrayBegin, pArrayEnd, randInstance); /// // See the C++11 Standard, 26.5.1.3, Uniform random number generator requirements. // Also http://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution template void shuffle(RandomAccessIterator first, RandomAccessIterator last, UniformRandomNumberGenerator&& urng) { if(first != last) { typedef typename eastl::iterator_traits::difference_type difference_type; typedef typename eastl::make_unsigned::type unsigned_difference_type; typedef typename eastl::uniform_int_distribution uniform_int_distribution; typedef typename uniform_int_distribution::param_type uniform_int_distribution_param_type; uniform_int_distribution uid; for(RandomAccessIterator i = first + 1; i != last; ++i) iter_swap(i, first + uid(urng, uniform_int_distribution_param_type(0, i - first))); } } /// random_shuffle /// /// Randomizes a sequence of values. /// /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. 
/// /// Complexity: Exactly '(last - first) - 1' swaps. /// /// Example usage: /// eastl_size_t Rand(eastl_size_t n) { return (eastl_size_t)(rand() % n); } // Note: The C rand function is poor and slow. /// random_shuffle(pArrayBegin, pArrayEnd, Rand); /// /// Example usage: /// struct Rand{ eastl_size_t operator()(eastl_size_t n) { return (eastl_size_t)(rand() % n); } }; // Note: The C rand function is poor and slow. /// Rand randInstance; /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); /// template inline void random_shuffle(RandomAccessIterator first, RandomAccessIterator last, RandomNumberGenerator&& rng) { typedef typename eastl::iterator_traits::difference_type difference_type; // We must do 'rand((i - first) + 1)' here and cannot do 'rand(last - first)', // as it turns out that the latter results in unequal distribution probabilities. // http://www.cigital.com/papers/download/developer_gambling.php const difference_type swapMax = eastl::distance(first, last); // deliberately start at 1. for (difference_type swapIter = 1; swapIter < swapMax; ++swapIter) { RandomAccessIterator i = first + swapIter; iter_swap(i, first + (difference_type)rng((eastl_size_t)((i - first) + 1))); } } /// random_shuffle /// /// Randomizes a sequence of values. /// /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. /// /// Complexity: Exactly '(last - first) - 1' swaps. /// /// Example usage: /// random_shuffle(pArrayBegin, pArrayEnd); /// /// *** Disabled until we decide if we want to get into the business of writing random number generators. *** /// /// template /// inline void random_shuffle(RandomAccessIterator first, RandomAccessIterator last) /// { /// for(RandomAccessIterator i = first + 1; i < last; ++i) /// iter_swap(i, first + SomeRangedRandomNumberGenerator((i - first) + 1)); /// } /// move_n /// /// Same as move(InputIterator, InputIterator, OutputIterator) except based on count instead of iterator range. 
/// template inline OutputIterator move_n_impl(InputIterator first, Size n, OutputIterator result, eastl::input_iterator_tag) { for(; n > 0; --n) *result++ = eastl::move(*first++); return result; } template inline OutputIterator move_n_impl(RandomAccessIterator first, Size n, OutputIterator result, eastl::random_access_iterator_tag) { return eastl::move(first, first + n, result); // Take advantage of the optimizations present in the move algorithm. } template inline OutputIterator move_n(InputIterator first, Size n, OutputIterator result) { typedef typename eastl::iterator_traits::iterator_category IC; return eastl::move_n_impl(first, n, result, IC()); } /// copy_n /// /// Same as copy(InputIterator, InputIterator, OutputIterator) except based on count instead of iterator range. /// Effects: Copies exactly count values from the range beginning at first to the range beginning at result, if count > 0. Does nothing otherwise. /// Returns: Iterator in the destination range, pointing past the last element copied if count>0 or first otherwise. /// Complexity: Exactly count assignments, if count > 0. /// template inline OutputIterator copy_n_impl(InputIterator first, Size n, OutputIterator result, eastl::input_iterator_tag) { for(; n > 0; --n) *result++ = *first++; return result; } template inline OutputIterator copy_n_impl(RandomAccessIterator first, Size n, OutputIterator result, eastl::random_access_iterator_tag) { return eastl::copy(first, first + n, result); // Take advantage of the optimizations present in the copy algorithm. } template inline OutputIterator copy_n(InputIterator first, Size n, OutputIterator result) { typedef typename eastl::iterator_traits::iterator_category IC; return eastl::copy_n_impl(first, n, result, IC()); } /// copy_if /// /// Effects: Assigns to the result iterator only if the predicate is true. 
/// template inline OutputIterator copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate) { // This implementation's performance could be improved by taking a more complicated approach like with the copy algorithm. for(; first != last; ++first) { if(predicate(*first)) *result++ = *first; } return result; } // Implementation moving copying both trivial and non-trivial data via a lesser iterator than random-access. template struct move_and_copy_backward_helper { template static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { while(first != last) *--resultEnd = *--last; return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. } }; // Specialization for moving non-trivial data via a lesser iterator than random-access. template struct move_and_copy_backward_helper { template static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { while(first != last) *--resultEnd = eastl::move(*--last); return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. } }; // Specialization for moving non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. template<> struct move_and_copy_backward_helper { template static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { typedef typename eastl::iterator_traits::difference_type difference_type; for(difference_type n = (last - first); n > 0; --n) *--resultEnd = eastl::move(*--last); return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. 
} }; // Specialization for copying non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. // This specialization converts the random access BidirectionalIterator1 last-first to an integral type. There's simple way for us to take advantage of a random access output iterator, // as the range is specified by the input instead of the output, and distance(first, last) for a non-random-access iterator is potentially slow. template <> struct move_and_copy_backward_helper { template static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { typedef typename eastl::iterator_traits::difference_type difference_type; for(difference_type n = (last - first); n > 0; --n) *--resultEnd = *--last; return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. } }; // Specialization for when we can use memmove/memcpy. See the notes above for what conditions allow this. template struct move_and_copy_backward_helper { template static T* move_or_copy_backward(const T* first, const T* last, T* resultEnd) { const size_t n = (size_t)((uintptr_t)last - (uintptr_t)first); // We could use memcpy here if there's no range overlap, but memcpy is rarely much faster than memmove. 
if (n > 0) return (T*)memmove(resultEnd - (last - first), first, n); else return resultEnd; } }; template inline BidirectionalIterator2 move_and_copy_backward_chooser(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { typedef typename eastl::iterator_traits::iterator_category IIC; const bool canBeMemmoved = internal::can_be_memmoved_helper::value; return eastl::move_and_copy_backward_helper::move_or_copy_backward(first, last, resultEnd); // Need to chose based on the input iterator tag and not the output iterator tag, because containers accept input ranges of iterator types different than self. } template EASTL_REMOVE_AT_2024_SEPT inline BidirectionalIterator2 move_and_copy_backward_unwrapper(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { EASTL_INTERNAL_DISABLE_DEPRECATED() // 'unwrap_iterator': was declared deprecated return BidirectionalIterator2(eastl::move_and_copy_backward_chooser(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), eastl::unwrap_iterator(resultEnd))); // Have to convert to BidirectionalIterator2 because result.base() could be a T* EASTL_INTERNAL_RESTORE_DEPRECATED() } /// move_backward /// /// The elements are moved in reverse order (the last element is moved first), but their relative order is preserved. /// After this operation the elements in the moved-from range will still contain valid values of the /// appropriate type, but not necessarily the same values as before the move. /// Returns the beginning of the result range. /// Note: When moving between containers, the dest range must be valid; this function doesn't resize containers. /// Note: If result is within [first, last), move must be used instead of move_backward. 
/// /// Example usage: /// eastl::move_backward(myArray.begin(), myArray.end(), myDestArray.end()); /// /// Reference implementation: /// template /// BidirectionalIterator2 move_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) /// { /// while(last != first) /// *--resultEnd = eastl::move(*--last); /// return resultEnd; /// } /// template inline BidirectionalIterator2 move_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { return eastl::move_and_copy_backward_chooser(first, last, resultEnd); } /// copy_backward /// /// copies memory in the range of [first, last) to the range *ending* with result. /// /// Effects: Copies elements in the range [first, last) into the range /// [result - (last - first), result) starting from last 1 and proceeding to first. /// For each positive integer n <= (last - first), performs *(result n) = *(last - n). /// /// Requires: result shall not be in the range [first, last). /// /// Returns: result - (last - first). That is, returns the beginning of the result range. /// /// Complexity: Exactly 'last - first' assignments. /// template inline BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) { return eastl::move_and_copy_backward_chooser(first, last, resultEnd); } /// count /// /// Counts the number of items in the range of [first, last) which equal the input value. /// /// Effects: Returns the number of iterators i in the range [first, last) for which the /// following corresponding conditions hold: *i == value. /// /// Complexity: At most 'last - first' applications of the corresponding predicate. /// /// Note: The predicate version of count is count_if and not another variation of count. /// This is because both versions would have three parameters and there could be ambiguity. 
/// template inline typename eastl::iterator_traits::difference_type count(InputIterator first, InputIterator last, const T& value) { typename eastl::iterator_traits::difference_type result = 0; for(; first != last; ++first) { if(*first == value) ++result; } return result; } // C++ doesn't define a count with predicate, as it can effectively be synthesized via count_if // with an appropriate predicate. However, it's often simpler to just have count with a predicate. template inline typename eastl::iterator_traits::difference_type count(InputIterator first, InputIterator last, const T& value, Predicate predicate) { typename eastl::iterator_traits::difference_type result = 0; for(; first != last; ++first) { if(predicate(*first, value)) ++result; } return result; } /// count_if /// /// Counts the number of items in the range of [first, last) which match /// the input value as defined by the input predicate function. /// /// Effects: Returns the number of iterators i in the range [first, last) for which the /// following corresponding conditions hold: predicate(*i) != false. /// /// Complexity: At most 'last - first' applications of the corresponding predicate. /// /// Note: The non-predicate version of count_if is count and not another variation of count_if. /// This is because both versions would have three parameters and there could be ambiguity. /// template inline typename eastl::iterator_traits::difference_type count_if(InputIterator first, InputIterator last, Predicate predicate) { typename eastl::iterator_traits::difference_type result = 0; for(; first != last; ++first) { if(predicate(*first)) ++result; } return result; } /// find /// /// finds the value within the unsorted range of [first, last). /// /// Returns: The first iterator i in the range [first, last) for which /// the following corresponding conditions hold: *i == value. /// Returns last if no such iterator is found. 
///
/// Complexity: At most 'last - first' applications of the corresponding predicate.
/// This is a linear search and not a binary one.
///
/// Note: The predicate version of find is find_if and not another variation of find.
/// This is because both versions would have three parameters and there could be ambiguity.
///
template <typename InputIterator, typename T>
inline InputIterator
find(InputIterator first, InputIterator last, const T& value)
{
    // Note that we always express value comparisons in terms of < or ==.
    while((first != last) && !(*first == value))
        ++first;
    return first;
}


// C++ doesn't define a find with predicate, as it can effectively be synthesized via find_if
// with an appropriate predicate. However, it's often simpler to just have find with a predicate.
// Returns the first iterator i for which predicate(*i, value) is true, or last if there is none.
template <typename InputIterator, typename T, typename Predicate>
inline InputIterator
find(InputIterator first, InputIterator last, const T& value, Predicate predicate)
{
    while((first != last) && !predicate(*first, value))
        ++first;
    return first;
}


/// find_if
///
/// finds the value within the unsorted range of [first, last).
///
/// Returns: The first iterator i in the range [first, last) for which
/// the following corresponding conditions hold: pred(*i) != false.
/// Returns last if no such iterator is found.
///
/// Complexity: At most 'last - first' applications of the corresponding predicate.
///
/// Note: The non-predicate version of find_if is find and not another variation of find_if.
/// This is because both versions would have three parameters and there could be ambiguity.
///
template <typename InputIterator, typename Predicate>
inline InputIterator
find_if(InputIterator first, InputIterator last, Predicate predicate)
{
    while((first != last) && !predicate(*first))
        ++first;
    return first;
}


/// find_if_not
///
/// find_if_not works the same as find_if except it tests for if the predicate
/// returns false for the elements instead of true.
///
template <typename InputIterator, typename Predicate>
inline InputIterator
find_if_not(InputIterator first, InputIterator last, Predicate predicate)
{
    for(; first != last; ++first)
    {
        if(!predicate(*first))
            return first;
    }
    return last;
}


/// find_first_of
///
/// find_first_of is similar to find in that it performs linear search through
/// a range of ForwardIterators. The difference is that while find searches
/// for one particular value, find_first_of searches for any of several values.
/// Specifically, find_first_of searches for the first occurrence in the
/// range [first1, last1) of any of the elements in [first2, last2).
/// This function is thus similar to the strpbrk standard C string function.
/// If the sequence of elements to search for (i.e. first2-last2) is empty,
/// the find always fails and last1 will be returned.
///
/// Effects: Finds an element that matches one of a set of values.
///
/// Returns: The first iterator i in the range [first1, last1) such that for some
/// iterator j in the range [first2, last2) the following conditions hold: *i == *j.
/// Returns last1 if no such iterator is found.
///
/// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the
/// corresponding predicate.
///
template <class ForwardIterator1, class ForwardIterator2>
ForwardIterator1
find_first_of(ForwardIterator1 first1, ForwardIterator1 last1,
              ForwardIterator2 first2, ForwardIterator2 last2)
{
    for(; first1 != last1; ++first1)
    {
        // Scan the whole candidate set for the current element.
        for(ForwardIterator2 i = first2; i != last2; ++i)
        {
            if(*first1 == *i)
                return first1;
        }
    }
    return last1;
}


/// find_first_of
///
/// find_first_of is similar to find in that it performs linear search through
/// a range of ForwardIterators. The difference is that while find searches
/// for one particular value, find_first_of searches for any of several values.
/// Specifically, find_first_of searches for the first occurrence in the
/// range [first1, last1) of any of the elements in [first2, last2).
/// This function is thus similar to the strpbrk standard C string function.
/// /// Effects: Finds an element that matches one of a set of values. /// /// Returns: The first iterator i in the range [first1, last1) such that for some /// integer j in the range [first2, last2) the following conditions hold: pred(*i, *j) != false. /// Returns last1 if no such iterator is found. /// /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the /// corresponding predicate. /// template ForwardIterator1 find_first_of(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate) { for(; first1 != last1; ++first1) { for(ForwardIterator2 i = first2; i != last2; ++i) { if(predicate(*first1, *i)) return first1; } } return last1; } /// find_first_not_of /// /// Searches through first range for the first element that does not belong the second input range. /// This is very much like the C++ string find_first_not_of function. /// /// Returns: The first iterator i in the range [first1, last1) such that for some /// integer j in the range [first2, last2) the following conditions hold: !(*i == *j). /// Returns last1 if no such iterator is found. /// /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the /// corresponding predicate. /// template ForwardIterator1 find_first_not_of(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2) { for(; first1 != last1; ++first1) { if(eastl::find(first2, last2, *first1) == last2) break; } return first1; } /// find_first_not_of /// /// Searches through first range for the first element that does not belong the second input range. /// This is very much like the C++ string find_first_not_of function. /// /// Returns: The first iterator i in the range [first1, last1) such that for some /// integer j in the range [first2, last2) the following conditions hold: pred(*i, *j) == false. /// Returns last1 if no such iterator is found. 
/// /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the /// corresponding predicate. /// template inline ForwardIterator1 find_first_not_of(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate) { typedef typename eastl::iterator_traits::value_type value_type; for(; first1 != last1; ++first1) { if(eastl::find_if(first2, last2, [&predicate, first1](value_type& rhs) { return predicate(*first1, rhs); }) == last2) break; } return first1; } template inline BidirectionalIterator1 find_last_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2) { if((first1 != last1) && (first2 != last2)) { BidirectionalIterator1 it1(last1); while((--it1 != first1) && (eastl::find(first2, last2, *it1) == last2)) ; // Do nothing if((it1 != first1) || (eastl::find(first2, last2, *it1) != last2)) return it1; } return last1; } template BidirectionalIterator1 find_last_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate) { typedef typename eastl::iterator_traits::value_type value_type; if((first1 != last1) && (first2 != last2)) { BidirectionalIterator1 it1(last1); while((--it1 != first1) && (eastl::find_if(first2, last2, [&predicate, it1](value_type& rhs) { return predicate(*it1, rhs); }) == last2)) ; // Do nothing if((it1 != first1) || (eastl::find_if(first2, last2, [&predicate, it1](value_type& rhs) { return predicate(*it1, rhs); }) != last2)) return it1; } return last1; } template inline BidirectionalIterator1 find_last_not_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2) { if((first1 != last1) && (first2 != last2)) { BidirectionalIterator1 it1(last1); while((--it1 != first1) && (eastl::find(first2, last2, *it1) != last2)) ; // Do nothing if((it1 != first1) || (eastl::find( first2, 
last2, *it1) == last2)) return it1; } return last1; } template inline BidirectionalIterator1 find_last_not_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate) { typedef typename eastl::iterator_traits::value_type value_type; if((first1 != last1) && (first2 != last2)) { BidirectionalIterator1 it1(last1); while((--it1 != first1) && (eastl::find_if(first2, last2, [&predicate, it1](value_type& rhs) { return predicate(*it1, rhs); }) != last2)) ; // Do nothing if((it1 != first1) || (eastl::find_if(first2, last2, [&predicate, it1](value_type& rhs) { return predicate(*it1, rhs); })) != last2) return it1; } return last1; } /// for_each /// /// Calls the Function function for each value in the range [first, last). /// Function takes a single parameter: the current value. /// /// Effects: Applies function to the result of dereferencing every iterator in /// the range [first, last), starting from first and proceeding to last 1. /// /// Returns: function. /// /// Complexity: Applies function exactly 'last - first' times. /// /// Note: If function returns a result, the result is ignored. /// template inline Function for_each(InputIterator first, InputIterator last, Function function) { for(; first != last; ++first) function(*first); return function; } /// for_each_n /// /// Calls the Function function for each value in the range [first, first + n). /// Function takes a single parameter: the current value. /// /// Effects: Applies function to the result of dereferencing every iterator in /// the range [first, first + n), starting from first and proceeding to last 1. /// /// Returns: first + n. /// /// Complexity: Applies function exactly 'first + n' times. /// /// Note: //// * If function returns a result, the result is ignored. //// * If n < 0, behaviour is undefined. 
/// template EA_CPP14_CONSTEXPR inline InputIterator for_each_n(InputIterator first, Size n, Function function) { for (Size i = 0; i < n; ++first, i++) function(*first); return first; } /// generate /// /// Iterates the range of [first, last) and assigns to each element the /// result of the function generator. Generator is a function which takes /// no arguments. /// /// Complexity: Exactly 'last - first' invocations of generator and assignments. /// template inline void generate(ForwardIterator first, ForwardIterator last, Generator generator) { for(; first != last; ++first) // We cannot call generate_n(first, last-first, generator) *first = generator(); // because the 'last-first' might not be supported by the } // given iterator. /// generate_n /// /// Iterates an interator n times and assigns the result of generator /// to each succeeding element. Generator is a function which takes /// no arguments. /// /// Complexity: Exactly n invocations of generator and assignments. /// template inline OutputIterator generate_n(OutputIterator first, Size n, Generator generator) { for(; n > 0; --n, ++first) *first = generator(); return first; } /// transform /// /// Iterates the input range of [first, last) and the output iterator result /// and assigns the result of unaryOperation(input) to result. /// /// Effects: Assigns through every iterator i in the range [result, result + (last1 - first1)) /// a new corresponding value equal to unaryOperation(*(first1 + (i - result)). /// /// Requires: op shall not have any side effects. /// /// Returns: result + (last1 - first1). That is, returns the end of the output range. /// /// Complexity: Exactly 'last1 - first1' applications of unaryOperation. /// /// Note: result may be equal to first. 
/// template inline OutputIterator transform(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unaryOperation) { for(; first != last; ++first, ++result) *result = unaryOperation(*first); return result; } /// transform /// /// Iterates the input range of [first, last) and the output iterator result /// and assigns the result of binaryOperation(input1, input2) to result. /// /// Effects: Assigns through every iterator i in the range [result, result + (last1 - first1)) /// a new corresponding value equal to binaryOperation(*(first1 + (i - result), *(first2 + (i - result))). /// /// Requires: binaryOperation shall not have any side effects. /// /// Returns: result + (last1 - first1). That is, returns the end of the output range. /// /// Complexity: Exactly 'last1 - first1' applications of binaryOperation. /// /// Note: result may be equal to first1 or first2. /// template inline OutputIterator transform(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryOperation binaryOperation) { for(; first1 != last1; ++first1, ++first2, ++result) *result = binaryOperation(*first1, *first2); return result; } /// equal /// /// Returns: true if for every iterator i in the range [first1, last1) the /// following corresponding conditions hold: predicate(*i, *(first2 + (i - first1))) != false. /// Otherwise, returns false. /// /// Complexity: At most last1 first1 applications of the corresponding predicate. /// /// To consider: Make specializations of this for scalar types and random access /// iterators that uses memcmp or some trick memory comparison function. /// We should verify that such a thing results in an improvement. /// template EA_CPP14_CONSTEXPR inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) { for(; first1 != last1; ++first1, ++first2) { if(!(*first1 == *first2)) // Note that we always express value comparisons in terms of < or ==. 
return false; } return true; } /* Enable the following if there was shown to be some benefit. A glance and Microsoft VC++ memcmp shows that it is not optimized in any way, much less one that would benefit us here. inline bool equal(const bool* first1, const bool* last1, const bool* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const char* first1, const char* last1, const char* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const unsigned char* first1, const unsigned char* last1, const unsigned char* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const signed char* first1, const signed char* last1, const signed char* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } #ifndef EA_WCHAR_T_NON_NATIVE inline bool equal(const wchar_t* first1, const wchar_t* last1, const wchar_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } #endif inline bool equal(const int16_t* first1, const int16_t* last1, const int16_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const uint16_t* first1, const uint16_t* last1, const uint16_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const int32_t* first1, const int32_t* last1, const int32_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const uint32_t* first1, const uint32_t* last1, const uint32_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } inline bool equal(const int64_t* first1, const int64_t* last1, const int64_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - 
(uintptr_t)first1)) == 0); } inline bool equal(const uint64_t* first1, const uint64_t* last1, const uint64_t* first2) { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } */ /// equal /// /// Returns: true if for every iterator i in the range [first1, last1) the /// following corresponding conditions hold: pred(*i, *(first2 + (i first1))) != false. /// Otherwise, returns false. /// /// Complexity: At most last1 first1 applications of the corresponding predicate. /// template inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate predicate) { for(; first1 != last1; ++first1, ++first2) { if(!predicate(*first1, *first2)) return false; } return true; } /// identical /// /// Returns true if the two input ranges are equivalent. /// There is a subtle difference between this algorithm and /// the 'equal' algorithm. The equal algorithm assumes the /// two ranges are of equal length. This algorithm efficiently /// compares two ranges for both length equality and for /// element equality. There is no other standard algorithm /// that can do this. /// /// Returns: true if the sequence of elements defined by the range /// [first1, last1) is of the same length as the sequence of /// elements defined by the range of [first2, last2) and if /// the elements in these ranges are equal as per the /// equal algorithm. /// /// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications /// of the corresponding comparison. 
///
template <typename InputIterator1, typename InputIterator2>
bool identical(InputIterator1 first1, InputIterator1 last1,
               InputIterator2 first2, InputIterator2 last2)
{
    while((first1 != last1) && (first2 != last2) && (*first1 == *first2))
    {
        ++first1;
        ++first2;
    }
    // Both ranges must have been consumed completely.
    return (first1 == last1) && (first2 == last2);
}


/// identical
///
/// Same as above, but elements are compared with predicate(a, b) instead of operator==.
///
template <typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
bool identical(InputIterator1 first1, InputIterator1 last1,
               InputIterator2 first2, InputIterator2 last2, BinaryPredicate predicate)
{
    while((first1 != last1) && (first2 != last2) && predicate(*first1, *first2))
    {
        ++first1;
        ++first2;
    }
    return (first1 == last1) && (first2 == last2);
}


/// lexicographical_compare
///
/// Returns: true if the sequence of elements defined by the range
/// [first1, last1) is lexicographically less than the sequence of
/// elements defined by the range [first2, last2). Returns false otherwise.
///
/// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications
/// of the corresponding comparison.
///
/// Note: If two sequences have the same number of elements and their
/// corresponding elements are equivalent, then neither sequence is
/// lexicographically less than the other. If one sequence is a prefix
/// of the other, then the shorter sequence is lexicographically less
/// than the longer sequence. Otherwise, the lexicographical comparison
/// of the sequences yields the same result as the comparison of the first
/// corresponding pair of elements that are not equivalent.
///
template <typename InputIterator1, typename InputIterator2>
inline bool
lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2)
{
    for(; (first1 != last1) && (first2 != last2); ++first1, ++first2)
    {
        if(*first1 < *first2)
            return true;
        if(*first2 < *first1)
            return false;
    }
    return (first1 == last1) && (first2 != last2);
}

namespace internal
{
    /// Shared implementation of the single-byte lexicographical_compare overloads below.
    /// n1 and n2 are the element counts of the two ranges.
    ///
    /// memcmp is skipped when the common prefix is empty; in that case (and when the
    /// common prefix compares equal) the shorter range is the lesser one. An empty range
    /// therefore compares less than any non-empty range, as in the generic template.
    ///
    /// NOTE(review): memcmp orders bytes as unsigned char, so for 'signed char' (and for
    /// 'char' where it is signed) ranges holding negative values are ordered differently
    /// from operator<. That pre-existing behaviour is left unchanged here.
    inline bool lexicographical_compare_bytes(const void* first1, ptrdiff_t n1, const void* first2, ptrdiff_t n2)
    {
        const ptrdiff_t n = (n2 < n1) ? n2 : n1;

        if (n > 0) // don't call memcmp with n == 0
        {
            const int result = memcmp(first1, first2, (size_t)n);
            if (result != 0)
                return result < 0;
        }
        return n1 < n2;
    }
}

inline bool     // Specialization for const char*.
lexicographical_compare(const char* first1, const char* last1, const char* first2, const char* last2)
{
    return internal::lexicographical_compare_bytes(first1, last1 - first1, first2, last2 - first2);
}

inline bool     // Specialization for char*.
lexicographical_compare(char* first1, char* last1, char* first2, char* last2)
{
    return internal::lexicographical_compare_bytes(first1, last1 - first1, first2, last2 - first2);
}

inline bool     // Specialization for const unsigned char*.
lexicographical_compare(const unsigned char* first1, const unsigned char* last1, const unsigned char* first2, const unsigned char* last2)
{
    return internal::lexicographical_compare_bytes(first1, last1 - first1, first2, last2 - first2);
}

inline bool     // Specialization for unsigned char*.
lexicographical_compare(unsigned char* first1, unsigned char* last1, unsigned char* first2, unsigned char* last2)
{
    return internal::lexicographical_compare_bytes(first1, last1 - first1, first2, last2 - first2);
}

inline bool     // Specialization for const signed char*.
lexicographical_compare(const signed char* first1, const signed char* last1, const signed char* first2, const signed char* last2)
{
    return internal::lexicographical_compare_bytes(first1, last1 - first1, first2, last2 - first2);
}

inline bool     // Specialization for signed char*.
lexicographical_compare(signed char* first1, signed char* last1, signed char* first2, signed char* last2)
{
    return internal::lexicographical_compare_bytes(first1, last1 - first1, first2, last2 - first2);
}

#if defined(_MSC_VER) // If using the VC++ compiler (and thus bool is known to be a single byte)...
    //Not sure if this is a good idea.
    //inline bool     // Specialization for const bool*.
    //lexicographical_compare(const bool* first1, const bool* last1, const bool* first2, const bool* last2)
    //{
    //    const ptrdiff_t n1(last1 - first1), n2(last2 - first2);
    //    const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2));
    //    return result ? (result < 0) : (n1 < n2);
    //}
    //
    //inline bool     // Specialization for bool*.
    //lexicographical_compare(bool* first1, bool* last1, bool* first2, bool* last2)
    //{
    //    const ptrdiff_t n1(last1 - first1), n2(last2 - first2);
    //    const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2));
    //    return result ? (result < 0) : (n1 < n2);
    //}
#endif


/// lexicographical_compare
///
/// Returns: true if the sequence of elements defined by the range
/// [first1, last1) is lexicographically less than the sequence of
/// elements defined by the range [first2, last2). Returns false otherwise.
///
/// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications
/// of the corresponding comparison.
/// /// Note: If two sequences have the same number of elements and their /// corresponding elements are equivalent, then neither sequence is /// lexicographically less than the other. If one sequence is a prefix /// of the other, then the shorter sequence is lexicographically less /// than the longer sequence. Otherwise, the lexicographical comparison /// of the sequences yields the same result as the comparison of the first /// corresponding pair of elements that are not equivalent. /// /// Note: False is always returned if range 1 is exhausted before range 2. /// The result of this is that you can't do a successful reverse compare /// (e.g. use greater<> as the comparison instead of less<>) unless the /// two sequences are of identical length. What you want to do is reverse /// the order of the arguments in order to get the desired effect. /// template inline bool lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare compare) { for(; (first1 != last1) && (first2 != last2); ++first1, ++first2) { if(compare(*first1, *first2)) return true; if(compare(*first2, *first1)) return false; } return (first1 == last1) && (first2 != last2); } #if defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) /// lexicographical_compare_three_way /// /// Returns: The comparison category ordering between both ranges. For the first non-equivalent pair in the ranges, /// the comparison will be returned. Else if the first range is a subset (superset) of the second range, then the /// less (greater) ordering will be returned. /// /// Complexity: At most N iterations, where N = min(last1-first1, last2-first2) of the applications /// of the corresponding comparison. /// /// Note: If two sequences have the same number of elements and their /// corresponding elements are equivalent, then neither sequence is /// lexicographically less than the other. 
If one sequence is a prefix /// of the other, then the shorter sequence is lexicographically less /// than the longer sequence. Otherwise, the lexicographical comparison /// of the sequences yields the same result as the comparison of the first /// corresponding pair of elements that are not equivalent. /// template constexpr auto lexicographical_compare_three_way(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, Compare compare) -> decltype(compare(*first1, *first2)) { for (; (first1 != last1) && (first2 != last2); ++first1, ++first2) { if (auto c = compare(*first1, *first2); c != 0) return c; } return (first1 != last1) ? std::strong_ordering::greater : (first2 != last2) ? std::strong_ordering::less : std::strong_ordering::equal; } #endif /// mismatch /// /// Finds the first position where the two ranges [first1, last1) and /// [first2, first2 + (last1 - first1)) differ. The two versions of /// mismatch use different tests for whether elements differ. /// /// Returns: A pair of iterators i and j such that j == first2 + (i - first1) /// and i is the first iterator in the range [first1, last1) for which the /// following corresponding condition holds: !(*i == *(first2 + (i - first1))). /// Returns the pair last1 and first2 + (last1 - first1) if such an iterator /// i is not found. /// /// Complexity: At most last1 first1 applications of the corresponding predicate. /// template inline eastl::pair mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) // , InputIterator2 last2) { while((first1 != last1) && (*first1 == *first2)) // && (first2 != last2) <- C++ standard mismatch function doesn't check first2/last2. { ++first1; ++first2; } return eastl::pair(first1, first2); } /// mismatch /// /// Finds the first position where the two ranges [first1, last1) and /// [first2, first2 + (last1 - first1)) differ. The two versions of /// mismatch use different tests for whether elements differ. 
/// /// Returns: A pair of iterators i and j such that j == first2 + (i - first1) /// and i is the first iterator in the range [first1, last1) for which the /// following corresponding condition holds: pred(*i, *(first2 + (i - first1))) == false. /// Returns the pair last1 and first2 + (last1 - first1) if such an iterator /// i is not found. /// /// Complexity: At most last1 first1 applications of the corresponding predicate. /// template inline eastl::pair mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, // InputIterator2 last2, BinaryPredicate predicate) { while((first1 != last1) && predicate(*first1, *first2)) // && (first2 != last2) <- C++ standard mismatch function doesn't check first2/last2. { ++first1; ++first2; } return eastl::pair(first1, first2); } /// lower_bound /// /// Finds the position of the first element in a sorted range that has a value /// greater than or equivalent to a specified value. /// /// Effects: Finds the first position into which value can be inserted without /// violating the ordering. /// /// Returns: The furthermost iterator i in the range [first, last) such that /// for any iterator j in the range [first, i) the following corresponding /// condition holds: *j < value. /// /// Complexity: At most 'log(last - first) + 1' comparisons. /// /// Optimizations: We have no need to specialize this implementation for random /// access iterators (e.g. contiguous array), as the code below will already /// take advantage of them. /// template ForwardIterator lower_bound(ForwardIterator first, ForwardIterator last, const T& value) { typedef typename eastl::iterator_traits::difference_type DifferenceType; DifferenceType d = eastl::distance(first, last); // This will be efficient for a random access iterator such as an array. while(d > 0) { ForwardIterator i = first; DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. 
eastl::advance(i, d2); // This will be efficient for a random access iterator such as an array. if(*i < value) { // Disabled because std::lower_bound doesn't specify (23.3.3.3, p3) this can be done: EASTL_VALIDATE_COMPARE(!(value < *i)); // Validate that the compare function is sane. first = ++i; d -= d2 + 1; } else d = d2; } return first; } /// lower_bound /// /// Finds the position of the first element in a sorted range that has a value /// greater than or equivalent to a specified value. The input Compare function /// takes two arguments and returns true if the first argument is less than /// the second argument. /// /// Effects: Finds the first position into which value can be inserted without /// violating the ordering. /// /// Returns: The furthermost iterator i in the range [first, last) such that /// for any iterator j in the range [first, i) the following corresponding /// condition holds: compare(*j, value) != false. /// /// Complexity: At most 'log(last - first) + 1' comparisons. /// /// Optimizations: We have no need to specialize this implementation for random /// access iterators (e.g. contiguous array), as the code below will already /// take advantage of them. /// template ForwardIterator lower_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) { typedef typename eastl::iterator_traits::difference_type DifferenceType; DifferenceType d = eastl::distance(first, last); // This will be efficient for a random access iterator such as an array. while(d > 0) { ForwardIterator i = first; DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. eastl::advance(i, d2); // This will be efficient for a random access iterator such as an array. 
if(compare(*i, value)) { // Disabled because std::lower_bound doesn't specify (23.3.3.1, p3) this can be done: EASTL_VALIDATE_COMPARE(!compare(value, *i)); // Validate that the compare function is sane. first = ++i; d -= d2 + 1; } else d = d2; } return first; } /// upper_bound /// /// Finds the position of the first element in a sorted range that has a /// value that is greater than a specified value. /// /// Effects: Finds the furthermost position into which value can be inserted /// without violating the ordering. /// /// Returns: The furthermost iterator i in the range [first, last) such that /// for any iterator j in the range [first, i) the following corresponding /// condition holds: !(value < *j). /// /// Complexity: At most 'log(last - first) + 1' comparisons. /// template ForwardIterator upper_bound(ForwardIterator first, ForwardIterator last, const T& value) { typedef typename eastl::iterator_traits::difference_type DifferenceType; DifferenceType len = eastl::distance(first, last); while(len > 0) { ForwardIterator i = first; DifferenceType len2 = len >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. eastl::advance(i, len2); if(!(value < *i)) // Note that we always express value comparisons in terms of < or ==. { first = ++i; len -= len2 + 1; } else { // Disabled because std::upper_bound doesn't specify (23.3.3.2, p3) this can be done: EASTL_VALIDATE_COMPARE(!(*i < value)); // Validate that the compare function is sane. len = len2; } } return first; } /// upper_bound /// /// Finds the position of the first element in a sorted range that has a /// value that is greater than a specified value. The input Compare function /// takes two arguments and returns true if the first argument is less than /// the second argument. /// /// Effects: Finds the furthermost position into which value can be inserted /// without violating the ordering. 
/// /// Returns: The furthermost iterator i in the range [first, last) such that /// for any iterator j in the range [first, i) the following corresponding /// condition holds: compare(value, *j) == false. /// /// Complexity: At most 'log(last - first) + 1' comparisons. /// template ForwardIterator upper_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) { typedef typename eastl::iterator_traits::difference_type DifferenceType; DifferenceType len = eastl::distance(first, last); while(len > 0) { ForwardIterator i = first; DifferenceType len2 = len >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. eastl::advance(i, len2); if(!compare(value, *i)) { first = ++i; len -= len2 + 1; } else { // Disabled because std::upper_bound doesn't specify (23.3.3.2, p3) this can be done: EASTL_VALIDATE_COMPARE(!compare(*i, value)); // Validate that the compare function is sane. len = len2; } } return first; } /// equal_range /// /// Effects: Finds the largest subrange [i, j) such that the value can be inserted /// at any iterator k in it without violating the ordering. k satisfies the /// corresponding conditions: !(*k < value) && !(value < *k). /// /// Complexity: At most '2 * log(last - first) + 1' comparisons. /// template pair equal_range(ForwardIterator first, ForwardIterator last, const T& value) { return equal_range(first, last, value, eastl::less<>{}); } /// equal_range /// /// Effects: Finds the largest subrange [i, j) such that the value can be inserted /// at any iterator k in it without violating the ordering. k satisfies the /// corresponding conditions: compare(*k, value) == false && compare(value, *k) == false. /// /// Complexity: At most '2 * log(last - first) + 1' comparisons. 
/// template pair equal_range(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) { typedef pair ResultType; typedef typename eastl::iterator_traits::difference_type DifferenceType; DifferenceType d = eastl::distance(first, last); while(d > 0) { ForwardIterator i(first); DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. eastl::advance(i, d2); if(compare(*i, value)) { EASTL_VALIDATE_COMPARE(!compare(value, *i)); // Validate that the compare function is sane. first = ++i; d -= d2 + 1; } else if(compare(value, *i)) { EASTL_VALIDATE_COMPARE(!compare(*i, value)); // Validate that the compare function is sane. d = d2; last = i; } else { ForwardIterator j(i); return ResultType(eastl::lower_bound(first, i, value, compare), eastl::upper_bound(++j, last, value, compare)); } } return ResultType(first, first); } /// replace /// /// Effects: Substitutes elements referred by the iterator i in the range [first, last) /// with new_value, when the following corresponding conditions hold: *i == old_value. /// /// Complexity: Exactly 'last - first' applications of the corresponding predicate. /// /// Note: The predicate version of replace is replace_if and not another variation of replace. /// This is because both versions would have the same parameter count and there could be ambiguity. /// template inline void replace(ForwardIterator first, ForwardIterator last, const T& old_value, const T& new_value) { for(; first != last; ++first) { if(*first == old_value) *first = new_value; } } /// replace_if /// /// Effects: Substitutes elements referred by the iterator i in the range [first, last) /// with new_value, when the following corresponding conditions hold: predicate(*i) != false. /// /// Complexity: Exactly 'last - first' applications of the corresponding predicate. 
///
/// Note: The predicate version of replace_if is replace and not another variation of replace_if.
/// This is because both versions would have the same parameter count and there could be ambiguity.
///
template <typename ForwardIterator, typename Predicate, typename T>
inline void replace_if(ForwardIterator first, ForwardIterator last, Predicate predicate, const T& new_value)
{
	for(; first != last; ++first)
	{
		if(predicate(*first))
			*first = new_value;
	}
}


/// remove_copy
///
/// Effects: Copies all the elements referred to by the iterator i in the range
/// [first, last) for which the following corresponding condition does not hold:
/// *i == value.
///
/// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap.
///
/// Returns: The end of the resulting range.
///
/// Complexity: Exactly 'last - first' applications of the corresponding predicate.
///
template <typename InputIterator, typename OutputIterator, typename T>
inline OutputIterator
remove_copy(InputIterator first, InputIterator last, OutputIterator result, const T& value)
{
	for(; first != last; ++first)
	{
		if(!(*first == value)) // Note that we always express value comparisons in terms of < or ==.
		{
			*result = *first;
			++result;
		}
	}
	return result;
}


/// remove_copy_if
///
/// Effects: Copies all the elements referred to by the iterator i in the range
/// [first, last) for which the following corresponding condition does not hold:
/// predicate(*i) != false.
///
/// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap.
///
/// Returns: The end of the resulting range.
///
/// Complexity: Exactly 'last - first' applications of the corresponding predicate.
/// template inline OutputIterator remove_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate) { for(; first != last; ++first) { if(!predicate(*first)) { *result = *first; ++result; } } return result; } /// remove /// /// Effects: Eliminates all the elements referred to by iterator i in the /// range [first, last) for which the following corresponding condition /// holds: *i == value. /// /// Returns: The end of the resulting range. /// /// Complexity: Exactly 'last - first' applications of the corresponding predicate. /// /// Note: The predicate version of remove is remove_if and not another variation of remove. /// This is because both versions would have the same parameter count and there could be ambiguity. /// /// Note: Since this function moves the element to the back of the heap and /// doesn't actually remove it from the given container, the user must call /// the container erase function if the user wants to erase the element /// from the container. /// /// Example usage: /// vector intArray; /// ... /// intArray.erase(remove(intArray.begin(), intArray.end(), 4), intArray.end()); // Erase all elements of value 4. /// template inline ForwardIterator remove(ForwardIterator first, ForwardIterator last, const T& value) { first = eastl::find(first, last, value); if(first != last) { for (ForwardIterator i = first; ++i != last;) { if (!(*i == value)) { *first++ = eastl::move(*i); } } } return first; } /// remove_if /// /// Effects: Eliminates all the elements referred to by iterator i in the /// range [first, last) for which the following corresponding condition /// holds: predicate(*i) != false. /// /// Returns: The end of the resulting range. /// /// Complexity: Exactly 'last - first' applications of the corresponding predicate. /// /// Note: The predicate version of remove_if is remove and not another variation of remove_if. 
/// This is because both versions would have the same parameter count and there could be ambiguity. /// /// Note: Since this function moves the element to the back of the heap and /// doesn't actually remove it from the given container, the user must call /// the container erase function if the user wants to erase the element /// from the container. /// /// Example usage: /// vector intArray; /// ... /// intArray.erase(remove(intArray.begin(), intArray.end(), bind(less(), (int)3)), intArray.end()); // Erase all elements less than 3. /// template inline ForwardIterator remove_if(ForwardIterator first, ForwardIterator last, Predicate predicate) { first = eastl::find_if(first, last, predicate); if(first != last) { for (ForwardIterator i = first; ++i != last;) { if (!predicate(*i)) { *first++ = eastl::move(*i); } } } return first; } /// apply_and_remove_if /// /// Calls the Function function for all elements referred to my iterator i in the range /// [first, last) for which the following corresponding condition holds: /// predicate(*i) == true /// and then left shift moves potential non-matching elements over it. /// /// Returns: a past-the-end iterator for the new end of the range. /// /// Complexity: Exactly 'last - first' applications of the corresponding predicate + applies /// function once for every time the condition holds. /// /// Note: Since removing is done by shifting (by means of copy move assignment) the elements /// in the range in such a way that the elements that are not to be removed appear in the /// beginning of the range doesn't actually remove it from the given container, the user must call /// the container erase function if the user wants to erase the element /// from the container. I.e. in the same they as for remove_if the excess elements /// are left in a valid but possibly moved from state. 
/// template inline ForwardIterator apply_and_remove_if(ForwardIterator first, ForwardIterator last, Function function, Predicate predicate) { first = eastl::find_if(first, last, predicate); if (first != last) { function(*first); for (auto i = next(first); i != last; ++i) { if (predicate(*i)) { function(*i); continue; } *first = eastl::move(*i); ++first; } } return first; } /// apply_and_remove /// /// Calls the Function function for all elements referred to my iterator i in the range /// [first, last) for which the following corresponding condition holds: /// value == *i /// and then left shift moves potential non-matching elements over it. /// /// Returns: a past-the-end iterator for the new end of the range. /// /// Complexity: Exactly 'last - first' applications of the corresponding equality test /// + applies function once for every time the condition holds. /// /// Note: Since removing is done by shifting (by means of copy move assignment) the elements /// in the range in such a way that the elements that are not to be removed appear in the /// beginning of the range doesn't actually remove it from the given container, the user must call /// the container erase function if the user wants to erase the element /// from the container. I.e. in the same they as for remove_if the excess elements /// are left in a valid but possibly moved from state. /// template inline ForwardIterator apply_and_remove(ForwardIterator first, ForwardIterator last, Function function, const T& value) { first = eastl::find(first, last, value); if (first != last) { function(*first); for (auto i = next(first); i != last; ++i) { if (value == *i) { function(*i); continue; } *first = eastl::move(*i); ++first; } } return first; } /// replace_copy /// /// Effects: Assigns to every iterator i in the range [result, result + (last - first)) /// either new_value or *(first + (i - result)) depending on whether the following /// corresponding conditions hold: *(first + (i - result)) == old_value. 
///
/// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap.
///
/// Returns: result + (last - first).
///
/// Complexity: Exactly 'last - first' applications of the corresponding predicate.
///
/// Note: The predicate version of replace_copy is replace_copy_if and not another variation of replace_copy.
/// This is because both versions would have the same parameter count and there could be ambiguity.
///
template <typename InputIterator, typename OutputIterator, typename T>
inline OutputIterator
replace_copy(InputIterator first, InputIterator last, OutputIterator result, const T& old_value, const T& new_value)
{
	// Input and output advance in lockstep: every source element yields exactly one output element.
	for(; first != last; ++first, ++result)
		*result = (*first == old_value) ? new_value : *first;
	return result;
}


/// replace_copy_if
///
/// Effects: Assigns to every iterator i in the range [result, result + (last - first))
/// either new_value or *(first + (i - result)) depending on whether the following
/// corresponding conditions hold: predicate(*(first + (i - result))) != false.
///
/// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap.
///
/// Returns: result + (last - first).
///
/// Complexity: Exactly 'last - first' applications of the corresponding predicate.
///
/// Note: The predicate version of replace_copy_if is replace_copy and not another variation of replace_copy_if.
/// This is because both versions would have the same parameter count and there could be ambiguity.
///
template <typename InputIterator, typename OutputIterator, typename Predicate, typename T>
inline OutputIterator
replace_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate, const T& new_value)
{
	// Input and output advance in lockstep: every source element yields exactly one output element.
	for(; first != last; ++first, ++result)
		*result = predicate(*first) ? new_value : *first;
	return result;
}


// reverse
//
// We provide helper functions which allow reverse to be implemented more
// efficiently for some types of iterators and types.
// template inline void reverse_impl(BidirectionalIterator first, BidirectionalIterator last, eastl::bidirectional_iterator_tag) { for(; (first != last) && (first != --last); ++first) // We are not allowed to use operator <, <=, >, >= with a eastl::iter_swap(first, last); // generic (bidirectional or otherwise) iterator. } template inline void reverse_impl(RandomAccessIterator first, RandomAccessIterator last, eastl::random_access_iterator_tag) { if(first != last) { for(; first < --last; ++first) // With a random access iterator, we can use operator < to more efficiently implement eastl::iter_swap(first, last); // this algorithm. A generic iterator doesn't necessarily have an operator < defined. } } /// reverse /// /// Reverses the values within the range [first, last). /// /// Effects: For each nonnegative integer i <= (last - first) / 2, /// applies swap to all pairs of iterators first + i, (last i) - 1. /// /// Complexity: Exactly '(last - first) / 2' swaps. /// template inline void reverse(BidirectionalIterator first, BidirectionalIterator last) { typedef typename eastl::iterator_traits::iterator_category IC; eastl::reverse_impl(first, last, IC()); } /// reverse_copy /// /// Copies the range [first, last) in reverse order to the result. /// /// Effects: Copies the range [first, last) to the range /// [result, result + (last - first)) such that for any nonnegative /// integer i < (last - first) the following assignment takes place: /// *(result + (last - first) - i) = *(first + i) /// /// Requires: The ranges [first, last) and [result, result + (last - first)) /// shall not overlap. /// /// Returns: result + (last - first). That is, returns the end of the output range. /// /// Complexity: Exactly 'last - first' assignments. 
/// template inline OutputIterator reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator result) { for(; first != last; ++result) *result = *--last; return result; } /// search /// /// Search finds a subsequence within the range [first1, last1) that is identical to [first2, last2) /// when compared element-by-element. It returns an iterator pointing to the beginning of that /// subsequence, or else last1 if no such subsequence exists. As such, it is very much like /// the C strstr function, with the primary difference being that strstr uses 0-terminated strings /// whereas search uses an end iterator to specify the end of a string. /// /// Returns: The first iterator i in the range [first1, last1 - (last2 - first2)) such that for /// any nonnegative integer n less than 'last2 - first2' the following corresponding condition holds: /// *(i + n) == *(first2 + n). Returns last1 if no such iterator is found. /// /// Complexity: At most (last1 first1) * (last2 first2) applications of the corresponding predicate. /// template ForwardIterator1 search(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2) { if(first2 != last2) // If there is anything to search for... { // We need to make a special case for a pattern of one element, // as the logic below prevents one element patterns from working. ForwardIterator2 temp2(first2); ++temp2; if(temp2 != last2) // If what we are searching for has a length > 1... { ForwardIterator1 cur1(first1); ForwardIterator2 p2; while(first1 != last1) { // The following loop is the equivalent of eastl::find(first1, last1, *first2) while((first1 != last1) && !(*first1 == *first2)) ++first1; if(first1 != last1) { p2 = temp2; cur1 = first1; if(++cur1 != last1) { while(*cur1 == *p2) { if(++p2 == last2) return first1; if(++cur1 == last1) return last1; } ++first1; continue; } } return last1; } // Fall through to the end. 
} else return eastl::find(first1, last1, *first2); } return first1; #if 0 /* Another implementation which is a little more simpler but executes a little slower on average. typedef typename eastl::iterator_traits::difference_type difference_type_1; typedef typename eastl::iterator_traits::difference_type difference_type_2; const difference_type_2 d2 = eastl::distance(first2, last2); for(difference_type_1 d1 = eastl::distance(first1, last1); d1 >= d2; ++first1, --d1) { ForwardIterator1 temp1 = first1; for(ForwardIterator2 temp2 = first2; ; ++temp1, ++temp2) { if(temp2 == last2) return first1; if(!(*temp1 == *temp2)) break; } } return last1; */ #endif } /// search /// /// Search finds a subsequence within the range [first1, last1) that is identical to [first2, last2) /// when compared element-by-element. It returns an iterator pointing to the beginning of that /// subsequence, or else last1 if no such subsequence exists. As such, it is very much like /// the C strstr function, with the only difference being that strstr uses 0-terminated strings /// whereas search uses an end iterator to specify the end of a string. /// /// Returns: The first iterator i in the range [first1, last1 - (last2 - first2)) such that for /// any nonnegative integer n less than 'last2 - first2' the following corresponding condition holds: /// predicate(*(i + n), *(first2 + n)) != false. Returns last1 if no such iterator is found. /// /// Complexity: At most (last1 first1) * (last2 first2) applications of the corresponding predicate. 
/// template ForwardIterator1 search(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate) { typedef typename eastl::iterator_traits::difference_type difference_type_1; typedef typename eastl::iterator_traits::difference_type difference_type_2; difference_type_2 d2 = eastl::distance(first2, last2); if(d2 != 0) { ForwardIterator1 i(first1); eastl::advance(i, d2); for(difference_type_1 d1 = eastl::distance(first1, last1); d1 >= d2; --d1) { if(eastl::equal(first1, i, first2, predicate)) return first1; if(d1 > d2) // To do: Find a way to make the algorithm more elegant. { ++first1; ++i; } } return last1; } return first1; // Just like with strstr, we return first1 if the match string is empty. } // search_n helper functions // template ForwardIterator // Generic implementation. search_n_impl(ForwardIterator first, ForwardIterator last, Size count, const T& value, eastl::forward_iterator_tag) { if(count <= 0) return first; Size d1 = (Size)eastl::distance(first, last); // Should d1 be of type Size, ptrdiff_t, or iterator_traits::difference_type? // The problem with using iterator_traits::difference_type is that if(count > d1) // ForwardIterator may not be a true iterator but instead something like a pointer. return last; for(; d1 >= count; ++first, --d1) { ForwardIterator i(first); for(Size n = 0; n < count; ++n, ++i, --d1) { if(!(*i == value)) // Note that we always express value comparisons in terms of < or ==. goto not_found; } return first; not_found: first = i; } return last; } template inline RandomAccessIterator // Random access iterator implementation. Much faster than generic implementation. 
search_n_impl(RandomAccessIterator first, RandomAccessIterator last, Size count, const T& value, eastl::random_access_iterator_tag) { if(count <= 0) return first; else if(count == 1) return eastl::find(first, last, value); else if(last > first) { RandomAccessIterator lookAhead; RandomAccessIterator backTrack; Size skipOffset = (count - 1); Size tailSize = (Size)(last - first); Size remainder; Size prevRemainder; for(lookAhead = first + skipOffset; tailSize >= count; lookAhead += count) { tailSize -= count; if(*lookAhead == value) { remainder = skipOffset; for(backTrack = lookAhead - 1; *backTrack == value; --backTrack) { if(--remainder == 0) return (lookAhead - skipOffset); // success } if(remainder <= tailSize) { prevRemainder = remainder; while(*(++lookAhead) == value) { if(--remainder == 0) return (backTrack + 1); // success } tailSize -= (prevRemainder - remainder); } else return last; // failure } // lookAhead here is always pointing to the element of the last mismatch. } } return last; // failure } /// search_n /// /// Returns: The first iterator i in the range [first, last count) such that /// for any nonnegative integer n less than count the following corresponding /// conditions hold: *(i + n) == value, pred(*(i + n),value) != false. /// Returns last if no such iterator is found. /// /// Complexity: At most '(last1 - first1) * count' applications of the corresponding predicate. /// template ForwardIterator search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value) { typedef typename eastl::iterator_traits::iterator_category IC; return eastl::search_n_impl(first, last, count, value, IC()); } /// binary_search /// /// Returns: true if there is an iterator i in the range [first last) that /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). /// /// Complexity: At most 'log(last - first) + 2' comparisons. 
/// /// Note: The reason binary_search returns bool instead of an iterator is /// that search_n, lower_bound, or equal_range already return an iterator. /// However, there are arguments that binary_search should return an iterator. /// Note that we provide binary_search_i (STL extension) to return an iterator. /// /// To use search_n to find an item, do this: /// iterator i = search_n(begin, end, 1, value); /// To use lower_bound to find an item, do this: /// iterator i = lower_bound(begin, end, value); /// if((i != last) && !(value < *i)) /// /// It turns out that the above lower_bound method is as fast as binary_search /// would be if it returned an iterator. /// template inline bool binary_search(ForwardIterator first, ForwardIterator last, const T& value) { // To do: This can be made slightly faster by not using lower_bound. ForwardIterator i(eastl::lower_bound(first, last, value)); return ((i != last) && !(value < *i)); // Note that we always express value comparisons in terms of < or ==. } /// binary_search /// /// Returns: true if there is an iterator i in the range [first last) that /// satisfies the corresponding conditions: compare(*i, value) == false && /// compare(value, *i) == false. /// /// Complexity: At most 'log(last - first) + 2' comparisons. /// /// Note: See comments above regarding the bool return value of binary_search. /// template inline bool binary_search(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) { // To do: This can be made slightly faster by not using lower_bound. ForwardIterator i(eastl::lower_bound(first, last, value, compare)); return ((i != last) && !compare(value, *i)); } /// binary_search_i /// /// Returns: iterator if there is an iterator i in the range [first last) that /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). /// Returns last if the value is not found. /// /// Complexity: At most 'log(last - first) + 2' comparisons. 
/// template inline ForwardIterator binary_search_i(ForwardIterator first, ForwardIterator last, const T& value) { // To do: This can be made slightly faster by not using lower_bound. ForwardIterator i(eastl::lower_bound(first, last, value)); if((i != last) && !(value < *i)) // Note that we always express value comparisons in terms of < or ==. return i; return last; } /// binary_search_i /// /// Returns: iterator if there is an iterator i in the range [first last) that /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). /// Returns last if the value is not found. /// /// Complexity: At most 'log(last - first) + 2' comparisons. /// template inline ForwardIterator binary_search_i(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) { // To do: This can be made slightly faster by not using lower_bound. ForwardIterator i(eastl::lower_bound(first, last, value, compare)); if((i != last) && !compare(value, *i)) return i; return last; } /// unique /// /// Given a sorted range, this function removes duplicated items. /// Note that if you have a container then you will probably want /// to call erase on the container with the return value if your /// goal is to remove the duplicated items from the container. /// /// Effects: Eliminates all but the first element from every consecutive /// group of equal elements referred to by the iterator i in the range /// [first, last) for which the following corresponding condition holds: /// *i == *(i - 1). /// /// Returns: The end of the resulting range. /// /// Complexity: If the range (last - first) is not empty, exactly (last - first) /// applications of the corresponding predicate, otherwise no applications of the predicate. /// /// Example usage: /// vector intArray; /// ... 
/// intArray.erase(unique(intArray.begin(), intArray.end()), intArray.end()); /// template ForwardIterator unique(ForwardIterator first, ForwardIterator last) { first = eastl::adjacent_find(first, last); if(first != last) // We expect that there are duplicated items, else the user wouldn't be calling this function. { ForwardIterator dest(first); for(++first; first != last; ++first) { if(!(*dest == *first)) // Note that we always express value comparisons in terms of < or ==. *++dest = *first; } return ++dest; } return last; } /// unique /// /// Given a sorted range, this function removes duplicated items. /// Note that if you have a container then you will probably want /// to call erase on the container with the return value if your /// goal is to remove the duplicated items from the container. /// /// Effects: Eliminates all but the first element from every consecutive /// group of equal elements referred to by the iterator i in the range /// [first, last) for which the following corresponding condition holds: /// predicate(*i, *(i - 1)) != false. /// /// Returns: The end of the resulting range. /// /// Complexity: If the range (last - first) is not empty, exactly (last - first) /// applications of the corresponding predicate, otherwise no applications of the predicate. /// template ForwardIterator unique(ForwardIterator first, ForwardIterator last, BinaryPredicate predicate) { first = eastl::adjacent_find(first, last, predicate); if(first != last) // We expect that there are duplicated items, else the user wouldn't be calling this function. { ForwardIterator dest(first); for(++first; first != last; ++first) { if(!predicate(*dest, *first)) *++dest = *first; } return ++dest; } return last; } // find_end // // We provide two versions here, one for a bidirectional iterators and one for // regular forward iterators. 
Given that we are searching backward, it's a bit // more efficient if we can use backwards iteration to implement our search, // though this requires an iterator that can be reversed. // template ForwardIterator1 find_end_impl(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, eastl::forward_iterator_tag, eastl::forward_iterator_tag) { if(first2 != last2) // We have to do this check because the search algorithm below will return first1 (and not last1) if the first2/last2 range is empty. { for(ForwardIterator1 result(last1); ; ) { const ForwardIterator1 resultNext(eastl::search(first1, last1, first2, last2)); if(resultNext != last1) // If another sequence was found... { first1 = result = resultNext; ++first1; } else return result; } } return last1; } template BidirectionalIterator1 find_end_impl(BidirectionalIterator1 first1, BidirectionalIterator1 last1, BidirectionalIterator2 first2, BidirectionalIterator2 last2, eastl::bidirectional_iterator_tag, eastl::bidirectional_iterator_tag) { typedef eastl::reverse_iterator reverse_iterator1; typedef eastl::reverse_iterator reverse_iterator2; reverse_iterator1 rresult(eastl::search(reverse_iterator1(last1), reverse_iterator1(first1), reverse_iterator2(last2), reverse_iterator2(first2))); if(rresult.base() != first1) // If we found something... { BidirectionalIterator1 result(rresult.base()); eastl::advance(result, -eastl::distance(first2, last2)); // We have an opportunity to optimize this, as the return result; // search function already calculates this distance. } return last1; } /// find_end /// /// Finds the last occurrence of the second sequence in the first sequence. /// As such, this function is much like the C string function strrstr and it /// is also the same as a reversed version of 'search'. It is called find_end /// instead of the possibly more consistent search_end simply because the C++ /// standard algorithms have such naming. 
/// /// Returns an iterator between first1 and last1 if the sequence is found. /// returns last1 (the end of the first seqence) if the sequence is not found. /// template inline ForwardIterator1 find_end(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2) { typedef typename eastl::iterator_traits::iterator_category IC1; typedef typename eastl::iterator_traits::iterator_category IC2; return eastl::find_end_impl(first1, last1, first2, last2, IC1(), IC2()); } // To consider: Fold the predicate and non-predicate versions of // this algorithm into a single function. template ForwardIterator1 find_end_impl(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate, eastl::forward_iterator_tag, eastl::forward_iterator_tag) { if(first2 != last2) // We have to do this check because the search algorithm below will return first1 (and not last1) if the first2/last2 range is empty. { for(ForwardIterator1 result = last1; ; ) { const ForwardIterator1 resultNext(eastl::search(first1, last1, first2, last2, predicate)); if(resultNext != last1) // If another sequence was found... { first1 = result = resultNext; ++first1; } else return result; } } return last1; } template BidirectionalIterator1 find_end_impl(BidirectionalIterator1 first1, BidirectionalIterator1 last1, BidirectionalIterator2 first2, BidirectionalIterator2 last2, BinaryPredicate predicate, eastl::bidirectional_iterator_tag, eastl::bidirectional_iterator_tag) { typedef eastl::reverse_iterator reverse_iterator1; typedef eastl::reverse_iterator reverse_iterator2; reverse_iterator1 rresult(eastl::search (reverse_iterator1(last1), reverse_iterator1(first1), reverse_iterator2(last2), reverse_iterator2(first2), predicate)); if(rresult.base() != first1) // If we found something... 
{ BidirectionalIterator1 result(rresult.base()); eastl::advance(result, -eastl::distance(first2, last2)); return result; } return last1; } /// find_end /// /// Effects: Finds a subsequence of equal values in a sequence. /// /// Returns: The last iterator i in the range [first1, last1 - (last2 - first2)) /// such that for any nonnegative integer n < (last2 - first2), the following /// corresponding conditions hold: pred(*(i+n),*(first2+n)) != false. Returns /// last1 if no such iterator is found. /// /// Complexity: At most (last2 - first2) * (last1 - first1 - (last2 - first2) + 1) /// applications of the corresponding predicate. /// template inline ForwardIterator1 find_end(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2, BinaryPredicate predicate) { typedef typename eastl::iterator_traits::iterator_category IC1; typedef typename eastl::iterator_traits::iterator_category IC2; return eastl::find_end_impl (first1, last1, first2, last2, predicate, IC1(), IC2()); } /// set_difference /// /// set_difference iterates over both input ranges and copies elements present /// in the first range but not the second to the output range. /// /// Effects: Copies the elements of the range [first1, last1) which are not /// present in the range [first2, last2) to the range beginning at result. /// The elements in the constructed range are sorted. /// /// Requires: The input ranges must be sorted. /// Requires: The output range shall not overlap with either of the original ranges. /// /// Returns: The end of the output range. /// /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. 
/// template OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result) { while((first1 != last1) && (first2 != last2)) { if(*first1 < *first2) { *result = *first1; ++first1; ++result; } else if(*first2 < *first1) ++first2; else { ++first1; ++first2; } } return eastl::copy(first1, last1, result); } template OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare compare) { while((first1 != last1) && (first2 != last2)) { if(compare(*first1, *first2)) { EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. *result = *first1; ++first1; ++result; } else if(compare(*first2, *first1)) { EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. ++first2; } else { ++first1; ++first2; } } return eastl::copy(first1, last1, result); } /// set_difference_2 /// /// set_difference_2 iterates over both input ranges and copies elements present /// in the first range but not the second to the first output range and copies /// elements present in the second range but not in the first to the second output /// range. /// /// Effects: Copies the elements of the range [first1, last1) which are not /// present in the range [first2, last2) to the first output range beginning at /// result1 AND copies the element of range [first2, last2) which are not present /// in the range [first1, last) to the second output range beginning at result2. /// The elements in the constructed range are sorted. /// /// Requires: The input ranges must be sorted. /// Requires: The output ranges shall not overlap with either of the original ranges. /// /// Returns: Nothing. /// /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. 
/// template void set_difference_2(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result1, OutputIterator result2, Compare compare) { while ((first1 != last1) && (first2 != last2)) { if (compare(*first1, *first2)) { EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. *result1++ = *first1++; } else if (compare(*first2, *first1)) { EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. *result2++ = *first2++; } else { ++first1; ++first2; } } eastl::copy(first2, last2, result2); eastl::copy(first1, last1, result1); } /// set_difference_2 /// /// set_difference_2 with the default comparison object is eastl::less<>. /// template void set_difference_2(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result1, OutputIterator result2) { eastl::set_difference_2(first1, last1, first2, last2, result1, result2, eastl::less<>{}); } /// set_symmetric_difference /// /// set_difference iterates over both input ranges and copies elements present /// in the either range but not the other to the output range. /// /// Effects: Copies the elements of the range [first1, last1) which are not /// present in the range [first2, last2), and the elements of the range [first2, last2) /// which are not present in the range [first1, last1) to the range beginning at result. /// The elements in the constructed range are sorted. /// /// Requires: The input ranges must be sorted. /// Requires: The resulting range shall not overlap with either of the original ranges. /// /// Returns: The end of the constructed range. /// /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. 
/// template OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result) { while((first1 != last1) && (first2 != last2)) { if(*first1 < *first2) { *result = *first1; ++first1; ++result; } else if(*first2 < *first1) { *result = *first2; ++first2; ++result; } else { ++first1; ++first2; } } return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); } template OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare compare) { while((first1 != last1) && (first2 != last2)) { if(compare(*first1, *first2)) { EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. *result = *first1; ++first1; ++result; } else if(compare(*first2, *first1)) { EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. *result = *first2; ++first2; ++result; } else { ++first1; ++first2; } } return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); } /// set_intersection /// /// set_intersection over both ranges and copies elements present in /// both ranges to the output range. /// /// Effects: Constructs a sorted intersection of the elements from the /// two ranges; that is, the set of elements that are present in both of the ranges. /// /// Requires: The input ranges must be sorted. /// Requires: The resulting range shall not overlap with either of the original ranges. /// /// Returns: The end of the constructed range. /// /// Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. /// /// Note: The copying operation is stable; if an element is present in both ranges, /// the one from the first range is copied. 
///
template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1,
                                InputIterator2 first2, InputIterator2 last2,
                                OutputIterator result)
{
    while((first1 != last1) && (first2 != last2))
    {
        if(*first1 < *first2)
            ++first1;
        else if(*first2 < *first1)
            ++first2;
        else
        {
            // Equivalent elements: keep the one from the first range.
            *result = *first1;
            ++first1;
            ++first2;
            ++result;
        }
    }

    return result;
}


/// set_intersection
///
/// Same as above, but elements are ordered by the user-supplied compare function.
///
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare>
OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1,
                                InputIterator2 first2, InputIterator2 last2,
                                OutputIterator result, Compare compare)
{
    while((first1 != last1) && (first2 != last2))
    {
        if(compare(*first1, *first2))
        {
            EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane.
            ++first1;
        }
        else if(compare(*first2, *first1))
        {
            EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane.
            ++first2;
        }
        else
        {
            *result = *first1;
            ++first1;
            ++first2;
            ++result;
        }
    }

    return result;
}


/// set_union
///
/// set_union iterates over both ranges and copies elements present in
/// either range to the output range.
///
/// Effects: Constructs a sorted union of the elements from the two ranges;
/// that is, the set of elements that are present in one or both of the ranges.
///
/// Requires: The input ranges must be sorted.
/// Requires: The resulting range shall not overlap with either of the original ranges.
///
/// Returns: The end of the constructed range.
///
/// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons.
///
/// Note: The copying operation is stable; if an element is present in both ranges,
/// the one from the first range is copied.
/// template OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result) { while((first1 != last1) && (first2 != last2)) { if(*first1 < *first2) { *result = *first1; ++first1; } else if(*first2 < *first1) { *result = *first2; ++first2; } else { *result = *first1; ++first1; ++first2; } ++result; } return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); } template OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare compare) { while((first1 != last1) && (first2 != last2)) { if(compare(*first1, *first2)) { EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. *result = *first1; ++first1; } else if(compare(*first2, *first1)) { EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. *result = *first2; ++first2; } else { *result = *first1; ++first1; ++first2; } ++result; } return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); } /// set_decomposition /// /// set_decomposition iterates over both ranges and copies elements to one of the three /// categories of output ranges. /// /// Effects: Constructs three sorted containers of the elements from the two ranges. /// * OutputIterator1 is elements only in Container1. /// * OutputIterator2 is elements only in Container2. /// * OutputIterator3 is elements that are in both Container1 and Container2. /// /// Requires: The input ranges must be sorted. /// Requires: The resulting ranges shall not overlap with either of the original ranges. /// /// Returns: The end of the constructed range of elements in both Container1 and Container2. /// /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. 
/// template OutputIterator3 set_decomposition(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result1, OutputIterator2 result2, OutputIterator3 result3, Compare compare) { while ((first1 != last1) && (first2 != last2)) { if (compare(*first1, *first2)) { EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. *result1++ = *first1++; } else if (compare(*first2, *first1)) { EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. *result2++ = *first2++; } else { *result3++ = *first1++; ++first2; } } eastl::copy(first1, last1, result1); eastl::copy(first2, last2, result2); return result3; } /// set_decomposition /// /// set_decomposition with the default comparison object is eastl::less<>. /// template OutputIterator3 set_decomposition(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result1, OutputIterator2 result2, OutputIterator3 result3) { return eastl::set_decomposition(first1, last1, first2, last2, result1, result2, result3, eastl::less<>{}); } /// includes /// /// Returns true if the sorted range [first2, last2) is a subsequence of the sorted range [first1, last1). /// Note: a subsequence need not be contiguous!. /// If [first1, last1) or [first2, last2) is not sorted with respect to comp, the behavior is undefined. template bool includes(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Compare comp) { for (; first2 != last2; ++first1) { if (first1 == last1 || comp(*first2, *first1)) { return false; } if (!comp(*first1, *first2)) { ++first2; } } return true; } /// includes /// /// Returns true if the sorted range [first2, last2) is a subsequence of the sorted range [first1, last1). /// Note: a subsequence need not be contiguous!. /// If [first1, last1) or [first2, last2) is not sorted with respect to eastl::less, the behavior is undefined. 
template bool includes(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { return eastl::includes(first1, last1, first2, last2, eastl::less<>{}); } /// is_permutation /// template bool is_permutation(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2) { typedef typename eastl::iterator_traits::difference_type difference_type; // Skip past any equivalent initial elements. while((first1 != last1) && (*first1 == *first2)) { ++first1; ++first2; } if(first1 != last1) { const difference_type first1Size = eastl::distance(first1, last1); ForwardIterator2 last2 = first2; eastl::advance(last2, first1Size); for(ForwardIterator1 i = first1; i != last1; ++i) { if(i == eastl::find(first1, i, *i)) { const difference_type c = eastl::count(first2, last2, *i); if((c == 0) || (c != eastl::count(i, last1, *i))) return false; } } } return true; } /// is_permutation /// template bool is_permutation(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, BinaryPredicate predicate) { typedef typename eastl::iterator_traits::difference_type difference_type; // Skip past any equivalent initial elements. while((first1 != last1) && predicate(*first1, *first2)) { ++first1; ++first2; } if(first1 != last1) { const difference_type first1Size = eastl::distance(first1, last1); ForwardIterator2 last2 = first2; eastl::advance(last2, first1Size); for(ForwardIterator1 i = first1; i != last1; ++i) { if(i == eastl::find(first1, i, *i, predicate)) { const difference_type c = eastl::count(first2, last2, *i, predicate); if((c == 0) || (c != eastl::count(i, last1, *i, predicate))) return false; } } } return true; } /// next_permutation /// /// mutates the range [first, last) to the next permutation. Returns true if the /// new range is not the final permutation (sorted like the starting permutation). 
/// Permutations start with a sorted range, and false is returned when next_permutation /// results in the initial sorted range, or if the range has <= 1 element. /// Note that elements are compared by operator < (as usual) and that elements deemed /// equal via this are not rearranged. /// /// http://marknelson.us/2002/03/01/next-permutation/ /// Basically we start with an ordered range and reverse it's order one specifically /// chosen swap and reverse at a time. It happens that this require going through every /// permutation of the range. We use the same variable names as the document above. /// /// To consider: Significantly improved permutation/combination functionality: /// http://home.roadrunner.com/~hinnant/combinations.html /// /// Example usage: /// vector intArray; /// // /// sort(intArray.begin(), intArray.end()); /// do { /// // /// } while(next_permutation(intArray.begin(), intArray.end())); /// template bool next_permutation(BidirectionalIterator first, BidirectionalIterator last, Compare compare) { if(first != last) // If there is anything in the range... { BidirectionalIterator i = last; if(first != --i) // If the range has more than one item... { for(;;) { BidirectionalIterator ii(i), j; if(compare(*--i, *ii)) // Find two consecutive values where the first is less than the second. { j = last; while(!compare(*i, *--j)) // Find the final value that's greater than the first (it may be equal to the second). {} eastl::iter_swap(i, j); // Swap the first and the final. eastl::reverse(ii, last); // Reverse the ranget from second to last. return true; } if(i == first) // There are no two consecutive values where the first is less than the second, meaning the range is in reverse order. The reverse ordered range is always the last permutation. { eastl::reverse(first, last); break; // We are done. 
} } } } return false; } template bool next_permutation(BidirectionalIterator first, BidirectionalIterator last) { typedef typename eastl::iterator_traits::value_type value_type; return eastl::next_permutation(first, last, eastl::less()); } /// rotate /// /// Effects: For each non-negative integer i < (last - first), places the element from the /// position first + i into position first + (i + (last - middle)) % (last - first). /// /// Returns: first + (last - middle). That is, returns where first went to. /// /// Remarks: This is a left rotate. /// /// Requires: [first,middle) and [middle,last) shall be valid ranges. ForwardIterator shall /// satisfy the requirements of ValueSwappable (17.6.3.2). The type of *first shall satisfy /// the requirements of MoveConstructible (Table 20) and the requirements of MoveAssignable. /// /// Complexity: At most last - first swaps. /// /// Note: While rotate works on ForwardIterators (e.g. slist) and BidirectionalIterators (e.g. list), /// you can get much better performance (O(1) instead of O(n)) with slist and list rotation by /// doing splice operations on those lists instead of calling this rotate function. /// /// http://www.cs.bell-labs.com/cm/cs/pearls/s02b.pdf / http://books.google.com/books?id=kse_7qbWbjsC&pg=PA14&lpg=PA14&dq=Programming+Pearls+flipping+hands /// http://books.google.com/books?id=tjOlkl7ecVQC&pg=PA189&lpg=PA189&dq=stepanov+Elements+of+Programming+rotate /// http://stackoverflow.com/questions/21160875/why-is-stdrotate-so-fast /// /// Strategy: /// - We handle the special case of (middle == first) and (middle == last) no-ops /// up front in the main rotate entry point. /// - There's a basic ForwardIterator implementation (rotate_general_impl) which is /// a fallback implementation that's not as fast as others but works for all cases. /// - There's a slightly better BidirectionalIterator implementation. /// - We have specialized versions for rotating elements that are trivially copyable. 
/// These versions will use memmove for when we have a RandomAccessIterator. /// - We have a specialized version for rotating by only a single position, as that allows us /// (with any iterator type) to avoid a lot of logic involved with algorithms like "flipping hands" /// and achieve near optimal O(n) behavior. it turns out that rotate-by-one is a common use /// case in practice. /// namespace Internal { template ForwardIterator rotate_general_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) { using eastl::swap; ForwardIterator current = middle; do { swap(*first++, *current++); if(first == middle) middle = current; } while(current != last); ForwardIterator result = first; current = middle; while(current != last) { swap(*first++, *current++); if(first == middle) middle = current; else if(current == last) current = middle; } return result; // result points to first + (last - middle). } template ForwardIterator move_rotate_left_by_one(ForwardIterator first, ForwardIterator last) { typedef typename eastl::iterator_traits::value_type value_type; value_type temp(eastl::move(*first)); ForwardIterator result = eastl::move(eastl::next(first), last, first); // Note that while our template type is BidirectionalIterator, if the actual *result = eastl::move(temp); // iterator is a RandomAccessIterator then this move will be a memmove for trivially copyable types. return result; // result points to the final element in the range. 
} template BidirectionalIterator move_rotate_right_by_one(BidirectionalIterator first, BidirectionalIterator last) { typedef typename eastl::iterator_traits::value_type value_type; BidirectionalIterator beforeLast = eastl::prev(last); value_type temp(eastl::move(*beforeLast)); BidirectionalIterator result = eastl::move_backward(first, beforeLast, last); // Note that while our template type is BidirectionalIterator, if the actual *first = eastl::move(temp); // iterator is a RandomAccessIterator then this move will be a memmove for trivially copyable types. return result; // result points to the first element in the range. } template struct rotate_helper { template static ForwardIterator rotate_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) { return Internal::rotate_general_impl(first, middle, last); } }; template <> struct rotate_helper { template static ForwardIterator rotate_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) { if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. return Internal::move_rotate_left_by_one(first, last); return Internal::rotate_general_impl(first, middle, last); } }; template <> struct rotate_helper { template static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) { return Internal::rotate_general_impl(first, middle, last); } // rotate_general_impl outperforms the flipping hands algorithm. /* // Simplest "flipping hands" implementation. Disabled because it's slower on average than rotate_general_impl. template static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) { eastl::reverse(first, middle); eastl::reverse(middle, last); eastl::reverse(first, last); return first + (last - middle); // This can be slow for large ranges because operator + and - are O(n). 
} // Smarter "flipping hands" implementation, but still disabled because benchmarks are showing it to be slower than rotate_general_impl. template static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) { // This is the "flipping hands" algorithm. eastl::reverse_impl(first, middle, eastl::bidirectional_iterator_tag()); // Reverse the left side. eastl::reverse_impl(middle, last, eastl::bidirectional_iterator_tag()); // Reverse the right side. // Reverse the entire range. while((first != middle) && (middle != last)) { eastl::iter_swap(first, --last); ++first; } if(first == middle) // Finish reversing the entire range. { eastl::reverse_impl(middle, last, bidirectional_iterator_tag()); return last; } else { eastl::reverse_impl(first, middle, bidirectional_iterator_tag()); return first; } } */ }; template <> struct rotate_helper { template static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) { if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. return Internal::move_rotate_left_by_one(first, last); if(eastl::next(middle) == last) return Internal::move_rotate_right_by_one(first, last); return Internal::rotate_general_impl(first, middle, last); } }; template inline Integer greatest_common_divisor(Integer x, Integer y) { do { Integer t = (x % y); x = y; y = t; } while(y); return x; } template <> struct rotate_helper { // This is the juggling algorithm, using move operations. // In practice this implementation is about 25% faster than rotate_general_impl. We may want to // consider sticking with just rotate_general_impl and avoid the code generation of this function. 
template static RandomAccessIterator rotate_impl(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) { typedef typename iterator_traits::difference_type difference_type; typedef typename iterator_traits::value_type value_type; const difference_type m1 = (middle - first); const difference_type m2 = (last - middle); const difference_type g = Internal::greatest_common_divisor(m1, m2); value_type temp; for(RandomAccessIterator p = first + g; p != first;) { temp = eastl::move(*--p); RandomAccessIterator p1 = p; RandomAccessIterator p2 = p + m1; do { *p1 = eastl::move(*p2); p1 = p2; const difference_type d = (last - p2); if(m1 < d) p2 += m1; else p2 = first + (m1 - d); } while(p2 != p); *p1 = eastl::move(temp); } return first + m2; } }; template <> struct rotate_helper { // Experiments were done which tested the performance of using an intermediate buffer // to do memcpy's to as opposed to executing a swapping algorithm. It turns out this is // actually slower than even rotate_general_impl, partly because the average case involves // memcpy'ing a quarter of the element range twice. Experiments were done with various kinds // of PODs with various element counts. template static RandomAccessIterator rotate_impl(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) { if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. return Internal::move_rotate_left_by_one(first, last); if(eastl::next(middle) == last) return Internal::move_rotate_right_by_one(first, last); if((last - first) < 32) // For small ranges rotate_general_impl is faster. 
return Internal::rotate_general_impl(first, middle, last); return Internal::rotate_helper::rotate_impl(first, middle, last); } }; } // namespace Internal template ForwardIterator rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last) { if(middle != first) { if(middle != last) { typedef typename eastl::iterator_traits::iterator_category IC; typedef typename eastl::iterator_traits::value_type value_type; // the implementations for is_trivially_copyable types simply check whether we have a single element to rotate and if so, // defer to either move_rotate_left_by_one or move_rotate_right_by_one, which are optimized for trivially copyable types. // otherwise, use the same implementation as non-trivially copyable types. return Internal::rotate_helper::value>::rotate_impl(first, middle, last); } return first; } return last; } /// rotate_copy /// /// Similar to rotate except writes the output to the OutputIterator and /// returns an OutputIterator to the element past the last element copied /// (i.e. result + (last - first)) /// template OutputIterator rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result) { return eastl::copy(first, middle, eastl::copy(middle, last, result)); } /// clamp /// /// Returns a reference to a clamped value within the range of [lo, hi]. /// /// http://en.cppreference.com/w/cpp/algorithm/clamp /// template EA_CONSTEXPR const T& clamp(const T& v, const T& lo, const T& hi, Compare comp) { EASTL_ASSERT(!comp(hi, lo)); return comp(v, lo) ? lo : comp(hi, v) ? hi : v; } template EA_CONSTEXPR const T& clamp(const T& v, const T& lo, const T& hi) { return eastl::clamp(v, lo, hi, eastl::less<>()); } /// is_partitioned /// /// Returns true if all the elements in the range [first, last) is empty, or is /// partitioned by predicate. Being partitioned means that all elements v for which /// predicate(v) evaluates to true appear before any elements for which predicate(v) /// is false. 
/// template EA_CONSTEXPR bool is_partitioned(InputIterator first, InputIterator last, UnaryPredicate predicate) { for (; first != last; ++first) { if (!predicate(*first)) { // advance the iterator, we don't need to call the predicate on this item // again in the "false" loop below. ++first; break; } } for (; first != last; ++first) { if (predicate(*first)) { return false; } } return true; } /// partition_point /// /// Precondition: for this function to work correctly the input range [first, last) /// must be partitioned by the predicate. i.e. all values for which predicate(v) is /// true should precede any value in the range for which predicate(v) is false. /// /// Returns: the iterator past the end of the first partition within [first, last) or /// last if all elements satisfy the predicate. /// /// Note: this is a more general version of lower_bound. template EA_CONSTEXPR ForwardIterator partition_point(ForwardIterator first, ForwardIterator last, UnaryPredicate predicate) { // Just binary chop our way to the first one where predicate(x) is false for (auto length = eastl::distance(first, last); 0 < length;) { const auto half = length / 2; const auto middle = eastl::next(first, half); if (predicate(*middle)) { first = eastl::next(middle); length -= (half + 1); } else { length = half; } } return first; } } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/allocator.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ALLOCATOR_H #define EASTL_ALLOCATOR_H #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
#endif namespace eastl { /// alloc_flags /// /// Defines allocation flags. /// enum alloc_flags { MEM_TEMP = 0, // Low memory, not necessarily actually temporary. MEM_PERM = 1 // High memory, for things that won't be unloaded. }; /// allocator /// /// In this allocator class, note that it is not templated on any type and /// instead it simply allocates blocks of memory much like the C malloc and /// free functions. It can be thought of as similar to C++ std::allocator. /// The flags parameter has meaning that is specific to the allocation /// /// C++11's std::allocator (20.6.9) doesn't have a move constructor or assignment /// operator. This is possibly because std::allocators are associated with types /// instead of as instances. The potential non-equivalance of C++ std::allocator /// instances has been a source of some acknowledged design problems. /// We don't implement support for move construction or assignment in eastl::allocator, /// but users can define their own allocators which do have move functions and /// the eastl containers are compatible with such allocators (i.e. nothing unexpected /// will happen). /// class EASTL_API allocator { public: EASTL_ALLOCATOR_EXPLICIT allocator(const char* pName = EASTL_NAME_VAL(EASTL_ALLOCATOR_DEFAULT_NAME)); allocator(const allocator& x); allocator(const allocator& x, const char* pName); allocator& operator=(const allocator& x); void* allocate(size_t n, int flags = 0); void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); void deallocate(void* p, size_t n); const char* get_name() const; void set_name(const char* pName); protected: #if EASTL_NAME_ENABLED const char* mpName; // Debug name, used to track memory. #endif }; bool operator==(const allocator& a, const allocator& b); #if !defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) bool operator!=(const allocator& a, const allocator& b); #endif /// dummy_allocator /// /// Defines an allocator which does nothing. It returns NULL from allocate calls. 
/// class EASTL_API dummy_allocator { public: EASTL_ALLOCATOR_EXPLICIT dummy_allocator(const char* = NULL) { } dummy_allocator(const dummy_allocator&) { } dummy_allocator(const dummy_allocator&, const char*) { } dummy_allocator& operator=(const dummy_allocator&) { return *this; } void* allocate(size_t, int = 0) { return NULL; } void* allocate(size_t, size_t, size_t, int = 0) { return NULL; } void deallocate(void*, size_t) { } const char* get_name() const { return ""; } void set_name(const char*) { } }; inline bool operator==(const dummy_allocator&, const dummy_allocator&) { return true; } #if !defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) inline bool operator!=(const dummy_allocator&, const dummy_allocator&) { return false; } #endif /// Defines a static default allocator which is constant across all types. /// This is different from get_default_allocator, which is is bound at /// compile-time and expected to differ per allocator type. /// Currently this Default Allocator applies only to CoreAllocatorAdapter. /// To consider: This naming of this function is too similar to get_default_allocator /// and instead should be named something like GetStaticDefaultAllocator. EASTL_API allocator* GetDefaultAllocator(); EASTL_API allocator* SetDefaultAllocator(allocator* pAllocator); /// get_default_allocator /// /// This templated function allows the user to implement a default allocator /// retrieval function that any part of EASTL can use. EASTL containers take /// an Allocator parameter which identifies an Allocator class to use. But /// different kinds of allocators have different mechanisms for retrieving /// a default allocator instance, and some don't even intrinsically support /// such functionality. The user can override this get_default_allocator /// function in order to provide the glue between EASTL and whatever their /// system's default allocator happens to be. 
/// /// Example usage: /// MyAllocatorType* gpSystemAllocator; /// /// MyAllocatorType* get_default_allocator(const MyAllocatorType*) /// { return gpSystemAllocator; } /// template Allocator* get_default_allocator(const Allocator*); EASTLAllocatorType* get_default_allocator(const EASTLAllocatorType*); /// default_allocfreemethod /// /// Implements a default allocfreemethod which uses the default global allocator. /// This version supports only default alignment. /// void* default_allocfreemethod(size_t n, void* pBuffer, void* /*pContext*/); /// allocate_memory /// /// This is a memory allocation dispatching function. /// To do: Make aligned and unaligned specializations. /// Note that to do this we will need to use a class with a static /// function instead of a standalone function like below. /// template void* allocate_memory(Allocator& a, size_t n, size_t alignment, size_t alignmentOffset); } // namespace eastl #ifndef EASTL_USER_DEFINED_ALLOCATOR // If the user hasn't declared that he has defined a different allocator implementation elsewhere... EA_DISABLE_ALL_VC_WARNINGS() #include EA_RESTORE_ALL_VC_WARNINGS() #if !EASTL_DLL // If building a regular library and not building EASTL as a DLL... // It is expected that the application define the following // versions of operator new for the application. Either that or the // user needs to override the implementation of the allocator class. void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line); void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line); #endif namespace eastl { inline allocator::allocator(const char* EASTL_NAME(pName)) { #if EASTL_NAME_ENABLED mpName = pName ? 
pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif } inline allocator::allocator(const allocator& EASTL_NAME(alloc)) { #if EASTL_NAME_ENABLED mpName = alloc.mpName; #endif } inline allocator::allocator(const allocator&, const char* EASTL_NAME(pName)) { #if EASTL_NAME_ENABLED mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif } inline allocator& allocator::operator=(const allocator& EASTL_NAME(alloc)) { #if EASTL_NAME_ENABLED mpName = alloc.mpName; #endif return *this; } inline const char* allocator::get_name() const { #if EASTL_NAME_ENABLED return mpName; #else return EASTL_ALLOCATOR_DEFAULT_NAME; #endif } inline void allocator::set_name(const char* EASTL_NAME(pName)) { #if EASTL_NAME_ENABLED mpName = pName; #endif } inline void* allocator::allocate(size_t n, int flags) { #if EASTL_NAME_ENABLED #define pName mpName #else #define pName EASTL_ALLOCATOR_DEFAULT_NAME #endif #if EASTL_DLL return allocate(n, EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT, 0, flags); #elif (EASTL_DEBUGPARAMS_LEVEL <= 0) return ::new((char*)0, flags, 0, (char*)0, 0) char[n]; #elif (EASTL_DEBUGPARAMS_LEVEL == 1) return ::new( pName, flags, 0, (char*)0, 0) char[n]; #else return ::new( pName, flags, 0, __FILE__, __LINE__) char[n]; #endif } inline void* allocator::allocate(size_t n, size_t alignment, size_t offset, int flags) { #if EASTL_DLL // We currently have no support for implementing flags when // using the C runtime library operator new function. The user // can use SetDefaultAllocator to override the default allocator. EA_UNUSED(offset); EA_UNUSED(flags); size_t adjustedAlignment = (alignment > EA_PLATFORM_PTR_SIZE) ? 
alignment : EA_PLATFORM_PTR_SIZE;

            // Over-allocate by the alignment plus one pointer so that an aligned
            // address with room for a pointer immediately before it always
            // exists inside the block.
            void* p = new char[n + adjustedAlignment + EA_PLATFORM_PTR_SIZE];
            void* pPlusPointerSize = (void*)((uintptr_t)p + EA_PLATFORM_PTR_SIZE);
            // Round up to the next multiple of adjustedAlignment.
            // NOTE(review): the mask arithmetic presumes a power-of-two alignment -- confirm against callers.
            void* pAligned = (void*)(((uintptr_t)pPlusPointerSize + adjustedAlignment - 1) & ~(adjustedAlignment - 1));

            // Stash the pointer returned by new[] just before the aligned block;
            // deallocate() reads it back from the same position.
            void** pStoredPtr = (void**)pAligned - 1;
            EASTL_ASSERT(pStoredPtr >= p);
            *(pStoredPtr) = p;

            EASTL_ASSERT(((size_t)pAligned & ~(alignment - 1)) == (size_t)pAligned);

            return pAligned;
        #elif (EASTL_DEBUGPARAMS_LEVEL <= 0)
            // Non-DLL builds forward to the application-provided aligned operator new[];
            // the debug-params level selects how much name/file/line data is passed.
            return ::new(alignment, offset, (char*)0, flags, 0, (char*)0,        0) char[n];
        #elif (EASTL_DEBUGPARAMS_LEVEL == 1)
            return ::new(alignment, offset,    pName, flags, 0, (char*)0,        0) char[n];
        #else
            return ::new(alignment, offset,    pName, flags, 0, __FILE__, __LINE__) char[n];
        #endif

        #undef pName  // See above for the definition of this.
    }


    // Releases a block obtained from allocate(). The size argument is unused.
    inline void allocator::deallocate(void* p, size_t)
    {
        #if EASTL_DLL
            if (p != nullptr)
            {
                // Recover the pointer that new[] originally returned; it is
                // stored in the pointer-sized slot directly before p.
                void* pOriginalAllocation = *((void**)p - 1);
                delete[](char*)pOriginalAllocation;
            }
        #else
            delete[](char*)p;
        #endif
    }


    inline bool operator==(const allocator&, const allocator&)
    {
        return true; // All allocators are considered equal, as they merely use global new/delete.
    }

#if !defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON)
    inline bool operator!=(const allocator&, const allocator&)
    {
        return false; // All allocators are considered equal, as they merely use global new/delete.
    }
#endif

} // namespace eastl


#endif // EASTL_USER_DEFINED_ALLOCATOR


namespace eastl
{
    // NOTE(review): the template parameter list after 'template' appears to have been
    // stripped from this dump; the tokens are kept exactly as found.
    template
    inline Allocator* get_default_allocator(const Allocator*)
    {
        return NULL; // By default we return NULL; the user must make specialization of this function in order to provide their own implementation.
    }


    inline EASTLAllocatorType* get_default_allocator(const EASTLAllocatorType*)
    {
        return EASTLAllocatorDefault(); // For the built-in allocator EASTLAllocatorType, we happen to already have a function for returning the default allocator instance, so we provide it.
}


    /// default_allocfreemethod
    ///
    /// Combined allocate/free callback using the default allocator.
    /// A non-null pBuffer requests a free of that buffer (n is passed as its size)
    /// and NULL is returned; a null pBuffer requests an allocation of n bytes.
    /// The context argument is unused.
    ///
    inline void* default_allocfreemethod(size_t n, void* pBuffer, void* /*pContext*/)
    {
        EASTLAllocatorType* const pAllocator = EASTLAllocatorDefault();

        if(pBuffer) // If freeing...
        {
            EASTLFree(*pAllocator, pBuffer, n);
            return NULL;  // The return value is meaningless for the free.
        }
        else // allocating
            return EASTLAlloc(*pAllocator, n);
    }


    /// allocate_memory
    ///
    /// This is a memory allocation dispatching function.
    /// To do: Make aligned and unaligned specializations.
    ///        Note that to do this we will need to use a class with a static
    ///        function instead of a standalone function like below.
    ///
    /// Requests at or below EASTL_ALLOCATOR_MIN_ALIGNMENT use the plain allocation
    /// path (alignmentOffset is not consulted there); larger alignments use the
    /// aligned path. Both paths assert on the alignment of the returned pointer.
    ///
    template
    inline void* allocate_memory(Allocator& a, size_t n, size_t alignment, size_t alignmentOffset)
    {
        void *result;
        if (alignment <= EASTL_ALLOCATOR_MIN_ALIGNMENT)
        {
            result = EASTLAlloc(a, n);
            // Ensure the result is correctly aligned.  An assertion likely indicates a mismatch between EASTL_ALLOCATOR_MIN_ALIGNMENT and the minimum alignment
            // of EASTLAlloc.  If there is a mismatch it may be necessary to define EASTL_ALLOCATOR_MIN_ALIGNMENT to be the minimum alignment of EASTLAlloc, or
            // to increase the alignment of EASTLAlloc to match EASTL_ALLOCATOR_MIN_ALIGNMENT.
            EASTL_ASSERT((reinterpret_cast(result)& ~(alignment - 1)) == reinterpret_cast(result));
        }
        else
        {
            result = EASTLAllocAligned(a, n, alignment, alignmentOffset);
            // Ensure the result is correctly aligned.  An assertion here may indicate a bug in the allocator.
            // The alignment guarantee applies to the address alignmentOffset bytes before the result.
            auto resultMinusOffset = (char*)result - alignmentOffset;
            EA_UNUSED(resultMinusOffset);
            EASTL_ASSERT((reinterpret_cast(resultMinusOffset)& ~(alignment - 1)) == reinterpret_cast(resultMinusOffset));
        }
        return result;
    }

}


#endif // Header include guard

================================================ FILE: include/EASTL/allocator_malloc.h ================================================
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ALLOCATOR_MALLOC_H #define EASTL_ALLOCATOR_MALLOC_H #include #include #include // EASTL_ALIGNED_MALLOC_AVAILABLE // // Identifies if the standard library provides a built-in aligned version of malloc. // Defined as 0 or 1, depending on the standard library or platform availability. // None of the viable C functions provides for an aligned malloc with offset, so we // don't consider that supported in any case. // // Options for aligned allocations: // C11 aligned_alloc http://linux.die.net/man/3/aligned_alloc // glibc memalign http://linux.die.net/man/3/posix_memalign // Posix posix_memalign http://pubs.opengroup.org/onlinepubs/000095399/functions/posix_memalign.html // VC++ _aligned_malloc http://msdn.microsoft.com/en-us/library/8z34s9c6%28VS.80%29.aspx This is not suitable, since it has a limitation that you need to free via _aligned_free. // #if !defined EASTL_ALIGNED_MALLOC_AVAILABLE #if defined(EA_PLATFORM_POSIX) && !defined(EA_PLATFORM_APPLE) // memalign is more consistently available than posix_memalign, though its location isn't consistent across // platforms and compiler libraries. Typically it's declared in one of three headers: stdlib.h, malloc.h, or malloc/malloc.h #include // memalign, posix_memalign. #define EASTL_ALIGNED_MALLOC_AVAILABLE 1 #if EA_HAS_INCLUDE_AVAILABLE #if EA_HAS_INCLUDE() #include #elif EA_HAS_INCLUDE() #include #endif #elif defined(EA_PLATFORM_BSD) #include #elif defined(__clang__) #if __has_include() #include #elif __has_include() #include #endif #else #include #endif #else #define EASTL_ALIGNED_MALLOC_AVAILABLE 0 #endif #endif namespace eastl { /////////////////////////////////////////////////////////////////////////////// // allocator_malloc // // Implements an EASTL allocator that uses malloc/free as opposed to // new/delete or PPMalloc Malloc/Free. 
// // Example usage: // vector intVector; // class allocator_malloc { public: allocator_malloc(const char* = NULL) { } allocator_malloc(const allocator_malloc&) { } allocator_malloc(const allocator_malloc&, const char*) { } allocator_malloc& operator=(const allocator_malloc&) { return *this; } bool operator==(const allocator_malloc&) { return true; } bool operator!=(const allocator_malloc&) { return false; } void* allocate(size_t n, int /*flags*/ = 0) { return malloc(n); } void* allocate(size_t n, size_t alignment, size_t alignmentOffset, int /*flags*/ = 0) { #if EASTL_ALIGNED_MALLOC_AVAILABLE if((alignmentOffset % alignment) == 0) // We check for (offset % alignmnent == 0) instead of (offset == 0) because any block which is aligned on e.g. 64 also is aligned at an offset of 64 by definition. return memalign(alignment, n); // memalign is more consistently available than posix_memalign. #else if((alignment <= EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT) && ((alignmentOffset % alignment) == 0)) return malloc(n); #endif return NULL; } void deallocate(void* p, size_t /*n*/) { free(p); } const char* get_name() const { return "allocator_malloc"; } void set_name(const char*) { } }; } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/any.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // This file implements the eastl::any which is part of the C++ standard STL // library specification. // // eastl::any is a type-safe container for single values of any type. 
Our // implementation makes use of the "small local buffer" optimization to avoid // unnecessary dynamic memory allocation if the specified type is eligible to // be stored in its local buffer. The user type must satisfy the size // requirements and must be no-throw move-constructible to qualify for the local // buffer optimization. // // To consider: Implement a fixed_any variant to allow users to customize // the size of the "small local buffer" optimization. // // http://en.cppreference.com/w/cpp/utility/any /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ANY_H #define EASTL_ANY_H #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif #include #include #if EASTL_RTTI_ENABLED #include #endif #if EASTL_EXCEPTIONS_ENABLED #include #endif namespace eastl { /////////////////////////////////////////////////////////////////////////////// // bad_any_cast // // The type thrown by any_cast on failure. // // http://en.cppreference.com/w/cpp/utility/any/bad_any_cast // #if EASTL_EXCEPTIONS_ENABLED struct bad_cast : std::exception { const char* what() const EA_NOEXCEPT EA_OVERRIDE { return "bad cast"; } }; struct bad_any_cast : public bad_cast { const char* what() const EA_NOEXCEPT EA_OVERRIDE { return "bad_any_cast"; } }; #endif namespace Internal { // utility to switch between exceptions and asserts inline void DoBadAnyCast() { #if EASTL_EXCEPTIONS_ENABLED throw bad_any_cast(); #else EASTL_ASSERT_MSG(false, "bad_any_cast\n"); // NOTE(rparolin): CRASH! // You crashed here because you requested a type that was not contained in the object. // We choose to intentionally crash here instead of returning invalid data to the calling // code which could cause hard to track down bugs. *((volatile int*)0) = 0xDEADC0DE; #endif } template void* DefaultConstruct(Args&&... 
args) { auto* pMem = EASTLAllocatorDefault()->allocate(sizeof(T), alignof(T), 0); return ::new(pMem) T(eastl::forward(args)...); } template void DefaultDestroy(T* p) { p->~T(); EASTLAllocatorDefault()->deallocate(static_cast(p), sizeof(T)); } } /////////////////////////////////////////////////////////////////////////////// // 20.7.3, class any // class any { ////////////////////////////////////////////////////////////////////////////////////////// // storage_operation // // operations supported by the storage handler // enum class storage_operation { GET, DESTROY, COPY, MOVE, TYPE_INFO }; ////////////////////////////////////////////////////////////////////////////////////////// // storage // // the underlying storage type which enables the switching between objects stored in // the heap and objects stored within the any type. // union storage { typedef aligned_storage_t<4 * sizeof(void*), alignment_of::value> internal_storage_t; void* external_storage = nullptr; internal_storage_t internal_storage; }; ////////////////////////////////////////////////////////////////////////////////////////// // use_internal_storage // // determines when the "local buffer optimization" is used // template using use_internal_storage = bool_constant < is_nothrow_move_constructible::value && (sizeof(T) <= sizeof(storage)) && (alignment_of::value % alignment_of::value == 0) >; ////////////////////////////////////////////////////////////////////////////////////////// // non-member friend functions // template friend const ValueType* any_cast(const any* pAny) EA_NOEXCEPT; template friend ValueType* any_cast(any* pAny) EA_NOEXCEPT; template friend ValueType any_cast(const any& operand); template friend ValueType any_cast(any& operand); template friend ValueType any_cast(any&& operand); //Adding Unsafe any cast operations template friend const ValueType* unsafe_any_cast(const any* pAny) EA_NOEXCEPT; template friend ValueType* unsafe_any_cast(any* pAny) EA_NOEXCEPT; 
////////////////////////////////////////////////////////////////////////////////////////// // internal storage handler // template struct storage_handler_internal { template static void construct(storage& s, V&& v) { ::new(&s.internal_storage) T(eastl::forward(v)); } template static void construct_inplace(storage& s, Args... args) { ::new(&s.internal_storage) T(eastl::forward(args)...); } template static void construct_inplace(storage& s, std::initializer_list il, Args&&... args) { ::new(&s.internal_storage) NT(il, eastl::forward(args)...); } static inline void destroy(any& refAny) { T& t = *static_cast(static_cast(&refAny.m_storage.internal_storage)); EA_UNUSED(t); t.~T(); refAny.m_handler = nullptr; } static void* get(const any* pThis) { EASTL_ASSERT(pThis); return (void*)(&pThis->m_storage.internal_storage); } static void* handler_func(storage_operation op, const any* pThis, any* pOther) { switch (op) { case storage_operation::GET: { return get(pThis); } break; case storage_operation::DESTROY: { EASTL_ASSERT(pThis); destroy(const_cast(*pThis)); } break; case storage_operation::COPY: { EASTL_ASSERT(pThis); EASTL_ASSERT(pOther); construct(pOther->m_storage, *(T*)(&pThis->m_storage.internal_storage)); } break; case storage_operation::MOVE: { EASTL_ASSERT(pThis); EASTL_ASSERT(pOther); construct(pOther->m_storage, eastl::move(*(T*)(&pThis->m_storage.internal_storage))); destroy(const_cast(*pThis)); } break; case storage_operation::TYPE_INFO: { #if EASTL_RTTI_ENABLED return (void*)&typeid(T); #endif } break; default: { EASTL_ASSERT_MSG(false, "unknown storage operation\n"); } break; }; return nullptr; } }; ////////////////////////////////////////////////////////////////////////////////////////// // external storage handler // template struct storage_handler_external { template static inline void construct(storage& s, V&& v) { s.external_storage = Internal::DefaultConstruct(eastl::forward(v)); } template static inline void construct_inplace(storage& s, Args... 
args)
            {
                s.external_storage = Internal::DefaultConstruct(eastl::forward(args)...);
            }

            // Initializer-list flavour of construct_inplace.
            template
            static inline void construct_inplace(storage& s, std::initializer_list il, Args&&... args)
            {
                s.external_storage = Internal::DefaultConstruct(il, eastl::forward(args)...);
            }

            // Destroys and frees the heap object, then marks the any as empty.
            static inline void destroy(any& refAny)
            {
                Internal::DefaultDestroy(static_cast(refAny.m_storage.external_storage));

                refAny.m_handler = nullptr;
            }

            // Returns the heap pointer held by the any; asserts that it is non-null.
            static void* get(const any* pThis)
            {
                EASTL_ASSERT(pThis);
                EASTL_ASSERT(pThis->m_storage.external_storage);
                return static_cast(pThis->m_storage.external_storage);
            }

            // Type-erased dispatcher stored in any::m_handler.
            // pThis is the source object; pOther is the destination for COPY/MOVE.
            static void* handler_func(storage_operation op, const any* pThis, any* pOther)
            {
                switch (op)
                {
                    case storage_operation::GET:
                    {
                        return get(pThis);
                    }
                    break;

                    case storage_operation::DESTROY:
                    {
                        EASTL_ASSERT(pThis);
                        destroy(*const_cast(pThis));
                    }
                    break;

                    case storage_operation::COPY:
                    {
                        EASTL_ASSERT(pThis);
                        EASTL_ASSERT(pOther);
                        construct(pOther->m_storage, *static_cast(pThis->m_storage.external_storage));
                    }
                    break;

                    case storage_operation::MOVE:
                    {
                        EASTL_ASSERT(pThis);
                        EASTL_ASSERT(pOther);
                        // NOTE(review): this allocates a fresh object for the destination and
                        // move-constructs into it, then destroys the source, rather than
                        // transferring the existing heap pointer. The callers are declared
                        // EA_NOEXCEPT, so an allocation failure here cannot propagate.
                        construct(pOther->m_storage, eastl::move(*(T*)(pThis->m_storage.external_storage)));
                        destroy(const_cast(*pThis));
                    }
                    break;

                    case storage_operation::TYPE_INFO:
                    {
                    #if EASTL_RTTI_ENABLED
                        return (void*)&typeid(T);
                    #endif
                    }
                    break;

                    default:
                    {
                        EASTL_ASSERT_MSG(false, "unknown storage operation\n");
                    }
                    break;
                };

                return nullptr;
            }
        };


        //////////////////////////////////////////////////////////////////////////////////////////
        // storage_handler_ptr
        //
        // defines the function signature of the storage handler that both the internal and
        // external storage handlers must implement to retrieve the underlying type of the any
        // object.
// using storage_handler_ptr = void* (*)(storage_operation, const any*, any*); ////////////////////////////////////////////////////////////////////////////////////////// // storage_handler // // based on the specified type T we select the appropriate underlying storage handler // based on the 'use_internal_storage' trait. // template using storage_handler = typename conditional::value, storage_handler_internal, storage_handler_external>::type; ////////////////////////////////////////////////////////////////////////////////////////// // data layout // storage m_storage; storage_handler_ptr m_handler; public: #ifndef EA_COMPILER_GNUC // TODO(rparolin): renable constexpr for GCC EA_CONSTEXPR #endif any() EA_NOEXCEPT : m_storage(), m_handler(nullptr) {} any(const any& other) : m_handler(nullptr) { if (other.m_handler) { // NOTE(rparolin): You can not simply copy the underlying // storage because it could hold a pointer to an object on the // heap which breaks the copy semantics of the language. other.m_handler(storage_operation::COPY, &other, this); m_handler = other.m_handler; } } any(any&& other) EA_NOEXCEPT : m_handler(nullptr) { if(other.m_handler) { // NOTE(rparolin): You can not simply move the underlying // storage because because the storage class has effectively // type erased user type so we have to defer to the handler // function to get the type back and pass on the move request. m_handler = eastl::move(other.m_handler); other.m_handler(storage_operation::MOVE, &other, this); } } ~any() { reset(); } template any(ValueType&& value, typename eastl::enable_if::type, any>::value>::type* = 0) { typedef decay_t DecayedValueType; static_assert(is_copy_constructible::value, "ValueType must be copy-constructible"); storage_handler::construct(m_storage, eastl::forward(value)); m_handler = &storage_handler::handler_func; } template explicit any(in_place_type_t, Args&&... 
args)
        {
            typedef storage_handler> StorageHandlerT;
            static_assert(eastl::is_constructible::value, "T must be constructible with Args...");

            StorageHandlerT::construct_inplace(m_storage, eastl::forward(args)...);
            m_handler = &StorageHandlerT::handler_func;
        }

        // In-place constructor taking an initializer list plus further arguments.
        template
        explicit any(in_place_type_t,
                     std::initializer_list il,
                     Args&&... args,
                     typename eastl::enable_if&, Args...>::value,
                                               void>::type* = 0)
        {
            typedef storage_handler> StorageHandlerT;

            StorageHandlerT::construct_inplace(m_storage, il, eastl::forward(args)...);
            m_handler = &StorageHandlerT::handler_func;
        }

        // 20.7.3.2, assignments
        // Each assignment builds a temporary any and swaps it with *this, so the
        // previous value is released when the temporary is destroyed.
        template
        any& operator=(ValueType&& value)
        {
            static_assert(is_copy_constructible>::value, "ValueType must be copy-constructible");
            any(eastl::forward(value)).swap(*this);
            return *this;
        }

        any& operator=(const any& other)
        {
            any(other).swap(*this);
            return *this;
        }

        any& operator=(any&& other) EA_NOEXCEPT
        {
            any(eastl::move(other)).swap(*this);
            return *this;
        }

        // 20.7.3.3, modifiers
        #if EASTL_VARIADIC_TEMPLATES_ENABLED
            // Destroys the current value (if any), constructs a new one in place from
            // 'args', and returns a reference to it.
            template
            typename eastl::enable_if, Args...> && eastl::is_copy_constructible_v>, eastl::decay_t&>::type
            emplace(Args&&... args)
            {
                typedef storage_handler> StorageHandlerT;

                reset();
                StorageHandlerT::construct_inplace(m_storage, eastl::forward(args)...);
                m_handler = &StorageHandlerT::handler_func;
                return *static_cast*>(StorageHandlerT::get(this));
            }

            // Initializer-list flavour of emplace.
            template
            typename eastl::enable_if, std::initializer_list&, Args...> && eastl::is_copy_constructible_v>, eastl::decay_t&>::type
            emplace(std::initializer_list il, Args&&...
args)
            {
                typedef storage_handler> StorageHandlerT;

                reset();
                StorageHandlerT::construct_inplace(m_storage, il, eastl::forward(args)...);
                m_handler = &StorageHandlerT::handler_func;
                return *static_cast*>(StorageHandlerT::get(this));
            }
        #endif

        // Destroys the contained value, if any. The DESTROY operation also clears m_handler.
        void reset() EA_NOEXCEPT
        {
            if(m_handler)
                m_handler(storage_operation::DESTROY, this, nullptr);
        }

        // Exchanges the contained values. Each MOVE operation empties its source
        // (clearing the source's handler), so the destination's handler is assigned
        // explicitly before each move.
        void swap(any& other) EA_NOEXCEPT
        {
            if(this == &other)
                return;

            if(m_handler && other.m_handler)
            {
                // Three-way rotation through a temporary: other -> tmp, this -> other, tmp -> this.
                any tmp;

                tmp.m_handler = other.m_handler;
                other.m_handler(storage_operation::MOVE, &other, &tmp);

                other.m_handler = m_handler;
                m_handler(storage_operation::MOVE, this, &other);

                m_handler = tmp.m_handler;
                tmp.m_handler(storage_operation::MOVE, &tmp, this);
            }
            else if (m_handler == nullptr && other.m_handler)
            {
                // Only 'other' holds a value: take its handler, then move its value into *this.
                eastl::swap(m_handler, other.m_handler);
                m_handler(storage_operation::MOVE, &other, this);
            }
            else if(m_handler && other.m_handler == nullptr)
            {
                // Only *this holds a value: hand the handler over, then move the value into 'other'.
                eastl::swap(m_handler, other.m_handler);
                other.m_handler(storage_operation::MOVE, this, &other);
            }
            //else if (m_handler == nullptr && other.m_handler == nullptr)
            //{
            //     // nothing to swap
            //}
        }

        // 20.7.3.4, observers
        bool has_value() const EA_NOEXCEPT { return m_handler != nullptr; }

        #if EASTL_RTTI_ENABLED
            // Returns the type_info of the contained value, or typeid(void) when empty.
            inline const std::type_info& type() const EA_NOEXCEPT
            {
                if(m_handler)
                {
                    auto* pTypeInfo = m_handler(storage_operation::TYPE_INFO, this, nullptr);
                    return *static_cast(pTypeInfo);
                }
                else
                {
                    return typeid(void);
                }
            }
        #endif
    };



    //////////////////////////////////////////////////////////////////////////////////////////
    // 20.7.4, non-member functions
    //
    inline void swap(any& rhs, any& lhs) EA_NOEXCEPT { rhs.swap(lhs); }


    //////////////////////////////////////////////////////////////////////////////////////////
    // 20.7.4, The non-member any_cast functions provide type-safe access to the contained object.
// template inline ValueType any_cast(const any& operand) { static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, "ValueType must be a reference or copy constructible"); auto* p = any_cast::type>::type>(&operand); if(p == nullptr) Internal::DoBadAnyCast(); return *p; } template inline ValueType any_cast(any& operand) { static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, "ValueType must be a reference or copy constructible"); auto* p = any_cast::type>(&operand); if(p == nullptr) Internal::DoBadAnyCast(); return *p; } template inline ValueType any_cast(any&& operand) { static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, "ValueType must be a reference or copy constructible"); auto* p = any_cast::type>(&operand); if (p == nullptr) Internal::DoBadAnyCast(); return *p; } // NOTE(rparolin): The runtime type check was commented out because in DLL builds the templated function pointer // value will be different -- completely breaking the validation mechanism. Due to the fact that eastl::any uses // type erasure we can't refresh (on copy/move) the cached function pointer to the internal handler function because // we don't statically know the type. template inline const ValueType* any_cast(const any* pAny) EA_NOEXCEPT { return (pAny && pAny->m_handler EASTL_IF_NOT_DLL(== &any::storage_handler>::handler_func) #if EASTL_RTTI_ENABLED && pAny->type() == typeid(typename remove_reference::type) #endif ) ? static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)) : nullptr; } template inline ValueType* any_cast(any* pAny) EA_NOEXCEPT { return (pAny && pAny->m_handler EASTL_IF_NOT_DLL(== &any::storage_handler>::handler_func) #if EASTL_RTTI_ENABLED && pAny->type() == typeid(typename remove_reference::type) #endif ) ? 
static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)) : nullptr; } //Unsafe operations - use with caution template inline const ValueType* unsafe_any_cast(const any* pAny) EA_NOEXCEPT { return unsafe_any_cast(const_cast(pAny)); } template inline ValueType* unsafe_any_cast(any* pAny) EA_NOEXCEPT { return static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)); } ////////////////////////////////////////////////////////////////////////////////////////// // make_any // #if EASTL_VARIADIC_TEMPLATES_ENABLED template inline any make_any(Args&&... args) { return any(eastl::in_place_type, eastl::forward(args)...); } template inline any make_any(std::initializer_list il, Args&&... args) { return any(eastl::in_place_type, il, eastl::forward(args)...); } #endif } // namespace eastl #endif // EASTL_ANY_H ================================================ FILE: include/EASTL/array.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Implements a templated array class as per the C++ standard TR1 (technical // report 1, which is a list of proposed C++ library amendments). // The primary distinctions between this array and TR1 array are: // - array::size_type is defined as eastl_size_t instead of size_t in order // to save memory and run faster on 64 bit systems. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ARRAY_H #define EASTL_ARRAY_H #include #include #include #include #include #include #if EASTL_EXCEPTIONS_ENABLED EA_DISABLE_ALL_VC_WARNINGS() #include // std::out_of_range, std::length_error. EA_RESTORE_ALL_VC_WARNINGS() #endif // 4512/4626 - 'class' : assignment operator could not be generated. 
// This disabling would best be put elsewhere. EA_DISABLE_VC_WARNING(4512 4626); #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /////////////////////////////////////////////////////////////////////// /// array /// /// Implements a templated array class as per the C++ standard TR1. /// This class allows you to use a built-in C style array like an STL vector. /// It does not let you change its size, as it is just like a C built-in array. /// Our implementation here strives to remove function call nesting, as that /// makes it hard for us to profile debug builds due to function call overhead. /// Note that this is intentionally a struct with public data, as per the /// C++ standard update proposal requirements. /// /// Example usage: /// array a = { { 0, 1, 2, 3, 4 } }; // Strict compilers such as GCC require the double brackets. /// a[2] = 4; /// for(array::iterator i = a.begin(); i < a.end(); ++i) /// *i = 0; /// #if EA_IS_ENABLED(EA_DEPRECATIONS_FOR_2025_OCT) template #else template #endif struct array { public: typedef array this_type; typedef T value_type; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* iterator; typedef const value_type* const_iterator; typedef eastl::reverse_iterator reverse_iterator; typedef eastl::reverse_iterator const_reverse_iterator; typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. typedef ptrdiff_t difference_type; // Note that the member data is intentionally public. // This allows for aggregate initialization of the // object (e.g. array a = { 0, 3, 2, 4 }; ) // do not use this member directly (use data() instead). value_type mValue[N]; // We intentionally provide no constructor, destructor, or assignment operator. 
void fill(const value_type& value); // Unlike the swap function for other containers, array::swap takes linear time, // may exit via an exception, and does not cause iterators to become associated with the other container. void swap(this_type& x) EA_NOEXCEPT_IF(eastl::is_nothrow_swappable::value); EA_CPP14_CONSTEXPR iterator begin() EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_iterator begin() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_iterator cbegin() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR iterator end() EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_iterator end() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_iterator cend() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR reverse_iterator rbegin() EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_reverse_iterator rbegin() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_reverse_iterator crbegin() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR reverse_iterator rend() EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_reverse_iterator rend() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR const_reverse_iterator crend() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR bool empty() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR size_type size() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR size_type max_size() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR T* data() EA_NOEXCEPT; EA_CPP14_CONSTEXPR const T* data() const EA_NOEXCEPT; EA_CPP14_CONSTEXPR reference operator[](size_type i); EA_CPP14_CONSTEXPR const_reference operator[](size_type i) const; EA_CPP14_CONSTEXPR const_reference at(size_type i) const; EA_CPP14_CONSTEXPR reference at(size_type i); EA_CPP14_CONSTEXPR reference front(); EA_CPP14_CONSTEXPR const_reference front() const; EA_CPP14_CONSTEXPR reference back(); EA_CPP14_CONSTEXPR const_reference back() const; bool validate() const; int validate_iterator(const_iterator i) const; }; // class array // declaring a C-style array of size 0 is not valid C++. 
// thus, we have to declare this partial specialization: template struct array { public: typedef array this_type; typedef T value_type; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* iterator; typedef const value_type* const_iterator; typedef eastl::reverse_iterator reverse_iterator; typedef eastl::reverse_iterator const_reverse_iterator; typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. typedef ptrdiff_t difference_type; // We intentionally provide no constructor, destructor, or assignment operator. void fill(const value_type&) {} // Unlike the swap function for other containers, array::swap takes linear time, // may exit via an exception, and does not cause iterators to become associated with the other container. void swap(this_type&) EA_NOEXCEPT {} EA_CPP14_CONSTEXPR iterator begin() EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR const_iterator begin() const EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR const_iterator cbegin() const EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR iterator end() EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR const_iterator end() const EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR const_iterator cend() const EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR reverse_iterator rbegin() EA_NOEXCEPT { return reverse_iterator(nullptr); } EA_CPP14_CONSTEXPR const_reverse_iterator rbegin() const EA_NOEXCEPT { return const_reverse_iterator(nullptr); } EA_CPP14_CONSTEXPR const_reverse_iterator crbegin() const EA_NOEXCEPT { return const_reverse_iterator(nullptr); } EA_CPP14_CONSTEXPR reverse_iterator rend() EA_NOEXCEPT { return reverse_iterator(nullptr); } EA_CPP14_CONSTEXPR const_reverse_iterator rend() const EA_NOEXCEPT { return const_reverse_iterator(nullptr); } EA_CPP14_CONSTEXPR const_reverse_iterator crend() const EA_NOEXCEPT { return const_reverse_iterator(nullptr); } EA_CPP14_CONSTEXPR bool empty() const 
EA_NOEXCEPT { return true; } EA_CPP14_CONSTEXPR size_type size() const EA_NOEXCEPT { return 0; } EA_CPP14_CONSTEXPR size_type max_size() const EA_NOEXCEPT { return 0; } EA_CPP14_CONSTEXPR T* data() EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR const T* data() const EA_NOEXCEPT { return nullptr; } EA_CPP14_CONSTEXPR reference operator[](size_type) { return *data(); } EA_CPP14_CONSTEXPR const_reference operator[](size_type) const { return *data(); } EA_DISABLE_VC_WARNING(4702); // unreachable code EA_CPP14_CONSTEXPR const_reference at(size_type) const { #if EASTL_EXCEPTIONS_ENABLED throw std::out_of_range("array::at -- out of range"); #elif EASTL_ASSERT_ENABLED EASTL_FAIL_MSG("array::at -- out of range"); #endif return *data(); } EA_RESTORE_VC_WARNING(); EA_DISABLE_VC_WARNING(4702); // unreachable code EA_CPP14_CONSTEXPR reference at(size_type) { #if EASTL_EXCEPTIONS_ENABLED throw std::out_of_range("array::at -- out of range"); #elif EASTL_ASSERT_ENABLED EASTL_FAIL_MSG("array::at -- out of range"); #endif return *data(); } EA_RESTORE_VC_WARNING(); EA_CPP14_CONSTEXPR reference front() { return *data(); } EA_CPP14_CONSTEXPR const_reference front() const { return *data(); } EA_CPP14_CONSTEXPR reference back() { return *data(); } EA_CPP14_CONSTEXPR const_reference back() const { return *data(); } bool validate() const { return true; } int validate_iterator(const_iterator) const { return isf_none; } }; // class array /////////////////////////////////////////////////////////////////////////// // template deduction guides /////////////////////////////////////////////////////////////////////////// #ifdef __cpp_deduction_guides template array(T, U...) 
-> array; #endif /////////////////////////////////////////////////////////////////////// // array /////////////////////////////////////////////////////////////////////// template inline void array::fill(const value_type& value) { eastl::fill_n(&mValue[0], N, value); } template inline void array::swap(this_type& x) EA_NOEXCEPT_IF(eastl::is_nothrow_swappable::value) { eastl::swap_ranges(&mValue[0], &mValue[N], &x.mValue[0]); } template EA_CPP14_CONSTEXPR inline typename array::iterator array::begin() EA_NOEXCEPT { return &mValue[0]; } template EA_CPP14_CONSTEXPR inline typename array::const_iterator array::begin() const EA_NOEXCEPT { return &mValue[0]; } template EA_CPP14_CONSTEXPR inline typename array::const_iterator array::cbegin() const EA_NOEXCEPT { return &mValue[0]; } template EA_CPP14_CONSTEXPR inline typename array::iterator array::end() EA_NOEXCEPT { return &mValue[N]; } template EA_CPP14_CONSTEXPR inline typename array::const_iterator array::end() const EA_NOEXCEPT { return &mValue[N]; } template EA_CPP14_CONSTEXPR inline typename array::const_iterator array::cend() const EA_NOEXCEPT { return &mValue[N]; } template EA_CPP14_CONSTEXPR inline typename array::reverse_iterator array::rbegin() EA_NOEXCEPT { return reverse_iterator(&mValue[N]); } template EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator array::rbegin() const EA_NOEXCEPT { return const_reverse_iterator(&mValue[N]); } template EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator array::crbegin() const EA_NOEXCEPT { return const_reverse_iterator(&mValue[N]); } template EA_CPP14_CONSTEXPR inline typename array::reverse_iterator array::rend() EA_NOEXCEPT { return reverse_iterator(&mValue[0]); } template EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator array::rend() const EA_NOEXCEPT { return const_reverse_iterator(static_cast(&mValue[0])); } template EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator array::crend() const EA_NOEXCEPT { return 
const_reverse_iterator(static_cast(&mValue[0])); } template EA_CPP14_CONSTEXPR inline typename array::size_type array::size() const EA_NOEXCEPT { return (size_type)N; } template EA_CPP14_CONSTEXPR inline typename array::size_type array::max_size() const EA_NOEXCEPT { return (size_type)N; } template EA_CPP14_CONSTEXPR inline bool array::empty() const EA_NOEXCEPT { return (N == 0); } template EA_CPP14_CONSTEXPR inline typename array::reference array::operator[](size_type i) { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(i >= N)) EASTL_FAIL_MSG("array::operator[] -- out of range"); #elif EASTL_ASSERT_ENABLED // We allow taking a reference to arr[0] if (EASTL_UNLIKELY((i != 0) && i >= N)) EASTL_FAIL_MSG("array::operator[] -- out of range"); #endif return mValue[i]; } template EA_CPP14_CONSTEXPR inline typename array::const_reference array::operator[](size_type i) const { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(i >= N)) EASTL_FAIL_MSG("array::operator[] -- out of range"); #elif EASTL_ASSERT_ENABLED // We allow taking a reference to arr[0] if (EASTL_UNLIKELY((i != 0) && i >= N)) EASTL_FAIL_MSG("array::operator[] -- out of range"); #endif return mValue[i]; } template EA_CPP14_CONSTEXPR inline typename array::reference array::front() { return mValue[0]; } template EA_CPP14_CONSTEXPR inline typename array::const_reference array::front() const { return mValue[0]; } template EA_CPP14_CONSTEXPR inline typename array::reference array::back() { return mValue[N - 1]; } template EA_CPP14_CONSTEXPR inline typename array::const_reference array::back() const { return mValue[N - 1]; } template EA_CPP14_CONSTEXPR inline T* array::data() EA_NOEXCEPT { return mValue; } template EA_CPP14_CONSTEXPR inline const T* array::data() const EA_NOEXCEPT { return mValue; } template EA_CPP14_CONSTEXPR inline typename array::const_reference array::at(size_type i) const { #if EASTL_EXCEPTIONS_ENABLED if(EASTL_UNLIKELY(i 
>= N)) throw std::out_of_range("array::at -- out of range"); #elif EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(i >= N)) EASTL_FAIL_MSG("array::at -- out of range"); #endif return static_cast(mValue[i]); } template EA_CPP14_CONSTEXPR inline typename array::reference array::at(size_type i) { #if EASTL_EXCEPTIONS_ENABLED if(EASTL_UNLIKELY(i >= N)) throw std::out_of_range("array::at -- out of range"); #elif EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(i >= N)) EASTL_FAIL_MSG("array::at -- out of range"); #endif return static_cast(mValue[i]); } template inline bool array::validate() const { return true; // There is nothing to do. } template inline int array::validate_iterator(const_iterator i) const { if(i >= mValue) { if(i < (mValue + N)) return (isf_valid | isf_current | isf_can_dereference); if(i <= (mValue + N)) return (isf_valid | isf_current); } return isf_none; } /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// template EA_CPP14_CONSTEXPR inline bool operator==(const array& a, const array& b) { return eastl::equal(&a.mValue[0], &a.mValue[N], &b.mValue[0]); } #if defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) template inline synth_three_way_result operator<=>(const array& a, const array& b) { return eastl::lexicographical_compare_three_way(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N], synth_three_way{}); } #else template EA_CPP14_CONSTEXPR inline bool operator<(const array& a, const array& b) { return eastl::lexicographical_compare(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N]); } template EA_CPP14_CONSTEXPR inline bool operator!=(const array& a, const array& b) { return !eastl::equal(&a.mValue[0], &a.mValue[N], &b.mValue[0]); } template EA_CPP14_CONSTEXPR inline bool operator>(const array& a, const array& b) { return eastl::lexicographical_compare(&b.mValue[0], &b.mValue[N], &a.mValue[0], &a.mValue[N]); } template EA_CPP14_CONSTEXPR inline 
bool operator<=(const array& a, const array& b) { return !eastl::lexicographical_compare(&b.mValue[0], &b.mValue[N], &a.mValue[0], &a.mValue[N]); } template EA_CPP14_CONSTEXPR inline bool operator>=(const array& a, const array& b) { return !eastl::lexicographical_compare(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N]); } #endif /////////////////////////////////////////////////////////////////////// // non-member functions /////////////////////////////////////////////////////////////////////// template EA_NODISCARD EA_CONSTEXPR T& get(array& value) EA_NOEXCEPT { static_assert(I < N, "array index out of bounds"); return value.mValue[I]; } template EA_NODISCARD EA_CONSTEXPR T&& get(array&& value) EA_NOEXCEPT { static_assert(I < N, "array index out of bounds"); return move(value.mValue[I]); } template EA_NODISCARD EA_CONSTEXPR const T& get(const array& value) EA_NOEXCEPT { static_assert(I < N, "array index out of bounds"); return value.mValue[I]; } template EA_NODISCARD EA_CONSTEXPR const T&& get(const array&& value) EA_NOEXCEPT { static_assert(I < N, "array index out of bounds"); return move(value.mValue[I]); } template inline void swap(array& a, array& b) { eastl::swap_ranges(&a.mValue[0], &a.mValue[N], &b.mValue[0]); } /////////////////////////////////////////////////////////////////////// // to_array /////////////////////////////////////////////////////////////////////// namespace internal { template EA_CONSTEXPR auto to_array(T (&a)[N], index_sequence) { return eastl::array, N>{{a[I]...}}; } template EA_CONSTEXPR auto to_array(T (&&a)[N], index_sequence) { return eastl::array, N>{{eastl::move(a[I])...}}; } } template EA_CONSTEXPR eastl::array, N> to_array(T (&a)[N]) { static_assert(eastl::is_constructible_v, "element type T must be copy-initializable"); static_assert(!eastl::is_array_v, "passing multidimensional arrays to to_array is ill-formed"); return internal::to_array(a, eastl::make_index_sequence{}); } template EA_CONSTEXPR eastl::array, N> 
to_array(T (&&a)[N]) { static_assert(eastl::is_move_constructible_v, "element type T must be move-constructible"); static_assert(!eastl::is_array_v, "passing multidimensional arrays to to_array is ill-formed"); return internal::to_array(eastl::move(a), eastl::make_index_sequence{}); } #if EASTL_TUPLE_ENABLED /////////////////////////////////////////////////////////////////////// // helper classes /////////////////////////////////////////////////////////////////////// template struct tuple_size> : public eastl::integral_constant {}; namespace internal { template struct tuple_element {}; template struct tuple_element> { using type = T; }; } template struct tuple_element> : internal::tuple_element {}; #endif // EASTL_TUPLE_ENABLED } // namespace eastl /////////////////////////////////////////////////////////////////////// // C++17 structured bindings support for eastl::array /////////////////////////////////////////////////////////////////////// #ifndef EA_COMPILER_NO_STRUCTURED_BINDING // we can't forward declare tuple_size and tuple_element because some std implementations // don't declare it in the std namespace, but instead alias it. #include namespace std { template struct tuple_size> : public eastl::integral_constant {}; template struct tuple_element> : public eastl::tuple_element> {}; } #endif EA_RESTORE_VC_WARNING(); #endif // Header include guard ================================================ FILE: include/EASTL/atomic.h ================================================ ///////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ATOMIC_H #define EASTL_ATOMIC_H #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once #endif ///////////////////////////////////////////////////////////////////////////////// // // Below is the documentation of the API of the eastl::atomic library. 
// This includes class and free functions. // Anything marked with a '+' in front of the name is an extension to the std API. // ///////////////////////////////////////////////////////////////////////////////// // // eastl::atomic memory_order API // // See below for full explanations on the memory orders and their guarantees. // // - eastl::memory_order_relaxed // - eastl::memory_order_acquire // - eastl::memory_order_release // - eastl::memory_order_acq_rel // - eastl::memory_order_seq_cst // - +eastl::memory_order_read_depends // ///////////////////////////////////////////////////////////////////////////////// // // eastl::atomic class API // // All jargon and prerequisite knowledge is explained below. // // Unless otherwise specified all orders except read_depends is a valid order // on the given operation. // Unless otherwise specified all operations are valid on all types T. // If no order is provided, seq_cst memory ordering is used for the operation. // // - atomic() : Value-initializes the underlying object as T{}. // // - atomic(T) : Initializes the underlying object with a copy of T. // // - T operator=(T) : Atomically assigns T as store(T, seq_cst). // // - is_lock_free() : true if the operations are lockfree. Always true for eastl. // // - store(T, order) : Atomically stores T affecting memory according to order. // : Valid orders are relaxed, release, and seq_cst. // // - T load(order) : Atomically loads T affecting memory according to order. // : Valid orders are relaxed, acquire, and seq_cst. // : If T is a pointer type, read_depends is another valid order. // // - operator T() : Atomically loads T as load(T, seq_cst). // // - T exchange(T, order) : Atomically performs a RMW that replaces the current value with T. // : Memory is affected according to order. // : Returns the previous value stored before the RMW operation. 
// // - bool compare_exchange_weak(T&, T, successOrder, failOrder) // : Atomically compares the value stored with that of T& and if equal replaces it with T. // : This is a RMW operation. // : If the comparison fails, loads the observed value into T&. This is a load operation. // : Memory is affected in the RMW operation according to successOrder. // : Memory is affected in the load operation according to failOrder. // : failOrder cannot be a stronger order than successOrder. // : Returns true if the comparison succeeded and T was stored into the atomic object, false otherwise. // : // : The weak variant may fail even if the observed value of the atomic object equals T&. // : This can yield performance gains on platforms with ld/str exclusive pair instructions especially // : when the compare_exchange operation is done in a loop. // : Only the bool return value can be used to determine if the operation was successful. // // - bool compare_exchange_weak(T&, T, order) // : Same as the above except that order is used for both the RMW and the load operation. // : If order == acq_rel then the order of the load operation equals acquire. // : If order == release then the order of the load operation equals relaxed. // // - bool compare_exchange_strong(T&, T, successOrder, failOrder) // - bool compare_exchange_strong(T&, T, order) // : This operation is the same as the above weak variants // : except that it will not fail spuriously if the value stored equals T&. // // The below operations are only valid for Integral types. // // - T fetch_add(T, order) // : Atomically performs a RMW that increments the value stored with T. // : Returns the previous value stored before the RMW operation. // - T fetch_sub(T, order) // : Atomically performs a RMW that decrements the value stored with T. // : Returns the previous value stored before the RMW operation. // - T fetch_and(T, order) // : Atomically performs a RMW that bit-wise and's the value stored with T. 
// : Returns the previous value stored before the RMW operation. // - T fetch_or(T, order) // : Atomically performs a RMW that bit-wise or's the value stored with T. // : Returns the previous value stored before the RMW operation. // - T fetch_xor(T, order) // : Atomically performs a RMW that bit-wise xor's the value stored with T. // : Returns the previous value stored before the RMW operation. // // - +T add_fetch(T, order) // : Atomically performs a RMW that increments the value stored with T. // : Returns the new updated value after the operation. // - +T sub_fetch(T, order) // : Atomically performs a RMW that decrements the value stored with T. // : Returns the new updated value after the operation. // - +T and_fetch(T, order) // : Atomically performs a RMW that bit-wise and's the value stored with T. // : Returns the new updated value after the operation. // - +T or_fetch(T, order) // : Atomically performs a RMW that bit-wise or's the value stored with T. // : Returns the new updated value after the operation. // - +T xor_fetch(T, order) // : Atomically performs a RMW that bit-wise xor's the value stored with T. // : Returns the new updated value after the operation. // // - T operator++/--() // : Atomically increments or decrements the atomic value by one. // : Returns the previous value stored before the RMW operation. // : Memory is affected according to seq_cst ordering. // // - T ++/--operator() // : Atomically increments or decrements the atomic value by one. // : Returns the new updated value after the RMW operation. // : Memory is affected according to seq_cst ordering. // // - T operator+=/-=/&=/|=/^=(T) // : Atomically adds, subtracts, bitwise and/or/xor the atomic object with T. // : Returns the new updated value after the operation. // : Memory is affected according to seq_cst ordering. 
// // // The below operations are only valid for Pointer types // // - T* fetch_add(ptrdiff_t val, order) // : Atomically performs a RMW that increments the value store with sizeof(T) * val // : Returns the previous value stored before the RMW operation. // - T* fetch_sub(ptrdiff_t val, order) // : Atomically performs a RMW that decrements the value store with sizeof(T) * val // : Returns the previous value stored before the RMW operation. // // - +T* add_fetch(ptrdiff_t val, order) // : Atomically performs a RMW that increments the value store with sizeof(T) * val // : Returns the new updated value after the operation. // - +T* sub_fetch(ptrdiff_t val, order) // : Atomically performs a RMW that decrements the value store with sizeof(T) * val // : Returns the new updated value after the operation. // // - T* operator++/--() // : Atomically increments or decrements the atomic value by sizeof(T) * 1. // : Returns the previous value stored before the RMW operation. // : Memory is affected according to seq_cst ordering. // // - T* ++/--operator() // : Atomically increments or decrements the atomic value by sizeof(T) * 1. // : Returns the new updated value after the RMW operation. // : Memory is affected according to seq_cst ordering. // // // - +EASTL_ATOMIC_HAS_[len]BIT Macro Definitions // These macros provide the ability to compile-time switch on the availability of support for the specific // bit width of an atomic object. // Example: // // #if defined(EASTL_ATOMIC_HAS_128BIT) // #endif // // Indicates the support for 128-bit atomic operations on an eastl::atomic object. // ///////////////////////////////////////////////////////////////////////////////// // // eastl::atomic_flag class API // // Unless otherwise specified all orders except read_depends is a valid order // on the given operation. // // - atomic_flag() : Initializes the flag to false. // // - clear(order) // : Atomically stores the value false to the flag. 
// : Valid orders are relaxed, release, and seq_cst. // // - bool test_and_set(order) // : Atomically exchanges flag with true and returns the previous value that was held. // // - bool test(order) // : Atomically loads the flag value. // : Valid orders are relaxed, acquire, and seq_cst. // ///////////////////////////////////////////////////////////////////////////////// // // eastl::atomic standalone free function API // // All class methods have a standalone free function that takes a pointer to the // atomic object as the first argument. These functions just call the correct method // on the atomic object for the given operation. // These functions come in two variants, a non-explicit and an explicit variant // that take on the form atomic_op() and atomic_op_explicit() respectively. // The non-explicit variants take no order arguments and thus are all seq_cst. // The explicit variants take an order argument. // Only the standalone functions that do not have a class method equivalent pair will be // documented here which includes all new extensions to the std API. // // - +compiler_barrier() // : Read-Write Compiler Barrier. // - +compiler_barrier_data_dependency(const T&) // : Read-Write Compiler Barrier. // : Applies a fake input dependency on const T& so the compiler believes said variable is used. // : Useful for example when writing benchmark or testing code with local variables that must not get dead-store eliminated. // - +cpu_pause() // : Prevents speculative memory order violations in spin-wait loops. // : Allows giving up core resources, execution units, to other threads while in spin-wait loops. // - atomic_thread_fence(order) // : Read docs below. // - atomic_signal_fence(order) // : Prevents reordering with a signal handler. 
// - +atomic_load_cond(const eastl::atomic*, Predicate) // : continuously loads the atomic object until Predicate is true // : will properly ensure the spin-wait loop is optimal // : very useful when needing to spin-wait for some condition to be true which is common in many lock-free algorithms // : Memory is affected according to seq_cst ordering. // - +atomic_load_cond_explicit(const eastl::atomic*, Predicate, Order) // : Same as above but takes an order for how memory is affected // ///////////////////////////////////////////////////////////////////////////////// // // Deviations from the standard. This does not include new features added: // // 1. // Description: Atomics are always lock free // Reasoning : We don't want people to fall into performance traps where implicit locking // is done. If your user defined type is large enough to not support atomic // instructions then your user code should do the locking. // // 2. // Description: Atomic objects can not be volatile // Reasoning : Volatile objects do not make sense in the context of eastl::atomic. // Use the given memory orders to get the ordering you need. // Atomic objects have to become visible on the bus. See below for details. // // 3. // Description: Consume memory order is not supported // Reasoning : See below for the reasoning. // // 4. // Description: ATOMIC_INIT() macros and the ATOMIC_LOCK_FREE macros are not implemented // Reasoning : Use the is_lock_free() method instead of the macros. // ATOMIC_INIT() macros aren't needed since the default constructor value initializes. // // 5. // Description: compare_exchange failure memory order cannot be stronger than success memory order // Reasoning : Besides the argument that it ideologically does not make sense that a failure // of the atomic operation should have a stricter ordering guarantee than the // success of it; if that is required then just make the whole operation stronger. 
// This ability was added and allowed in C++17 only which makes supporting multiple // C++ versions harder when using the compiler provided intrinsics since their behaviour // is reliant on the C++ version being compiled. Also makes it harder to reason about code // using these atomic ops since C++ versions vary the behaviour. We have also noticed // that versions of compilers that say they support C++17 do not properly adhere to this // new requirement in their intrinsics. Thus we will not support this. // // 6. // Description: All memory orders are distinct types instead of enum values // Reasoning : This will not affect how the API is used in user code. // It allows us to statically assert on invalid memory orders since they are compile-time types // instead of potentially runtime enum values. // Allows for more efficient code gen without the use of switch statements or if-else conditionals // on the memory order enum values on compilers that do not provide intrinsics that take in a // memory order, such as MSVC, especially in debug and debug-opt builds. // ///////////////////////////////////////////////////////////////////////////////// // // ******** DISCLAIMER ******** // // This documentation is not meant to provide rigorous proofs on the memory models // of specific architectures or the C++ memory model introduced in C++11. It is not // meant to provide formal mathematical definitions and logic that shows that a given // implementation adheres to the C++ memory model. This isn't meant to be some infallible // oracle on memory models, barriers, observers, and architecture implementation details. // What I do hope a reader gets out of this is the following. An understanding of the C++ // memory model and how that relates to implementations on various architectures. Various // phenomena and ways that compilers and architectures can steer away from a sequentially // consistent system. 
To provide examples on how to use this library with common patterns // that will be seen in many code bases. Lastly I would like to provide insight and // further readings into the lesser known topics that aren't shared outside people // who live in this space and why certain things are done the way they are // such as cumulativity of memory barriers as one example. Sometimes specifying barriers // as LDLD/LDST/STST/STLD doesn't actually cut it, and finer grain semantics are needed // to describe cumulativity of memory barriers. // // ******** Layout of the Documentation ******** // // This document will first go through a variety of different hardware architectures with examples of the various kinds of // reordering that is allowed by these architectures. We will use the memory barriers provided by the hardware to "fix" these // examples. // Then we will introduce the C++ memory model and revisit the examples using the platform agnostic abstract memory model to "fix" // them. // The hope here is that we get a sense of the various types of architectures and weak memory consistency provided by them and thus // an appreciation for the design of the C++ abstract memory model. 
// // ******** REFERENCES ******** // [1] Dekker's mutual exclusion algorithm made RW-safe // [2] Handling Memory Ordering in Multithreaded Applications with Oracle Solaris // [3] Evaluating the Cost of Atomic Operations on Modern Architectures // [4] A Tutorial Introduction to the ARM and POWER Relaxed Memory Models // [5] Memory Barriers: a Hardware View for Software Hackers // [6] Memory Model = Instruction Reordering + Store Atomicity // [7] ArMOR: Defending Against Memory Consistency Model Mismatches in Heterogeneous Architectures // [8] Weak Memory Models: Balancing Definitional Simplicity and Implementation Flexibility // [9] Repairing Sequential Consistency in C/C++11 // [10] A high-level operational semantics for hardware weak memory models // [11] x86-TSO: A Rigorous and Usable Programmer's Model for x86 Multiprocessors // [12] Simplifying ARM Concurrency: Multicopy-Atomic Axiomatic and Operational Models for ARMv8 // [13] Mixed-size Concurrency: ARM, POWER, C/C++11, and SC // [14] P0668R4: Revising the C++ memory model // [15] Constructing a Weak Memory Model // [16] The Superfluous Load Queue // [17] P0190R1: Proposal for New memory_order_consume Definition // // ******** What does it mean to be Atomic? ******** // // The word atomic has been overloaded and can mean a lot of different things depending on the context, // so let's digest it. // // The first attribute for something to be atomic is that concurrent stores and loads // must not tear or shear. This means if two threads write 0x01 and 0x02 at the same time // then the only values that should ever be observed is 0x01 or 0x02. We can only see // the whole write of 0x01 or 0x02, not 0x03 as an example. Many algorithms rely on // this property; only very few such as Dekker's algorithm for mutual exclusion don't. 
// Well actually a recent paper, [1], showed that Dekker's isn't safe without atomic // loads and stores so this property is pretty fundamental and also hard to prove that // your algorithm is safe without this property on loads and stores. // // We need to ensure the compiler emits a single load instruction. // If we are doing 64-bit loads on a 32-bit platform, we need to ensure the load is one // instruction instead of 2 32-bit loads into two registers. // Another example is if we have this struct, struct { int32_t i; int32_t k; }, even on // a 64-bit system we have to ensure the compiler does one 64-bit load and not two // 32-bit loads for each individual member. // // We also need to ensure the correct instruction is emitted. A general load instruction // to do a 64-bit load on a 32-bit platform may perform a 64-bit load but it may not // be atomic, it may be turned into two 32-bit loads behind the scenes in the cpu. // For example on ARMv7 we would have to use ldrexd not ldrd for 64-bit loads // on a 32-bit ARMv7 core. // // An operation may be considered atomic if multiple sub-operations are done as one // transactional unit. This is commonly known as a Read-Modify-Write, RMW, operation. // Take a simple add operation; it is actually a load from memory into a register, // a modification of said register and then a store back to memory. If two threads // concurrently execute this add operation on the same memory location; any interleaving // of the 3 sub-operations is possible. It is possible that if the initial value is 0, // the result may be 1 because each thread executed in lockstep both loading 0, adding 1 // and then storing 1. A RMW operation may be considered atomic if the whole sequence of // sub-operations are serialized as one transactional unit. // // Atomicity may also refer to the order in which memory operations are observed and the // dependencies between memory operations to different memory locations. 
As a quick example // into the very thing we will be deep diving into that is not very intuitive. If I do, [STORE(A, 2); STORE(B, 1);], // in one thread and another thread does, [r0 = LOAD(B); r1 = LOAD(A);]; if r0 == 1, thus we observed // the store to B, will we observe r1 == 2? Our intuition tells us that well A was stored // first and then B, so if I read the new value of B then I must also read the new value // of A since the store to A happened before B so if I can see B then I must be able to // see everything before B which includes A. // This highlights the ordering of memory operations and why memory barriers and memory // models are so heavily attached to atomic operations because one could classify something // as atomic if the dependency highlighted in the above example is allowed to be maintained. // // This is what people mean when you hear that volatile does NOT mean atomicity of the operation. // Usually people imply a lot of implicit assumptions when they mark a variable as volatile. // All volatile gives us is the ability to tell the compiler it may not assume anything // about the state of that memory location. This means the compiler must always emit a load // or store instruction, cannot perform constant folding, dead-store elimination, or // do any sort of code movement on volatile variables. // // ******** Preliminary Basics ******** // // It is expected that the reader understands what a cache is, how it is organized and how data // is chunked into cachelines. It is helpful if the reader understands basic cache coherency // protocols such as MSI or MESI. // It is expected the reader understands alignment, especially natural alignment // of the processor and why alignment is important for data access. // The reader should have some understanding of how a processor executes instructions, // basics of what Out-of-Order execution means and basics of what speculative execution means. 
// It is expected that the reader has an understanding of threading, multi-threaded programming // and the use of concurrency primitives such as mutexes. // Memory Barrier, Barrier, Memory Fence and Fence are all interchangeable synonyms. // // Independent memory operations can be performed or observed, depending on your perspective, // in any order as long as the local cpu thinks its execution is happening in program order. // This can be a problem for inter-cpu communications and thus we need some way to enforce // that the compiler does not reorder instructions and that the cpu also does not reorder // instructions. This is what a barrier is, it is an enforcement of ordering on memory instructions, // so as the name suggests a barrier. Barriers can be one-sided or both-sided which means // the barrier enforces a partial order above or below or on both sides of said barrier. // // Processors will use tricks such as out-of-order execution, memory instruction buffering and // combining, speculative loads and speculative execution, branch prediction and many types of caching even // in various interconnects from the cpu to the memory itself. One key thing to note is that cpus // do not physically reorder the instruction stream. Instructions are dispatched and retired // in-order but executed out-of-order. Memory barriers will prevent these tricks from happening // by controlling the interaction of multiple cpus. // // Compilers will morph your code and physically move instructions around as long as the program // has the same observed behaviour. This is becoming increasingly true with more optimization techniques // such as Link Time Optimization becoming the norm where once people assumed compilers couldn't assume // something outside the given TU and now because they have the whole program view they know everything. 
// This means the compiler does indeed alter the instruction stream // and compiler barriers are a way to tell it to not move any memory instructions across the barrier. // This does not prevent a compiler from doing optimizations such as constant folding, merging of // overlapping loads, or even dead store elimination. Compiler barriers are also very cheap and // have zero impact on anything that the compiler knows isn't visible in memory such as local variables // whose addresses do not escape the function even if their address is taken. You can think of it // in terms of a sequence point as used with "volatile" qualified variables to denote a place in code where // things must be stable and the compiler doesn't cache any variables in registers or do any reordering. // // Memory Barriers come in many flavours that instill a partial or full ordering on memory operations. // Some memory operations themselves have implicit ordering guarantees already, for example // Total-Store Order, TSO, architectures like x86 guarantee that a store operation cannot be reordered with a // previous store operation thus a memory barrier that only orders stores is not needed // on this architecture other than ensuring the compiler doesn't do any shenanigans. // Considering we have 4 permutations of memory operations; a common way to describe an ordering // is via Load-Load/LDLD, Load-Store/LDST, Store-Store/STST or Store-Load/STLD notation. You read this // notation as follows; STLD memory barrier means a load cannot be reordered with a previous store. // For example, on TSO architecture we can say all stores provide a STST memory barrier, // since a store cannot be reordered with a previous store. // // Memory Barriers in themselves are not a magic bullet, they come with caveats that must be known. // Each cpu architecture also has its own flavours and guarantees provided by said memory barriers.
// There is no guarantee that memory instructions specified before a memory barrier will complete, // be written to memory or fully propagated throughout the rest of the system, when the memory barrier // instruction completes. The memory barrier creates a point in that local cpu's queue of memory instructions // whereby they must not cross. There is no guarantee that using a memory barrier on one cpu will have // any effect at all on another remote cpu's observed view of memory. This also implies that executing // a memory barrier does not hinder, incur, stall or enforce any other cpus to serialize with each other cpu. // In order for a remote cpu to observe the correct effects it must also use a matching memory barrier. // This means code communicating in 2 threads through memory must both be employing the use of memory barriers. // For example, a store memory barrier that only orders stores, STST, in one thread must be paired with a load memory barrier // that only orders loads, LDLD, in the other thread trying to observe those stores in the correct order. // // ******** Memory Types && Devices ******** // // eastl::atomic and accompanying memory barriers ONLY ORDER MEMORY to cpu-to-cpu communication through whatever the // processor designates as normal cacheable memory. It does not order memory to devices. It does not provide any DMA ordering guarantees. // It does not order memory with other memory types such as Write Combining. It strictly orders memory only to shared memory that is used // to communicate between cpus only. // // ******** Sequentially Consistent Machine ******** // // The most intuitive as well as the model people naturally expect a concurrent system to have is Sequential Consistency. // You may have or definitely have heard this term if you dealt with any type of distributed system. Lamport's definition // articulates this consistency model the best.
// Leslie Lamport: "the result of any execution is the same as if the operations of all the processors were executed in some // sequential order, and the operations of each individual processor appear in this sequence in the order // specified by its program". // // A Sequentially Consistent machine is modelled as follows: // // ------------ ------------ // | Thread 0 | ... | Thread N | // ------------ ------------ // | | | | // | | | | // ---------------------------------------- // | | // | Shared Memory | // | | // ---------------------------------------- // // This is a sequentially consistent machine. Each thread is executing instructions in program order which does loads and stores // that are serialized in some order to the shared memory. This means all communication is done through the shared memory with one cpu // doing one access at a time. This system has a couple key properties. // // 1. There is no local cpu memory reordering. Each cpu executes instructions in program order and all loads and stores must complete, // be visible in the shared memory or be visible in a register before starting the next instruction. // 2. Each memory operation becomes visible to all cpus at the same time. If a store hits the shared memory, then all subsequent loads // from every other cpu will always see the latest store. // // A Sequentially Consistent machine has, Single-Copy Store Atomicity: All stores must become visible to all cores in the system at the same time. // // ******** Adding Caches ******** // // Caches by nature implicitly add the potential for memory reordering. A centralized shared snoopy bus that we all learned in school // makes it easy to implement sequential consistency with caches. Writes and reads are all serialized in a total order via the cache bus transaction // ordering. Every modern day bus is not inorder, and most certainly not a shared centralized bus. 
Cache coherency guarantees that all memory operations // will be propagated eventually to all parties, but it doesn't guarantee in what order or in what time frame. Once you add // caches, various levels of caching and various interconnects between remote cpus, you inevitably run into the issue where // some cpus observe the effects of a store before other cpus. Obviously we have weakly-ordered and strongly-ordered cpus with // caches so why is that? The short answer is, where is the onus put, is it on the programmer or the hardware. Does the hardware // have dependency tracking, is it able to determine when a memory order violation occurs such as rolling back its speculative execution // and also how far along the chain of interconnects does the hardware wait before it determines that the memory operation has // been acknowledged or is considered to satisfy its memory ordering guarantees. Again this is a very high level view of the system // as a whole, but the takeaway is yes; caches do add the potential for reordering but other supporting hardware determines whether // that is observable by the programmer. There is also some debate whether weakly-ordered processors are actually more performant // than strongly-ordered cpus alluding to the fact that the hardware has a better picture of what is a violation versus the programmer // having to emit far more barriers on weakly-ordered architectures in multi-threaded code which may actually not be needed because the // hardware didn't commit a violation but it may have and we as the programmer cannot rely on may haves. // // ******** Store Buffers ******** // // Obviously having all stores serialize results in unnecessary stalls. Store buffers alleviate this issue. // Store buffers are simple fixed size structures that sit between the cpu and the memory hierarchy. This allows // each cpu to record its write in the store buffer and then move onto the next instruction.
The store buffer will // eventually be flushed to the resulting memory hierarchy in FIFO order. How and when this flushing occurs is irrelevant to the // understanding of a store buffer. A read from an address will grab the most recent write to the same address in the store buffer. // // The introduction of a store buffer is our first dive into weaker memory consistency. The addition of this hardware turns the consistency model weaker, // into one that is commonly known as TSO, Total-Store Order. This is the exact model used by x86 cpus and we will see what this means // and what new effects are observed with the addition of the store buffer. Below is a diagram of how the machine may now look. // This type of store buffer is known as a FIFO store buffer, FIFO write buffer, or Load/Store Queue in some literature. This type of // store buffer introduces STLD reordering but still prevents STST reordering. We will take a look at another type of store buffer later. // Even with this store buffer, stores to the same address can still be merged so that only the latest store is written to the cache assuming // no other intermediary stores happen. x86 cpus do write merging even for consecutive stores, i.e. storing to A and A+1 can be merged into one two-byte store. // // ------------ ------------ // | Thread 0 | ... | Thread N | // ------------ ------------ // | | | | // | | | | // | Store | | Store | // | Buffer | | Buffer | // | | | | // ---------------------------------------- // | | // | Shared Memory | // | | // ---------------------------------------- // // ---- Store-Buffering / Dekker's Example ---- // This is a very common litmus test that showcases the introduction of STLD reordering. It is called Store-Buffering example because it is the only weaker // behaviour observed under TSO and also called Dekker's Example as it famously breaks Dekker's mutual exclusion algorithm. 
// // --------------------------- // Initial State: // x = 0; y = 0; // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | STORE(y, 1) // r0 = LOAD(y) | r1 = LOAD(x) // --------------------------- // Observed: r0 = 0 && r1 = 0 // --------------------------- // // We would normally assume that any interleaving of the two threads cannot possibly end up with both loads reading 0. We assume that the observed outcome // of r0 = 0 && r1 = 0 to be impossible, clearly that is not the case. Let's start by understanding the example with no reordering possible. Both threads // run and their first instruction is to write the value 1 into either x or y, the next instruction then loads from the opposite variable. This means no // matter the interleaving, one of the loads always executes after the other thread's store to that variable. // We could observe r0 = 1 && r1 = 1 if both threads execute in lockstep. // We could observe r0 = 0 && r1 = 1 if thread 0 executes and then thread 1 executes. // We could observe r0 = 1 && r1 = 0 if thread 1 executes and then thread 0 executes. // Since the stores always execute before that load in the other thread, one thread must always at least observe a store, so let's see why store buffers break this. // // What will happen is that STORE(x, 1) is stored to the store buffer but not made globally visible yet. // STORE(y, 1) is written to the store buffer and also is not made globally visible yet. // Both loads now read the initial state of x and y which is 0. We got the r0 = 0 && r1 = 0 outcome and just observed a Store-Load reordering. // It has appeared as if the loads have been reordered with the previous stores and thus executed before the stores. // Notice even if we execute the instructions in order, a series of other hardware side effects made it appear as if the instructions have been reordered. 
// We can solve this by placing a Store-Load barrier after the store and before the load as follows. // // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | STORE(y, 1) // STLD BARRIER | STLD BARRIER // r0 = LOAD(y) | r1 = LOAD(x) // --------------------------- // // This STLD barrier effectively will flush the store buffer into the memory hierarchy ensuring all stores in the buffer are visible to all other cpus at the same time // before executing the load instruction. Again nothing prevents a potential hardware from speculatively executing the load even with the STLD barrier, the hardware will have to do // a proper rollback if it detected a memory order violation otherwise it can continue on with its speculative load. The barrier just delimits a stability point. // // Most hardware does not provide granular barrier semantics such as STLD. Most provide a write memory barrier which only orders stores, STST, a read memory barrier // which only orders loads, LDLD, and then a full memory barrier which is all 4 permutations. So on x86 we will have to use the mfence, memory fence, instruction // which is a full memory barrier to get our desired STLD requirements. // // TSO also has the property that we call, Multi-Copy Store Atomicity. This means a cpu sees its own stores before they become visible to other cpus, // by forwarding them from the store buffer, but a store becomes visible to all other cpus at the same time when flushed from the store buffer. // // // Let's look at a non-FIFO store buffer now as seen in ARM cpus as an example and we will use a standard Message Passing example to see how it manifests in even weaker consistency. 
// A store buffer on ARM as an example allows write merging even with adjacent stores, is not a FIFO queue, any stores in the small hardware hash table may be ejected at any point // due to a collision eviction or the availability of cachelines in the cache hierarchy meaning that stores may bypass the buffer entirely if that cacheline is already owned by that cpu. // There is no guarantee that stores will be completed in order as in the FIFO case. // // --------------------------- // Initial State: // x = 0; y = 0; // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | while(LOAD(y) == 0); // STORE(y, 1) | r0 = LOAD(x) // --------------------------- // Observed: r0 = 0 // --------------------------- // // This is a classic Message Passing example that is very commonly used in production code. We store some values and then set a flag, STORE(y, 1) in this case. // The other thread waits until the flag is observed and then reads the value out of x. If we observed the flag then we should obviously see all stores before the flag was set. // Given our familiarity with TSO consistency above we know this definitely works on TSO and it is impossible to observe the load of x returning 0 under that consistency model. // Let's see how this breaks with a non-FIFO store buffer. // // Thread 0 executes the STORE(x, 1) but the cacheline for x is not in thread 0's cache so we write to the store buffer and wait for the cacheline. // Thread 1 executes the LOAD(y) and it also does not have y in its cacheline so it waits before completing the load. // Thread 0 moves on to STORE(y, 1). It owns this cacheline, hypothetically, so it may bypass the store buffer and store directly to the cache. // Thread 0 receives a message that Thread 1 needs y's cacheline, so it transfers the now modified cacheline to Thread 1. // Thread 1 completes the load with the updated value of y = 1 and branches out of the while loop since we saw the new value of y. 
// Thread 1 executes LOAD(x) which will return 0 since Thread 0 still hasn't flushed its store buffer waiting for x's cacheline. // Thread 0 receives x's cacheline and now flushes x = 1 to the cache. Thread 1 will also have invalidated its cacheline for x that it brought in via the previous load. // // We have now fallen victim to STST reordering, allowing Thread 1 to observe a load of x returning 0. Not only does this store buffer allow STLD reordering due to the nature of // buffering stores, but it also allows another reordering; that of Store-Store reordering. It was observed as if Thread 0 executed STORE(y, 1) before STORE(x, 1) which completely // broke our simple message passing scenario. // // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | while(LOAD(y) == 0); // STST BARRIER | // STORE(y, 1) | r0 = LOAD(x) // --------------------------- // // The STST memory barrier effectively ensures that the cpu will flush its store buffer before executing any subsequent stores. That is not entirely true, the cpu is still allowed // to continue and execute stores to the store buffer as long as it doesn't flush them to the cache before the previous stores are flushed to the cache. If nothing becomes // globally visible out of order then we are good. // The example above will change how the processor executes due to the STST memory barrier. Thread 0 will execute STORE(y, 1), write to the store buffer and mark all current entries. Even though it owns the cacheline // it cannot write the store to the cache until all marked entries, which are all the previous stores, are flushed to the cache. We have now fixed the message passing code by adding // a STST or write memory barrier and thus it is no longer possible to observe the load of x returning 0. 
// // ******** Invalidation Queues ******** // // Due to the cache coherency protocol in play, a write to a cacheline will have to send invalidation messages to all other cpus that may have that cacheline as well. // Immediately executing and responding to invalidation messages can cause quite a stall especially if the cache is busy at the moment with other requests. // The longer we wait to invalidate the cacheline, the longer the remote cpu doing the write is stalled waiting on us. We don't like this very much. // Invalidation Queues are just that, we queue up the action of actually invalidating the cacheline but immediately respond to the request saying we did it anyway. // Now the remote cpu thinks we invalidated said cacheline but actually it may very well still be in our cache ready to be read from. We just got weaker again, let's // see how this manifests in code by starting from the end of our previous example. // // --------------------------- // Initial State: // x = 0; y = 0; // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | while(LOAD(y) == 0); // STST BARRIER | // STORE(y, 1) | r0 = LOAD(x) // --------------------------- // Observed: r0 = 0 // --------------------------- // // Thread 1 receives the invalidate x's cacheline message and queues it because it is busy. // Thread 1 receives the invalidate y's cacheline message, but we don't have that cacheline so acknowledge immediately. // Thread 1 executes LOAD(y), loads in y's cacheline and branches out of the loop. // Thread 1 executes LOAD(x), and loads from the cache the old value of x because the invalidation message is still sitting in the invalidation queue. // // We have just again observed the load of x returning 0 but from a different type of reordering now on the reader side. // This is a form of LDLD, Load-Load, reordering as it appears as if LOAD(x) was executed before LOAD(y). This can be fixed as follows. 
// // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | while(LOAD(y) == 0); // STST BARRIER | LDLD BARRIER // STORE(y, 1) | r0 = LOAD(x) // --------------------------- // // The LDLD memory barrier essentially marks all entries currently in the invalidation queue. Any subsequent load must wait until all the marked entries have been // processed. This ensures once we observe y = 1, we process all entries that came before y and that way we observe all the stores that happened before y. // The insertion of the read memory barrier creates the required memory barrier pairing as discussed above and ensures that now our code executes as expected. // // It must be made clear that these are not the only hardware structure additions or ways that can relax STST, STLD and LDLD orderings. These are merely // 2 structures that are common and ones that I choose to use as examples of how hardware can reduce ordering guarantees. Knowing how the hardware does this // isn't always entirely clear but having a model that tells us what operations can be reordered is all we need to be able to reason about our code when executing on that hardware. // // ******** Load Buffering ******** // // The analog of the Store Buffering example, this litmus test has two threads read from two different locations and then write to the other locations. // The outcome of having LDST reordering is allowed and observable on many processors such as ARM. // // --------------------------- // Initial State: // x = 0; y = 0; // --------------------------- // Thread 0 | Thread 1 // --------------------------- // r0 = LOAD(x) | r1 = LOAD(y) // STORE(y, 1) | STORE(x, 1) // --------------------------- // Observed: r0 = 1 && r1 = 1 // --------------------------- // // This is possible because the processor does not have to wait for the other cpu's cacheline to arrive before storing into the cache. // Assume Thread 0 owns y's cacheline and Thread 1 owns x's cacheline. 
// The processor may execute the load and thus buffer the load waiting for the cacheline to arrive. // The processor may continue onto the store and since each cpu owns their respective cacheline, store the result into the cache. // The cpus now receive the cachelines for x and y with the now modified value. // We have just observed the loads returning 1 and thus observed LDST reordering. // // To forbid such outcome it suffices to add any full memory barrier to both threads or a local Read-After-Write/Read-To-Write dependency or a control dependency. // // ------------------------------- // Thread 0 | Thread 1 // ------------------------------- // r0 = LOAD(x) | r1 = LOAD(y) // if (r0 == 1) | if (r1 == 1) // STORE(y, 1) | STORE(x, 1) // ------------------------------- // // ----------------------------------------------------- // Thread 0 | Thread 1 // ----------------------------------------------------- // r0 = LOAD(x) | r1 = LOAD(y) // STORE(&(y + r0 - r0), 1) | STORE(&(x + r1 - r1), 1) // ----------------------------------------------------- // // Both fixes above ensure that both writes cannot be committed, made globally visible, until their program source code order preceding reads have been fully satisfied. // // ******** Compiler Barriers ******** // // Compiler barriers are both-sided barriers that prevent loads and stores from moving down past the compiler barrier and // loads and stores from moving up above the compiler barrier. Here we will see the various ways our code may be subject // to compiler optimizations and why compiler barriers are needed. Note as stated above, compiler barriers may not // prevent all compiler optimizations or transformations. Compiler barriers are usually implemented by reloading all // variables that are currently cached in registers and flushing all stores in registers back to memory. // This list isn't exhaustive but will hopefully try to outline what compiler barriers protect against and what they don't.
// // Compiler may reorder loads. // LOAD A; LOAD B; -> LOAD B; LOAD A; // LOAD A; operation on A; LOAD B; operation on B; -> LOAD A; LOAD B; operation on A; operation on B // // Insert a compiler barrier in between the two loads to guarantee that they are kept in order. // LOAD A; COMPILER_BARRIER; LOAD B; // LOAD A; operation on A; COMPILER_BARRIER; LOAD B; operation on B; // // The same with stores. // STORE(A, 1); STORE(B, 1); -> STORE(B, 1); STORE(A, 1); // operations and STORE result into A; operations and STORE result into B; -> all operations; STORE result into B; STORE result into A; // // Insert a compiler barrier in between the two stores to guarantee that they are kept in order. // It is not required that the multiple stores to A before the barrier are not merged into one final store. // It is not required that the store to B after the barrier be written to memory, it may be cached in a register for some indeterminate // amount of time as an example. // STORE(A, 1); COMPILER_BARRIER; STORE(B, 1); // // The compiler is allowed to merge overlapping loads and stores. // Inserting a compiler barrier here will not prevent the compiler from doing this optimization as doing one wider load/store is // technically still abiding by the guarantee that the loads/stores are not reordered with each other. // LOAD A[0]; LOAD A[1]; -> A single wider LOAD instruction // STORE(A[0], 1); STORE(A[1], 2); -> A single wider STORE instruction // // Compilers do not have to reload the values pointers point to. This is especially common with RISC architectures with lots // of general purpose registers or even compiler optimizations such as inlining or Link-Time Optimization. // int i = *ptr; Do bunch of operations; if (*ptr) { do more; } // It is entirely possible the compiler may remove the last if statement because it can keep the *ptr in a register // and it may infer from the operations done on i that i is never 0.
// // int i = *ptr; Do bunch of operations; COMPILER_BARRIER; if (*ptr) { do more; } // Inserting a compiler barrier at that location will cause the compiler to have to reload *ptr thus keeping the if statement assuming // no other optimizations take place, such as the compiler knowing that *ptr is always greater than 0. // // The compiler is within its rights to also merge and reload loads as much as it pleases. // // while (int tmp = LOAD(A)) // process_tmp(tmp) // // Will be merged and transformed to // // if (int tmp = LOAD(A)) // for (;;) process_tmp(tmp) // // Inserting a compiler barrier will ensure that LOAD(A) is always reloaded and thus the unwanted transformation is avoided. // // while (int tmp = LOAD(A)) // { // process_tmp(tmp) // COMPILER_BARRIER // } // // Under heavy register pressure scenarios, say the loop body was larger, the compiler may reload A as follows. // Compiler barriers cannot prevent this from happening, even if we put it after process_tmp as above; // the compiler still kept those loads above the barrier so it satisfied its contract even though it reloaded // from A more than once. // // while (int tmp = LOAD(A)) // process_tmp(LOAD(A)) // // In the above transformation it is possible that another cpu stores 0 into A. When we reload A for process_tmp, we pass 0 // to process_tmp() which it would actually never expect to observe. Because if we observed 0, the while loop condition // would never be satisfied. If the compiler under register pressure instead stored and loaded tmp from its stack slot, that is fine // because we are just storing and loading the original observed value from A. Obviously that is slower than just reloading from // A again so an optimizing compiler may not do the stack slot store. This is an unwanted transformation which eastl::atomic prevents // even on relaxed loads.
// // The compiler is allowed to do dead-store elimination if it knows that value has already been stored, or that only the last store // needs to be stored. The compiler does not assume or know that these variables are shared variables. // // STORE(A, 1); STORE(A, 1); // OPERATIONS; -> OPERATIONS; // STORE(A, 1); // // The compiler is well within its rights to omit the second store to A. Assuming we are doing some fancy lockfree communication // with another cpu and the last store is meant to ensure the ending value is 1 even if another cpu changed A in between; that // assumption will not be satisfied. A compiler barrier will not prevent the last store from being dead-store removed. // // STORE(A, 1); // OPERATIONS; // STORE(A, 2); // // Assuming these stores are meant to denote some state changes to communicate with a remote cpu. The compiler is allowed to // transform this as follows without a compiler barrier. Insert a compiler barrier between the two stores to prevent the transformation. // Something like this will also require memory barriers, but that is not the point of this section. // // STORE(A, 2); // OPERATIONS; // // The compiler is also allowed to invent stores as it may please. // First on many RISC architectures storing an immediate value either involves loading the immediate from the .data section // or combining a variety of load upper immediate and add or or immediate instructions to get our constant in a register and then // doing a single 32-bit store instruction from said register. Some ISAs have 16-bit stores with immediate value so that a store // may be broken into 2 16-bit store immediate values causing shearing. To reduce instruction dependencies it may also decide // to do two add immediates and then two 16-bit stores again causing shearing.
// // lui $t0, 1 # t0 == 0x00010000 // ori $a0, $t0, 8 # t0 == 0x00010008 // strw $t0, 0($a1) # store t0 into address at a1 // -> // ori $a0, $t0, 1 # t0 == 0x00000001 // ori $a0, $t1, 8 # t0 == 0x00000008 // strhw $t0, 0($a1) # store t0 lower half at a1 // strhw $t1, 2($a1) # store t1 upper half at a1 // // The above shows a potential transformation that a compiler barrier cannot solve for us. // // A compiler may also introduce stores to save on branching. Let's see. // // if (a) // STORE(X, 10); // else // STORE(X, 20); // // STORE(X, 20); // if (a) // STORE(X, 10); // // This is a very common optimization as it saves a potentially more expensive branch instruction but breaks multi-threaded code. // This is also another case where a compiler barrier doesn't give us the granularity we need. // The branches may even be completely removed with the compiler instead choosing to use conditional move operations which would // actually be compliant since there would be one store only done, an extra store wouldn't have been added. // // You are now probably thinking that compiler barriers are useful and are definitely needed to tell the compiler to calm down // and guarantee our hardware guarantees are valid because the code we wrote is the instructions that were emitted. // But there are definitely lots of caveats where compiler barriers do not at all provide the guarantees we still need. // This is where eastl::atomic comes into play, and under the relaxed memory ordering section it will be explained // what the standard guarantees and how we achieve those guarantees, like ensuring the compiler never does dead-store elimination or reloads. // // ******** Control Dependencies ******** // // Control dependencies are implicit local cpu ordering of memory instructions due to branching instructions, specifically // only conditional branches. The problem is compilers do not understand control dependencies, and control dependencies // are incredibly hard to understand. 
This is meant to make the reader aware they exist and to never use them // because they shouldn't be needed at all with eastl::atomic. Also control dependencies are categorized as LDLD or LDST, // store control dependencies inherently do not make sense since the conditional branch loads and compares two values. // // A LDLD control dependency is an anti-pattern since it is not guaranteed that any architecture will detect the memory-order violation. // r0 = LOAD(A); // if (r0) // r1 = LOAD(B) // // Given that sequence of instructions, it is entirely possible that a cpu attempts to speculatively predict and load the value of B // before the branch instruction has finished executing. It is entirely allowed that the cpu loads from B, assume B is in cache and A // is not in cache, before A. It is allowed, that even if the cpu was correct in its prediction that it doesn't reload B and change the // fact that it speculatively got lucky. // // This is also what the x86 pause instruction inserted into spin wait loops is meant to solve. // LOOP: // r0 = LOAD(A); // if (!r0) pause; goto LOOP; // // In the above spin loop, after a couple of iterations the processor will fill the pipeline with speculated cmp and load instructions. // x86 will catch a memory order violation if it sees that an external store was done to A and thus must flush the entire // pipeline of all the speculated load A. Pause instruction tells the cpu to not do speculative loads so that the pipeline is not // filled with all said speculative load instructions. This ensures we do not incur the costly pipeline flushes from memory order // violations which are likely to occur in tight spin wait loops. This also allows other threads on the same physical core to use the // core's resources better since our speculative nature won't be hogging it all. 
// // A LDST control dependency is a true dependency in which the cpu cannot make a store visible to the system and other cpus until it // knows its prediction is correct. Thus a LDST ordering is guaranteed and can be always relied upon as in the following example. // // r0 = LOAD(A); // if (r0) // STORE(B, 1); // // The fun part comes in with how does the compiler actually break all of this. // First is that if the compiler can ensure that the value of A in the LDST example is always not zero, then it is always within its // rights to completely remove the if statement which would lend us with no control dependency. // // Things get more fun when we deal with conditionals with else and else if statements where the compiler might be able to employ // invariant code motion optimizations. Take this example. // // r0 = LOAD(A); // r1 = LOAD(B); // if (r0) // STORE(B, 1); // /* MORE CODE */ // else if (r1) // STORE(B, 1); // /* MORE CODE */ // else // STORE(B, 1); // /* MORE CODE */ // // If we were trying to be smart and entirely rely on the control dependency to ensure order, ya well just don't the compiler // is always smarter. The compiler is well within its rights to move all the STORE(B, 1) up and above all the conditionals breaking // our reliance on the LDST control dependency. // // Things can get even more complicated especially in C++ when values may come from constexpr, inline, inline constexpr, static const, etc, // variables and thus the compiler will do all sorts of transformations to reduce, remove, augment and change all your conditional code since // it knows the values of the expressions or even parts of it at compile time. Even more aggressive optimizations like LTO might break code that was being cautious. // Even adding simple short circuiting logic or your classic likely/unlikely macros can alter conditionals in ways you didn't expect. // In short know enough about control dependencies to know not to ever use them. 
// // ******** Multi-Copy Store Atomicity && Barrier Cumulativity ******** // // Single-Copy Store Atomicity: All stores must become visible to all cores in the system at the same time. // // Multi-Copy Store Atomicity : This means a cpu sees its own stores before they become visible to other cpus, by forwarding them from the store buffer, // but a store becomes visible to all other cpus at the same time when flushed from the store buffer. // // Non-Atomic Store Atomicity : A store becomes visible to different cpus at different times. // // Those are the above variations of Store Atomicity. Most processors have Non-Atomic Store Atomicity and thus you must program to that lowest common denominator. // We can use barriers, with some caveats, to restore Multi-Copy Store Atomicity to a Non-Atomic system though we need to define a new granular definition for // memory barriers to define this behaviour. Simple LDLD/LDST/STST/STLD definition is not enough to categorize memory barriers at this level. Let's start off // with a simple example that breaks under a Non-Atomic Store Atomicity system and what potential hardware features allow this behaviour to be observed. // // NOTE: For all the below examples we assume no compiler reordering and that the processor also executes the instructions with no local reorderings to make the examples simpler, // to only show off the effects of Multi-Copy Store Atomicity. This is why we don't add any address dependencies, or mark explicit LDLD/LDST memory barriers. // Thus you may assume all LDLD and LDST pairs have an address dependency between them, so that they are not reordered by the compiler or the local cpu. 
// // --------------------------------------------------------------------------------------------------------- // Write-To-Read Causality, WRC, Litmus Test // --------------------------------------------------------------------------------------------------------- // Initial State: // X = 0; Y = 0; // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD(X) | r1 = LOAD(Y) // | STORE(Y, r0) | r2 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 1 && r2 = 0 // --------------------------------------------------------------------------------------------------------- // // Let's go over this example in detail and whether the outcome shown above can be observed. In this example Thread 0 stores 1 into X. If Thread 1 observes the write to X, // it stores the observed value into Y. Thread 2 loads from Y then X. This means if the load from Y returns 1, then we intuitively know the global store order // was 1 to X and then 1 to Y. So is it possible then that the load from X in Thread 2 can return 0 in that case? Under a Multi-Copy Store Atomicity system, that would be // impossible because once 1 was stored to X all cpus see that store so if Thread 2 saw the store to Y which can only happen after the store to X was observed, then // Thread 2 must also have observed the store to X and return 1. As you may well have figured out, it is possible under a Non-Atomic Store Atomicity system to still // observe the load from X returning 0 even if the above load from Y returned 1 in Thread 2. This completely breaks our intuition of causality. Let's now understand what hardware may cause this. 
// // This is possible on cpus that have Simultaneous Multi-Threading, SMT or HyperThreading in Intel parlance, which share resources such as store buffers or L1 cache. // We are accustomed to the x86 way of SMT where each logical core shares Execution Units on the physical core but each logical core has their own statically partitioned // cache and store buffer that is not visible to the other cpus. It is possible on cpus like ARMv7 or POWER, POWER9 supports 4 and even 8 threads per physical core, so // to save on die space though yet enable this large number of threads per physical core it is common for these logical cores to all use the same store buffer or L1 cache // per physical core on these processors. Let's take the above example and rerun it with this knowledge to get the observed behaviour outlined above. // // Assume Thread 0, Thread 1, and Thread 2 run on cpu 0, cpu 1, and cpu 2 respectively. Assume that cpu 0 and cpu 1 are two logical cores on the same physical core so this processor // has an SMT value of 2. Thread 0 will store 1 into X. This store may be in the store buffer or in the L1 cache that cpu 1 also shares with cpu 0, thus cpu 1 has early access to cpu 0's stores. // Thread 1 loads X which it observed as 1 early and then stores 1 into Y. Thread 2 may see the load from Y returning 1 but now the load from X returning 0 all because cpu 1 got early // access to cpu 0 store due to sharing a L1 cache or store buffer. // We will come back on how to fix this example with the proper memory barriers for the Non-Atomic Store Atomicity systems, but we need to detour first. // // We need to take a deeper dive into memory barriers to understand how to restore Multi-Copy Store Atomicity from a Non-Atomic Store Atomicity system. // Let's start with a motivating example and we will be using the POWER architecture throughout this example because it encompasses all the possible observable behaviour. 
// ARMv7 technically allows Non-Atomic Store Atomicity behaviour but no consumer ARMv7 chip actually observes this behaviour. // ARMv8 reworked its model to specifically say it is a Multi-Copy Store Atomicity system. // POWER is one of the last few popular consumer architectures that are guaranteed to have Non-Atomic Store Atomicity observable behaviour, thus we will be using it for the following examples. // // To preface, POWER has two types of memory barriers called lwsync and sync. The following table lists the guarantees provided by TSO, x86, and the lwsync instruction. // The table gives a hint as to why using our previous definition of LDLD/LDST/STST/STLD isn't granular enough to categorize memory barrier instructions. // // TSO: | POWER lwsync memory barrier: // LDLD : YES | LDLD : YES // LDST : YES | LDST : YES // STST : YES | STST : YES // STLD : NO | STLD : NO // A cumulative : YES | A cumulative : YES // B cumulative : YES | B cumulative : YES // IRIW : YES | IRIW : NO // // The TSO memory model provided by x86 seems to be exactly the same as POWER if we add lwsync memory barrier instructions in between each of the memory instructions. // This provides us the exact same ordering guarantees as the TSO memory model. If we just looked at the 4 permutations of reorderings we would be inclined to assume that // TSO has the exact same ordering as sprinkling lwsync in our code in between every pair of memory instructions. That is not the case because memory barrier causality and cumulativity differ in subtle ways. // In this case they differ by the implicit guarantees from the TSO memory model versus those provided by the POWER lwsync memory barrier. // So the lwsync memory barrier prevents reordering with instructions that have causality but does not prevent reordering with instructions that are completely independent. // Let's dive into these concepts a bit more. 
// // Non-Atomic Store Atomicity architectures are prone to behaviours such as the non-causal outcome of the WRC test above. Architectures such as POWER define memory barriers to enforce // ordering with respect to memory accesses in remote cpus other than the cpu actually issuing the memory barrier. This is known as memory barrier cumulativity. // How does the memory barrier issued on my cpu affect the view of memory accesses done by remote cpus? // // Cumulative memory barriers are defined as follows - Take your time this part is very non-trivial: // A-Cumulative: We denote group A as the set of memory instructions in this cpu or other cpus that are ordered before the memory barrier in this cpu. // A-Cumulativity requires that memory instructions from any cpu that have performed prior to a memory load before the memory barrier on this cpu are also members of group A. // B-Cumulative: We denote group B as the set of memory instructions in this cpu or other cpus that are ordered after the memory barrier in this cpu. // B-Cumulativity requires that memory instructions from any cpu that perform after a load and including the load in that cpu that returns the value of a store in group B are // also members of group B. // IRIW : enforces a global ordering even for memory instructions that have no causality. The memory instructions are completely independent. 
// // --------------------------------------------------------------------------------------------------------- // WRC Litmus Test // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 // --------------------------------------------------------------------------------------------------------- // {i} : STORE(X, 1) | {ii} : r0 = LOAD(X) | {v} : r1 = LOAD(Y) // | {iii} : lwsync | // | {iv} : STORE(Y, r0) | {vi} : r2 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // Outcome: r0 = 1 && r1 = 1 && r2 = 1 // // Group A of {iii} : {i} && {ii} // // Group B of {iii} : {iv} && {v} && {vi} // --------------------------------------------------------------------------------------------------------- // // Using the WRC test again and inserting a POWER lwsync, don't concern yourself with why the memory barrier was inserted at that spot right now, we now see the distinctions of group A and group B. // It demonstrates the A and B Cumulative nature of the lwsync instruction, {iii}. First group A, initially consists of {ii} and group B initially consists of {iv} from the local cpu that issued the lwsync. // Since {ii} reads from {i} and assume {i} happens before {ii}, by definition of A-Cumulativity {i} is included in group A. // Similarly {v} reads from {iv} and assume {iv} happens before {v}, then {v} is included in group B by definition of B-Cumulativity. // {vi} is also included in group B since it happens after {v} by definition of B-Cumulativity. // // WRC litmus test represents a scenario where only a A-Cumulative memory barrier is needed. The lwsync not only provides the needed local LDST memory barrier for the local thread but also ensures // that any write Thread 1 has read from before the memory barrier is kept in order with any write Thread 1 does after the memory barrier as far as any other thread observes. 
// In other words it ensures that any write that has propagated to Thread 1 before the memory barrier is propagated to any other thread before the second store after the memory barrier in Thread 1 // can propagate to other threads in the system. This is exactly the definition of A-Cumulativity and what we need to ensure that causality is maintained in the WRC Litmus Test example. // With that lwsync in place it is now impossible to observe r0 = 1 && r1 = 1 && r2 = 0. The lwsync has restored causal ordering. Let's look at an example that requires B-Cumulativity. // // --------------------------------------------------------------------------------------------------------- // Example 2 from POWER manual // --------------------------------------------------------------------------------------------------------- // Initial State: // X = 0; Y = 0; Z = 0 // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD(Y) | r1 = LOAD(Z) // STORE(Y, 1) | STORE(Z, r0) | r2 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 1 && r2 = 0 // --------------------------------------------------------------------------------------------------------- // // This example is very similar to WRC except that we kinda extended the Message Passing through an additional shared variable instead. // Think of this as Thread 0 writing some data into X, setting flag Y, Thread 1 waiting for flag Y then writing flag Z, and finally Thread 2 waiting for flag Z before reading the data. // Take a minute to digest the above example and think about where a memory barrier, lwsync, should be placed. Don't peek at the solution below. 
// // --------------------------------------------------------------------------------------------------------- // Example 2 from POWER manual // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD(Y) | r1 = LOAD(Z) // lwsync | | // STORE(Y, 1) | STORE(Z, r0) | r2 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // // First the lwsync provides the needed local STST memory barrier for the local thread, thus the lwsync here ensures that the store to X propagates to Thread 1 before the store to Y. // B-Cumulativity applied to all operations after the memory barrier ensure that the store to X is // kept in order with respect to the store to Z as far as all other threads participating in the dependency chain are concerned. This is the exact definition of B-Cumulativity. // With this one lwsync the outcome outlined above is impossible to observe. If r0 = 1 && r1 = 1 then r2 must be properly observed to be 1. // // We know that lwsync only provides A-Cumulativity and B-Cumulativity. Now we will look at examples that have no causality constraints thus we need to grab heavier memory barriers // that ensures in short we will say makes a store become visible to all processors, even those not on the dependency chains. Let's get to the first example. 
// // --------------------------------------------------------------------------------------------------------- // Independent Reads of Independent Writes, IRIW, coined by Doug Lea // --------------------------------------------------------------------------------------------------------- // Initial State: // X = 0; Y = 0; // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 | Thread 3 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD(X) | STORE(Y, 1) | r2 = LOAD(Y) // | r1 = LOAD(Y) | | r3 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 0 && r2 = 1 && r3 = 0 // --------------------------------------------------------------------------------------------------------- // // The IRIW example above clearly shows that writes can be propagated to different cpus in completely different orders. // Thread 1 sees the store to X but not the store to Y while Thread 3 sees the store to Y but not the store to X, the complete opposite. // Also to the keen eye you may have noticed this example is a slight modification of the Store Buffer example so try to guess where the memory barriers would go. 
// // --------------------------------------------------------------------------------------------------------- // Independent Reads of Independent Writes, IRIW, coined by Doug Lea // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 | Thread 3 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD(X) | STORE(Y, 1) | r2 = LOAD(Y) // | sync | | sync // | r1 = LOAD(Y) | | r3 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // // To ensure that the above observation is forbidden we need to add a full sync memory barrier on both the reading threads. Think of sync as restoring sequential consistency. // The sync memory barrier ensures that any writes that Thread 1 has read from before the memory barrier are fully propagated to all threads before the reads are satisfied after the memory barrier. // The same can be said for Thread 3. This is why the sync memory barrier is needed because there is no partial causal ordering here or anything that can be considered for our A and B Cumulativity definitions. // We must ensure that all writes have been propagated to all cpus before proceeding. This gives way to the difference between sync and lwsync with regards to visibility of writes and cumulativity. // sync guarantees that all program-order previous stores must have been propagated to all other cpus before the memory instructions after the memory barrier. // lwsync does not ensure that stores before the memory barrier have actually propagated to any other cpu before memory instructions after the memory barrier, but it will keep stores before and after the // lwsync in order as far as other cpus are concerned that are within the dependency chain. 
// // Fun fact while ARMv7 claims to be Non-Atomic Store Atomicity no mainstream ARM implementation that I have seen has shown cases of Non-Atomic Store Atomicity. // It's allowed by the ARMv7 memory model and thus you have to program to that. ARMv8 changes this and states that it has Multi-Copy Store Atomicity. // // ******** Release-Acquire Semantics ******** // // The most useful and common cases where Release-Acquire Semantics are used in every day code is in message passing and mutexes. Let's get onto some examples and the C++ definition of Release-Acquire. // // ACQUIRE: // An Acquire operation is a one-way memory barrier whereby all loads and stores after the acquire operation cannot move up and above the acquire operation. // Loads and stores before the acquire operation can move down past the acquire operation. An acquire operation should always be paired with a Release operation on the SAME atomic object. // // RELEASE: // A Release operation is a one-way memory barrier whereby all loads and stores before the release operation cannot move down and below the release operation. // Loads and stores after the release operation can move up and above the release operation. A release operation should always be paired with an Acquire operation on the SAME atomic object. // // Release-Acquire pair does not create a full memory barrier but it guarantees that all memory instructions before a Release operation on an atomic object M are visible after an Acquire // operation on that same atomic object M. Thus these semantics usually are enough to preclude the need for any other memory barriers. // The synchronization is established only between the threads Releasing and Acquiring the same atomic object M. 
// // --------------------------------------------------- // Critical Section // --------------------------------------------------- // Thread 0 | Thread 1 // --------------------------------------------------- // mtx.lock() - Acquire | mtx.lock() - Acquire // STORE(X, 1) | r0 = LOAD(X) // mtx.unlock() - Release | mtx.unlock() - Release // --------------------------------------------------- // // A mutex only requires Release-Acquire semantics to protect the critical section. We do not care if operations above the lock leak into the critical section or that operations below the unlock leak into the // critical section because they are outside the protected region of the lock()/unlock() pair. Release-Acquire semantics does guarantee that everything inside the critical section cannot leak out. // Thus all accesses of all previous critical sections for the mutex are guaranteed to have completed and be visible when the mutex is handed off to the next party due to the Release-Acquire chaining. // This also means that mutexes do not provide or restore Multi-Copy Store Atomicity to any memory instructions outside the mutex, like the IRIW example since it does not emit full memory barriers. // // ------------------------------------------------------ // Message Passing // ------------------------------------------------------ // Thread 0 | Thread 1 // ------------------------------------------------------ // STORE(DATA, 1) | while (!LOAD_ACQUIRE(FLAG)) // | // STORE_RELEASE(FLAG, 1) | r0 = LOAD(DATA) // ------------------------------------------------------ // // This is a common message passing idiom that also shows the use of Release-Acquire semantics. It should be obvious by the definitions outlined above why this works. // An Acquire operation attached to a load needs to provide a LDLD and LDST memory barrier according to our definition of acquire. This is provided by default on x86 TSO thus no memory barrier is emitted. 
// A Release operation attached to a store needs to provide a STST and LDST memory barrier according to our definition of release. This is provided by default on x86 TSO thus no memory barrier is emitted. // // A couple of things of note here. One is that by attaching the semantics of a memory model directly to the memory instruction/operation itself we can take advantage of the fact that some processors // already provide guarantees between memory instructions and thus we do not have to emit memory barriers. Another thing of note is that the memory model is directly attached to the operation, // so you must do the Release-Acquire pairing on the SAME object which in this case is the FLAG variable. Doing an Acquire or Release on a separate object has no guarantee to observe an Acquire or Release on a different object. // This better encapsulates the meaning of the code and also allows the processor to potentially do more optimizations since a stand alone memory barrier will order all memory instructions of a given type before and after the barrier. // Whereas the memory ordering attached to the load or store tells the processor that it only has to order memory instructions in relation to that specific load or store with the given memory order. // // // --------------------------------------------------------------------------------------------------------- // Release Attached to a Store VS. 
Standalone Fence // --------------------------------------------------------------------------------------------------------- // STORE(DATA, 1) | STORE(DATA, 1) // | ATOMIC_THREAD_FENCE_RELEASE() // STORE_RELEASE(FLAG, 1) | STORE_RELAXED(FLAG, 1) // STORE_RELAXED(VAR, 2) | STORE_RELAXED(VAR, 2) // --------------------------------------------------------------------------------------------------------- // ARMv8 Assembly // --------------------------------------------------------------------------------------------------------- // str 1, DATA | str 1, DATA // | dmb ish // stlr 1, FLAG | str 1, FLAG // str 2, VAR | str 2, VAR // --------------------------------------------------------------------------------------------------------- // // In the above example the release is attached to the FLAG variable, thus synchronization only needs to be guaranteed for that atomic variable. // It is entirely possible for the VAR relaxed store to be reordered above the release store. // In the fence version, since the fence is standalone, there is no notion where the release is meant to be attached to thus the fence must prevent all subsequent relaxed stores // from being reordered above the fence. The fence provides a stronger guarantee whereby now the VAR relaxed store cannot be moved up and above the release operation. // Also notice the ARMv8 assembly is different, the release fence must use the stronger dmb ish barrier instead of the dedicated release store instruction. // We dive more into fences provided by eastl::atomic below. // // Release-Acquire semantics also have the property that it must chain through multiple dependencies which is where our knowledge from the previous section comes into play. // Everything on the Release-Acquire dependency chain must be visible to the next hop in the chain. 
// // --------------------------------------------------------------------------------------------------------- // Example 2 from POWER manual // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD_ACQUIRE(Y) | r1 = LOAD_ACQUIRE(Z) // STORE_RELEASE(Y, 1) | STORE_RELEASE(Z, r0) | r2 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // // --------------------------------------------------------------------------------------------------------- // Write-To-Read Causality, WRC, Litmus Test // --------------------------------------------------------------------------------------------------------- // Thread 0 | Thread 1 | Thread 2 // --------------------------------------------------------------------------------------------------------- // STORE(X, 1) | r0 = LOAD(X) | r1 = LOAD_ACQUIRE(Y) // | STORE_RELEASE(Y, r0) | r2 = LOAD(X) // --------------------------------------------------------------------------------------------------------- // // You may notice both of these examples from the previous section. We replaced the standalone POWER memory barrier instructions with Release-Acquire semantics attached directly to the operations where we want causality preserved. // We have transformed those examples to use the eastl::atomic memory model. // Take a moment to digest these examples in relation to the definition of Release-Acquire semantics. // // The Acquire chain can be satisfied by reading the value from the store release or any later store headed by that release operation. The following examples will make this clearer. 
// // ------------------------------------------------------ // Release Sequence Headed // ------------------------------------------------------ // Initial State: // DATA = 0; FLAG = 0; // ------------------------------------------------------ // Thread 0 | Thread 1 // ------------------------------------------------------ // STORE(DATA, 1) | r0 = LOAD_ACQUIRE(FLAG) // | // STORE_RELEASE(FLAG, 1) | r1 = LOAD(DATA) // STORE_RELAXED(FLAG, 3) | // ------------------------------------------------------ // Observed: r0 = 3 && r1 = 0 // ------------------------------------------------------ // // In the above example we may read the value 3 from FLAG which was not the release store, but it was headed by that release store. Thus we observed a later store and therefore it is still valid to then observe r1 = 1. // The stores to FLAG from the STORE_RELEASE up to but not including the next STORE_RELEASE operation make up the release sequence headed by the first release store operation. Any store on that sequence can be used to enforce // causality on the load acquire. // // ******** Consume is currently not useful ******** // // Consume is a weaker form of an acquire barrier and creates the Release-Consume barrier pairing. // Consume states that a load operation on an atomic object M cannot allow any loads or stores dependent on the value loaded by the operation to be reordered before the operation. // To understand consume we must first understand dependent loads. // You might encounter this being called a data dependency or an address dependency in some literature. 
// // -------------------------------------------------------------- // Address Dependency // -------------------------------------------------------------- // Initial State: // DATA = 0; PTR = nullptr; // -------------------------------------------------------------- // Thread 0 | Thread 1 // -------------------------------------------------------------- // STORE(DATA, 1) | r0 = LOAD(PTR) - typeof(r0) = int* // | // STORE(PTR, &DATA) | r1 = LOAD(r0) - typeof(r1) = int // -------------------------------------------------------------- // // There is a clear dependency here where we cannot load from *int until we actually read the int* from memory. // Now it is possible for Thread 1's load from *ptr to be observed before the store to DATA, therefore it can lead to r0 = &DATA && r1 = 0. // While this is a failure of causality, it is allowed by some cpus such as the DEC Alpha and I believe Blackfin as well. // Thus a data dependency memory barrier must be inserted between the data dependent loads in Thread 1. Note that this would equate to a nop on any processor other than the DEC Alpha. // // This can occur for a variety of hardware reasons. We learned about invalidation queues. It is possible that the invalidation for DATA gets buffered in Thread 1. DEC Alpha allows the Thread 1 // load from PTR to continue without marking the entries in its invalidation queue. Thus the subsequent load is allowed to return the old cached value of DATA instead of waiting for the // marked entries in the invalidation queue to be processed. It is a design decision of the processor not to do proper dependency tracking here and instead relying on the programmer to insert memory barriers. // // This data dependent ordering guarantee is useful because in places where we were using an Acquire memory barrier we can reduce it to this Consume memory barrier without any hardware barriers actually emitted on every modern processor. 
// Let's take the above example, translate it to Acquire and Consume memory barriers and then translate it to the ARMv7 assembly and see the difference. // // --------------------------------------------------------------- --------------------------------------------------------------- // Address Dependency - Release-Acquire Address Dependency - Release-Acquire - ARMv7 Assembly // --------------------------------------------------------------- --------------------------------------------------------------- // Thread 0 | Thread 1 Thread 0 | Thread 1 // --------------------------------------------------------------- --------------------------------------------------------------- // STORE(DATA, 1) | r0 = LOAD_ACQUIRE(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) // | dmb ish | dmb ish // STORE_RELEASE(PTR, &DATA) | r1 = LOAD(r0) STORE(PTR, &DATA) | r1 = LOAD(r0) // --------------------------------------------------------------- --------------------------------------------------------------- // // To get Release-Acquire semantics on ARMv7 we need to emit dmb ish; memory barriers. 
// // --------------------------------------------------------------- --------------------------------------------------------------- // Address Dependency - Release-Consume Address Dependency - Release-Consume - ARMv7 Assembly // --------------------------------------------------------------- --------------------------------------------------------------- // Thread 0 | Thread 1 Thread 0 | Thread 1 // --------------------------------------------------------------- --------------------------------------------------------------- // STORE(DATA, 1) | r0 = LOAD_CONSUME(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) // | dmb ish | // STORE_RELEASE(PTR, &DATA) | r1 = LOAD(r0) STORE(PTR, &DATA) | r1 = LOAD(r0) // --------------------------------------------------------------- --------------------------------------------------------------- // // Data Dependencies can not only be created by read-after-write/RAW on registers, but also by RAW on memory locations too. Let's look at some more elaborate examples. 
// // --------------------------------------------------------------- --------------------------------------------------------------- // Address Dependency on Registers - Release-Consume - ARMv7 Address Dependency on Memory - Release-Consume - ARMv7 // --------------------------------------------------------------- --------------------------------------------------------------- // Thread 0 | Thread 1 Thread 0 | Thread 1 // --------------------------------------------------------------- --------------------------------------------------------------- // STORE(DATA, 1) | r0 = LOAD(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) // | r1 = r0 + 0 | STORE(TEMP, r0) // dmb ish | r2 = r1 - 0 dmb ish | r1 = LOAD(TEMP) // STORE(PTR, &DATA) | r3 = LOAD(r2) STORE(PTR, &DATA) | r2 = LOAD(r1) // --------------------------------------------------------------- --------------------------------------------------------------- // // The above shows a more elaborate example of how data dependent dependencies flow through RAW chains either through memory or through registers. // // Notice by identifying that this is a data dependent operation and asking for a consume ordering, we can completely eliminate the memory barrier on Thread 1 since we know ARMv7 does not reorder data dependent loads. Neat. // Unfortunately every major compiler upgrades a consume to an acquire ordering, because the consume ordering in the standard has a stronger guarantee and requires the compiler to do complicated dependency tracking. // Dependency chains in source code must be mapped to dependency chains at the machine instruction level until a std::kill_dependency in the source code. 
// // ---------------------------------------------------------------- // Non-Address Dependency && Multiple Chains // ---------------------------------------------------------------- // Initial State: // std::atomic FLAG; int DATA[1] = 0; // ---------------------------------------------------------------- // Thread 0 | Thread 1 // ---------------------------------------------------------------- // STORE(DATA[0], 1) | int f = LOAD_CONSUME(FLAG) // | int x = f // | if (x) return Func(x); // | // STORE_RELEASE(FLAG, 1) | Func(int y) return DATA[y - y] // ---------------------------------------------------------------- // // This example is really concise but there is a lot going on. Let's digest it. // First is that the standard allows consume ordering even on what we will call not true machine level dependencies like a ptr load and then a load from that ptr as shown in the previous examples. // Here the dependency is between two ints, and the dependency chain on Thread 1 is as follows. f -> x -> y -> DATA[y - y]. The standard requires that source code dependencies on the loaded value // from consume flow thru assignments and even thru function calls. Also notice we added a dependency on the dereference of DATA with the value loaded from consume which while it does nothing actually abides by the standard // by enforcing a source code data dependent load on the consume operation. You may see this referred to as artificial data dependencies in other texts. // If we assume the compiler is able to track all these dependencies, the question is how do we enforce these dependencies at the machine instruction level. Let's go back to our ptr dependent load example. // // ---------------------------------------------------------------- // addi r0, pc, offset; // ldr r1, 0(r0); // ldr r2, 0(r1); // ---------------------------------------------------------------- // // The above pseudo assembly does a pc relative calculation to find the address of ptr. 
We then load ptr and then continue the dependency chain by loading the int from the loaded ptr. // Thus r0 has type of int**, which we use to load r1 an int* which we use to load our final value of r2 which is the int. // The key observation here is that most instructions provided by most architectures only allow moving from a base register + offset into a destination register. // This allows for trivial capturing of data dependent loads through pointers. But how do we capture the data dependency of DATA[y - y]. We would need something like this. // // ---------------------------------------------------------------- // sub r1, r0, r0; // Assume r0 holds y from the Consume Operation // add r3, r1, r2; // Assume r2 holds the address of DATA[0] // ldr r4, 0(r3); // ---------------------------------------------------------------- // // We cannot use two registers as both arguments to the load instruction. Thus to accomplish this you noticed we had to add indirect data dependencies through registers to compute the final address from the consume // load of y and then load from the final computed address. The compiler would have to recognize all these dependencies and enforce that they be maintained in the generated assembly. // The compiler must ensure the entire syntactic, source code, data-dependency chain is enforced in the generated assembly, no matter how long such chain may be. // Because of this and other issues, every major compiler unilaterally promotes consume to an acquire operation across the board. Read reference [15] for more information. // This completely removes the actual usefulness of consume for the pointer dependent case which is used quite heavily in concurrent read heavy data structures where updates are published via pointer swaps. // // ******** read_depends use case - Release-ReadDepends Semantics ******** // // eastl::atomic provides a weaker read_depends operation that only encapsulates the pointer dependency case above. 
Loading from a pointer and then loading the value from the loaded pointer. // The read_depends operation can be used on loads from only an eastl::atomic<T*> type. The return pointer of the load must and can only be used to then further load values. And that is it. // If you are unsure, upgrade this load to an acquire operation. // // MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends); // int a = ptr->a; // int b = ptr->b; // return a + b; // // The loads from ptr after the gAtomicPtr load ensure that the correct values of a and b are observed. This pairs with a Release operation on the writer side by releasing gAtomicPtr. // // // As said above the returned pointer from a .load(memory_order_read_depends) can only be used to then further load values. // Dereferencing(*) and Arrow Dereferencing(->) are valid operations on return values from .load(memory_order_read_depends). // // MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends); // int a = ptr->a; - VALID // int a = *ptr; - VALID // // Since dereferencing is just indexing via some offset from some base address, this also means addition and subtraction of constants is ok. // // int* ptr = gAtomicPtr.load(memory_order_read_depends); // int a = *(ptr + 1) - VALID // int a = *(ptr - 1) - VALID // // Casts also work correctly since casting is just offsetting a pointer depending on the inheritance hierarchy or if using intrusive containers. // // ReadDependsIntrusive** intrusivePtr = gAtomicPtr.load(memory_order_read_depends); // ReadDependsIntrusive* ptr = ((ReadDependsIntrusive*)(((char*)intrusivePtr) - offsetof(ReadDependsIntrusive, next))); // // Base* basePtr = gAtomicPtr.load(memory_order_read_depends); // Derived* derivedPtr = static_cast<Derived*>(basePtr); // // Both of the above castings from the result of the load are valid for this memory order.
// // You can reinterpret_cast the returned pointer value to a uintptr_t to set bits, clear bits, or xor bits but the pointer must be cast back before doing anything else. // // int* ptr = gAtomicPtr.load(memory_order_read_depends); // ptr = reinterpret_cast<int*>(reinterpret_cast<uintptr_t>(ptr) & ~3); // // Do not use any equality or relational operator (==, !=, >, <, >=, <=) results in the computation of offsets before dereferencing. // As we learned above in the Control Dependencies section, CPUs will not order Load-Load Control Dependencies. Relational and equality operators are often compiled using branches. // It doesn't have to be compiled to branches; conditional instructions could be used. Or some architectures provide comparison instructions such as set less than which do not need // branches when using the result of the relational operator in arithmetic statements. Then again short circuiting may need to introduce branches since C++ guarantees the // rest of the expression must not be evaluated. // The following odd code is forbidden. // // int* ptr = gAtomicPtr.load(memory_order_read_depends); // int* ptr2 = ptr + (ptr >= 0); // int a = *ptr2; // // Only equality comparisons against nullptr are allowed. This is because the compiler cannot assume that the address of the loaded value is some known address and substitute our loaded value. // int* ptr = gAtomicPtr.load(memory_order_read_depends); // if (ptr == nullptr); - VALID // if (ptr != nullptr); - VALID // // Thus the above sentence that states: // The return pointer of the load must and can only be used to then further load values. And that is it. // must be respected by the programmer. This memory order is an optimization added for efficient read heavy pointer swapping data structures. IF you are unsure, use memory_order_acquire.
// // ******** Relaxed && eastl::atomic guarantees ******** // // We saw various ways that compiler barriers do not help us and that we need something more granular to make sure accesses are not mangled by the compiler to be considered atomic. // Ensuring these guarantees like preventing dead-store elimination or the splitting of stores into smaller sub stores is where the C/C++11 // standard comes into play to define what it means to operate on an atomic object. // These basic guarantees are provided via new compiler intrinsics on gcc/clang that provide explicit indication to the compiler. // Or on msvc by casting the underlying atomic T to a volatile T*, providing stronger compiler guarantees than the standard requires. // Essentially volatile turns off all possible optimizations on that variable access and ensures all volatile variables cannot be // reordered across sequence points. Again we are not using volatile here to guarantee atomicity, we are using it in its very intended purpose // to tell the compiler it cannot assume anything about the contents of that variable. Now let's dive into the base guarantees of eastl::atomic. // // The standard defines the following for all operations on an atomic object M. // // Write-Write Coherence: // If an operation A modifies an atomic object M(store), happens before an operation B that modifies M(store), then A shall be earlier than B in the modification order of M. // // Read-Read Coherence: // If a value computation A on an atomic object M(load), happens before a value computation B on M(load), and A takes its value from a side effect X on M(from a previous store to M), then the value // computed by B shall either be the value stored by X or some later side effect Y on M, where Y follows X in the modification order of M. 
// // Read-Write Coherence: // If a value computation A on an atomic object M(load), happens before an operation B that modifies M(store), then A shall take its value from a side effect X on M, where X precedes B in the modification // order of M. // // Write-Read Coherence: // If a side effect X on an atomic object M(store), happens before a value computation B on M(load), then the evaluation of B must take its value from X or from some side effect Y that follows X in the // modification order of M. // // What does all this mean. This is just a pedantic way of saying that the preceding coherence requirements disallow compiler reordering of atomic operations to a single atomic object. // This means all operations must be emitted by the compiler. Stores cannot be dead-store eliminated even if they are the only stores. // Loads cannot have common subexpression elimination performed on them even if they are the only loads. // Loads and Stores to the same atomic object cannot be reordered by the compiler. // Compiler cannot introduce extra loads or stores to the atomic object. // Compiler also cannot reload from an atomic object, it must save and store to a stack slot. // Essentially this provides all the necessary guarantees needed when treating an object as atomic from the compiler's point of view. // // ******** Same Address LoadLoad Reordering ******** // // It is expected that same address operations cannot and are not reordered with each other. It is expected that operations to the same address have sequential consistency because // they are to the same address. If you picture a cpu executing instructions, how is it possible to reorder instructions to the same address and yet keep program behaviour the same. // Same Address LoadLoad Reordering is one weakening that is possible to do and keep observed program behaviour for a single-threaded program. // More formally, A and B are two memory instructions onto the same address P, where A is program ordered before B.
If A and B are both loads then their order need not be ordered. // If B is a store then it cannot retire the store before A instruction completes. If A is a store and B is a load, then B must get its value forwarded from the store buffer or observe a later store // from the cache. Thus Same Address LDST, STST, STLD cannot be reordered but Same Address LDLD can be reordered. // Intel Itanium and SPARC RMO cpus allow and do Same Address LoadLoad Reordering. // Let's look at an example. // // --------------------------- // Same Address LoadLoad // --------------------------- // Initial State: // x = 0; // --------------------------- // Thread 0 | Thread 1 // --------------------------- // STORE(x, 1) | r0 = LOAD(x) // | r1 = LOAD(x) // --------------------------- // Observed: r0 = 1 && r1 = 0 // --------------------------- // // Notice in the above example it has appeared as if the two loads from the same address have been reordered. If we first observed the new store of 1, then the next load should not observe a value in the past. // Many programmers expect same address sequential consistency, all accesses to a single address appear to execute in a sequential order. // Notice this violates the Read-Read Coherence for all atomic objects defined by the std and thus provided by eastl::atomic. // // All operations on eastl::atomic irrespective of the memory ordering of the operation provide Same Address Sequential Consistency since it must abide by the coherence rules above.
// // ******** eastl::atomic_thread_fence ******** // // eastl::atomic_thread_fence(relaxed) : Provides no ordering guarantees // eastl::atomic_thread_fence(acquire) : Prevents all prior loads from being reordered with all later loads and stores, LDLD && LDST memory barrier // eastl::atomic_thread_fence(release) : Prevents all prior loads and stores from being reordered with all later stores, STST && LDST memory barrier // eastl::atomic_thread_fence(acq_rel) : Union of acquire and release, LDLD && STST && LDST memory barrier // eastl::atomic_thread_fence(seq_cst) : Full memory barrier that provides a single total order // // See Reference [9] and Fence-Fence, Atomic-Fence, Fence-Atomic Synchronization, Atomics Order and Consistency in the C++ std. // // ******** Atomic && Fence Synchronization ******** // // --------------------------- // Fence-Fence Synchronization // --------------------------- // A release fence A synchronizes-with an acquire fence B if there exist operations X and Y on the same atomic object M, such that fence A is sequenced-before operation X and X modifies M, // operation Y is sequenced-before B and Y reads the value written by X. // In this case all non-atomic and relaxed atomic stores that are sequenced-before fence A will happen-before all non-atomic and relaxed atomic loads after fence B. // // ---------------------------- // Atomic-Fence Synchronization // ---------------------------- // An atomic release operation A on atomic object M synchronizes-with an acquire fence B if there exists some atomic operation X on atomic object M, such that X is sequenced-before B and reads // the value written by A. // In this case all non-atomic and relaxed atomic stores that are sequenced-before atomic release operation A will happen-before all non-atomic and relaxed atomic loads after fence B. 
// // ---------------------------- // Fence-Atomic Synchronization // ---------------------------- // A release fence A synchronizes-with an atomic acquire operation B on an atomic object M if there exists an atomic operation X such that A is sequenced-before X, X modifies M and B reads the // value written by X. // In this case all non-atomic and relaxed atomic stores that are sequenced-before fence A will happen-before all non-atomic and relaxed atomic loads after atomic acquire operation B. // // This can be used to add synchronization to a series of several relaxed atomic operations, as in the following trivial example. // // ---------------------------------------------------------------------------------------- // Initial State: // x = 0; // eastl::atomic y = 0; // z = 0; // eastl::atomic w = 0; // ---------------------------------------------------------------------------------------- // Thread 0 | Thread 1 // ---------------------------------------------------------------------------------------- // x = 2 | r0 = y.load(memory_order_relaxed); // z = 2 | r1 = w.load(memory_order_relaxed); // atomic_thread_fence(memory_order_release); | atomic_thread_fence(memory_order_acquire); // y.store(1, memory_order_relaxed); | r2 = x // w.store(1, memory_order_relaxed); | r3 = z // ---------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 1 && r2 = 0 && r3 = 0 // ---------------------------------------------------------------------------------------- // // ******** Atomic vs Standalone Fence ******** // // A sequentially consistent fence is stronger than a sequentially consistent operation because it is not tied to a specific atomic object. // An atomic fence must provide synchronization with ANY atomic object whereas the ordering on the atomic object itself must only provide // that ordering on that SAME atomic object. Thus this can provide cheaper guarantees on architectures with dependency tracking hardware. 
// Let's look at a concrete example that will make this all clear. // // ---------------------------------------------------------------------------------------- // Initial State: // eastl::atomic y = 0; // eastl::atomic z = 0; // ---------------------------------------------------------------------------------------- // Thread 0 | Thread 1 // ---------------------------------------------------------------------------------------- // z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); // atomic_thread_fence(memory_order_seq_cst); | atomic_thread_fence(memory_order_seq_cst); // y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); // ---------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 0 // ---------------------------------------------------------------------------------------- // // Here the two sequentially consistent fences synchronize-with each other thus ensuring that if we observe r0 = 1 then we also observe that r1 = 2. // In the above example if we observe r0 = 1 it is impossible to observe r1 = 0. 
// // ---------------------------------------------------------------------------------------- // Initial State: // eastl::atomic<int> x = 0; // eastl::atomic<int> y = 0; // eastl::atomic<int> z = 0; // ---------------------------------------------------------------------------------------- // Thread 0 | Thread 1 // ---------------------------------------------------------------------------------------- // z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); // x.fetch_add(1, memory_order_seq_cst); | x.fetch_add(1, memory_order_seq_cst); // y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); // ---------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 0 // ---------------------------------------------------------------------------------------- // // Here the two fetch_add sequentially consistent operations on x synchronize-with each other ensuring that if we observe r0 = 1 then we cannot observe r1 = 0. // The thing to take note here is that we synchronized on the SAME atomic object, that being the atomic object x. // Note that replacing the x.fetch_add() in Thread 1 with a sequentially consistent operation on another atomic object or a sequentially consistent fence can lead to // observing r1 = 0 even if we observe r0 = 1. For example the following code may fail.
// // ---------------------------------------------------------------------------------------- // Initial State: // eastl::atomic x = 0; // eastl::atomic y = 0; // eastl::atomic z = 0; // ---------------------------------------------------------------------------------------- // Thread 0 | Thread 1 // ---------------------------------------------------------------------------------------- // z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); // | x.fetch_add(1, memory_order_seq_cst); // y.fetch_add(1, memory_order_seq_cst); | r1 = z.load(memory_order_relaxed); // ---------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 0 // ---------------------------------------------------------------------------------------- // // ---------------------------------------------------------------------------------------- // Initial State: // eastl::atomic x = 0; // eastl::atomic y = 0; // eastl::atomic z = 0; // ---------------------------------------------------------------------------------------- // Thread 0 | Thread 1 // ---------------------------------------------------------------------------------------- // z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); // x.fetch_add(1, memory_order_seq_cst); | atomic_thread_fence(memory_order_seq_cst); // y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); // ---------------------------------------------------------------------------------------- // Observed: r0 = 1 && r1 = 0 // ---------------------------------------------------------------------------------------- // // In this example it is entirely possible that we observe r0 = 1 && r1 = 0 even though we have source code causality and sequentially consistent operations. 
// Observability is tied to the atomic object on which the operation was performed and the thread fence doesn't synchronize-with the fetch_add because // there is no load above the fence that reads the value from the fetch_add. // // ******** Sequential Consistency Semantics ******** // // See section, Order and consistency, in the C++ std and Reference [9]. // // A load with memory_order_seq_cst performs an acquire operation // A store with memory_order_seq_cst performs a release operation // A RMW with memory_order_seq_cst performs both an acquire and a release operation // // All memory_order_seq_cst operations exhibit the below single total order in which all threads observe all modifications in the same order // // Paraphrasing, there is a single total order on all memory_order_seq_cst operations, S, such that each sequentially consistent operation B that loads a value from // atomic object M observes either the result of the last sequentially consistent modification A on M, or some modification on M that isn't memory_order_seq_cst. // For atomic modifications A and B on an atomic object M, B occurs after A in the total order of M if: // there is a memory_order_seq_cst fence X whereby A is sequenced before X, and X precedes B, // there is a memory_order_seq_cst fence Y whereby Y is sequenced before B, and A precedes Y, // there are memory_order_seq_cst fences X and Y such that A is sequenced before X, Y is sequenced before B, and X precedes Y. // // Let's look at some examples using memory_order_seq_cst. 
// // ------------------------------------------------------------ // Store-Buffer // ------------------------------------------------------------ // Initial State: // x = 0; y = 0; // ------------------------------------------------------------ // Thread 0 | Thread 1 // ------------------------------------------------------------ // STORE_RELAXED(x, 1) | STORE_RELAXED(y, 1) // ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST) // r0 = LOAD_RELAXED(y) | r1 = LOAD_RELAXED(x) // ------------------------------------------------------------ // Observed: r0 = 0 && r1 = 0 // ------------------------------------------------------------ // // ------------------------------------------------------------ // Store-Buffer // ------------------------------------------------------------ // Initial State: // x = 0; y = 0; // ------------------------------------------------------------ // Thread 0 | Thread 1 // ------------------------------------------------------------ // STORE_SEQ_CST(x, 1) | STORE_SEQ_CST(y, 1) // r0 = LOAD_SEQ_CST(y) | r1 = LOAD_SEQ_CST(x) // ------------------------------------------------------------ // Observed: r0 = 0 && r1 = 0 // ------------------------------------------------------------ // // Both solutions above are correct to ensure that the end results cannot lead to both r0 and r1 returning 0. Notice that the second one requires memory_order_seq_cst on both // operations to ensure they are in the total order, S, for all memory_order_seq_cst operations. The other example uses the stronger guarantee provided by a sequentially consistent fence. 
// // ------------------------------------------------------------------------------------------------ // Read-To-Write Causality // ------------------------------------------------------------------------------------------------ // Initial State: // x = 0; y = 0; // ------------------------------------------------------------------------------------------------ // Thread 0 | Thread 1 | Thread 2 // ------------------------------------------------------------------------------------------------ // STORE_SEQ_CST(x, 1) | r0 = LOAD_RELAXED(x) | STORE_RELAXED(y, 1) // | ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST) // | r1 = LOAD_RELAXED(y) | r2 = LOAD_RELAXED(x) // ------------------------------------------------------------------------------------------------ // Observed: r0 = 1 && r1 = 0 && r2 = 0 // ------------------------------------------------------------------------------------------------ // // You'll notice this example is an in between example of the Store-Buffer and IRIW examples we have seen earlier. The store in Thread 0 needs to be sequentially consistent so it synchronizes with the // thread fence in Thread 1. C++20, due to Reference [9], increased the strength of sequentially consistent fences to allow for the following.
//
// ------------------------------------------------------------------------------------------------
// Read-To-Write Causality - C++20
// ------------------------------------------------------------------------------------------------
// Initial State:
// x = 0; y = 0;
// ------------------------------------------------------------------------------------------------
// Thread 0            | Thread 1                     | Thread 2
// ------------------------------------------------------------------------------------------------
// STORE_RELAXED(x, 1) | r0 = LOAD_RELAXED(x)         | STORE_RELAXED(y, 1)
//                     | ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST)
//                     | r1 = LOAD_RELAXED(y)         | r2 = LOAD_RELAXED(x)
// ------------------------------------------------------------------------------------------------
// Observed: r0 = 1 && r1 = 0 && r2 = 0
// ------------------------------------------------------------------------------------------------
//
// Notice we were able to turn the store in Thread 0 into a relaxed store and still properly observe either r1 or r2 returning 1.
// Note that all implementations of the C++11 standard, for every architecture, already allow the C++20 behaviour.
// The C++20 standard memory model was updated to recognize that all current implementations are able to implement
// these fences with the stronger semantics.
//
// ******** False Sharing ********
//
// As we know, operations work at the granularity of a cacheline. A RMW operation obviously must have some help from the cache to ensure the entire operation
// is seen as one whole unit. Conceptually we can think of this as the cpu's cache taking a lock on the cacheline, the cpu doing the read-modify-write operation on the
// locked cacheline, and then releasing the lock on the cacheline. This means during that time any other cpu needing that cacheline must wait for the lock to be released.
//
// If we have two atomic objects doing RMW operations and they are within the same cacheline, they are unintentionally contending and serializing with each other even
// though they are two completely separate objects. This phenomenon is commonly known as false sharing.
// You can cacheline align your structure or the eastl::atomic<T> object to prevent false sharing.
//
// ******** union of eastl::atomic<T> ********
//
// union { eastl::atomic<uint8_t> atomic8; eastl::atomic<uint32_t> atomic32; };
//
// Since operations work at the granularity of a processor's cacheline, we might expect that storing to and loading
// from different width atomic variables at the same address would not cause weird observable behaviour, but it may.
// Store Buffers allow smaller stores to replace parts of larger loads that are forwarded from a store buffer.
// This means if there are 2 bytes of modified data in the store buffer that overlap with a 4 byte load, the 2 bytes will be forwarded
// from the store buffer. This is even documented behaviour of the x86 store buffer in the x86 architecture manual.
// This behaviour can cause processors to observe values that have never and will never be visible on the bus to other processors.
// The use of a union with eastl::atomic<T> is not wrong but your code must be able to withstand these effects.
//
// Assume everything starts out initially as zero.
//
// ---------------------------------------------------------------------------------------------------------
// Thread 0                | Thread 1                          | Thread 2
// ---------------------------------------------------------------------------------------------------------
// cmpxchg 0 -> 0x11111111 | cmpxchg 0x11111111 -> 0x22222222  | mov byte 0x33; mov 4 bytes into register;
// ---------------------------------------------------------------------------------------------------------
//
// After all operations complete, the value in memory at that location is 0x22222233.
// It is possible that the 4 byte load in thread 2 actually returns 0x11111133. // Now 0x11111133 is an observed value that no other cpu could observe because it was never globally visible on the data bus. // // If the value in memory is 0x22222233 then the first cmpxchg succeeded, then the second cmpxchg succeeded and finally our // byte to memory was stored, yet our load returned 0x11111133. This is because store buffer contents can be forwarded to overlapping loads. // It is possible that the byte store got put in the store buffer. Our load happened after the first cmpxchg with the byte forwarded. // This behaviour is fine as long as your algorithm is able to cope with this kind of store buffer forwarding effects. // // Reference [13] is a great read on more about this topic of mixed-size concurrency. // ///////////////////////////////////////////////////////////////////////////////// #include #include #include #include #endif /* EASTL_ATOMIC_H */ ================================================ FILE: include/EASTL/atomic_raw.h ================================================ ///////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////////// #pragma once #include // This header provides extension functions for atomically operating on non-atomic types with // specific memory order semantics. These functions should be used as a last resort when using // the standard atomic types is impossible (for example due to having to cross established API // boundaries which can't change). // // The reason the atomic template should be preferred is that all operations on that type // are atomic by design, using the functions provided by this header and mixing atomic and // non-atomic accesses leads to potential data races unless done with extreme care. 
// However, as mentioned above, sometimes there are circumstances where using atomic<T> is
// unfeasible. Whenever this API is used, it is recommended to verify that your data access
// assumptions are consistent and correct by using tools like TSAN (thread sanitizer).
//
// For every member function of the form atomic<T>::foo(...) where T is bool, an integral type,
// or a pointer type, we provide an equivalent free function atomic_raw_foo(T* ptr, ...) which
// operates on the T pointed to by ptr. Specifically:
//
// For bool we provide:
// - bool atomic_raw_load(bool*, mem_order)
// - void atomic_raw_store(bool*, bool, mem_order)
// - bool atomic_raw_exchange(bool*, bool, mem_order)
// - bool atomic_raw_compare_exchange_weak/strong(bool*, bool&, bool, mem_ord(, mem_ord))
//
// For integral T we provide:
// - T atomic_raw_load(T*, mem_order)
// - void atomic_raw_store(T*, T, mem_order)
// - T atomic_raw_exchange(T*, T, mem_order)
// - bool atomic_raw_compare_exchange_weak/strong(T*, T&, T, mem_ord(, mem_ord))
// - T atomic_raw_fetch_add/atomic_raw_add_fetch(T*, T, mem_ord)
// - T atomic_raw_fetch_sub/atomic_raw_sub_fetch(T*, T, mem_ord)
// - T atomic_raw_fetch_and/atomic_raw_and_fetch(T*, T, mem_ord)
// - T atomic_raw_fetch_or / atomic_raw_or_fetch(T*, T, mem_ord)
// - T atomic_raw_fetch_xor/atomic_raw_xor_fetch(T*, T, mem_ord)
//
// For pointer types T* (any T) we provide:
// - T* atomic_raw_load(T**, mem_order)
// - void atomic_raw_store(T**, T*, mem_order)
// - T* atomic_raw_exchange(T**, T*, mem_order)
// - bool atomic_raw_compare_exchange_weak/strong(T**, T*&, T*, mem_ord(, mem_ord))
// - T* atomic_raw_fetch_add/atomic_raw_add_fetch(T**, ptrdiff_t, mem_ord)
// - T* atomic_raw_fetch_sub/atomic_raw_sub_fetch(T**, ptrdiff_t, mem_ord)
// For pointer types, the fetch operations take a ptrdiff_t offset rather than a T* and look like:
// T* fetch_add(T** p, ptrdiff_t arg, memory_order)
// T* add_fetch(T** p, ptrdiff_t arg, memory_order)
// The supported operations for pointer types are: add, sub.
#define EASTL_ATOMIC_RAW_OPERATION_IMPL(Operation, OpAlias, p, MemOrderMacro, BitCount) \ EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, Operation), FUNC_WITH_PTR_IMPL) \ (EASTL_ATOMIC_BASE_OP_JOIN(OpAlias, MemOrderMacro), BitCount, ptr); #define EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_WITH_ALIAS_IMPL(Operation, OpAlias, MemOrderMacro) \ static_assert(sizeof(T) <= 8, "Atomic functions only support up to 64bit types"); \ static_assert(eastl::is_integral_v || eastl::is_pointer_v, \ "We only support these for integral and pointer types"); \ if constexpr (sizeof(T) == 1) \ { \ EASTL_ATOMIC_RAW_OPERATION_IMPL(Operation, OpAlias, ptr, MemOrderMacro, 8) \ } \ else if constexpr (sizeof(T) == 2) \ { \ EASTL_ATOMIC_RAW_OPERATION_IMPL(Operation, OpAlias, ptr, MemOrderMacro, 16) \ } \ else if constexpr (sizeof(T) == 4) \ { \ EASTL_ATOMIC_RAW_OPERATION_IMPL(Operation, OpAlias, ptr, MemOrderMacro, 32) \ } \ else if constexpr (sizeof(T) == 8) \ { \ EASTL_ATOMIC_RAW_OPERATION_IMPL(Operation, OpAlias, ptr, MemOrderMacro, 64) \ } #define EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_IMPL(Operation, MemOrderMacro) \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_WITH_ALIAS_IMPL(Operation, Operation, MemOrderMacro) #define EASTL_ATOMIC_RAW_LOAD_TMPL(MemOrderType, MemOrderMacro) \ template \ T atomic_raw_load(T* ptr, MemOrderType) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_IMPL(LOAD_, MemOrderMacro) \ } // Special memory order only provided for pointers #define EASTL_ATOMIC_RAW_POINTER_READ_DEPENDS_LOAD_TMPL() \ template \ T* atomic_raw_load(T** p, internal::memory_order_read_depends_s) \ { \ static_assert(sizeof(T*) == 8 || sizeof(T*) == 4, "Invalid pointer size"); \ if constexpr (sizeof(T*) == 4) \ { \ EASTL_ATOMIC_POINTER_READ_DEPENDS_LOAD_IMPL(32, p) \ } \ if constexpr (sizeof(T*) == 8) \ { \ EASTL_ATOMIC_POINTER_READ_DEPENDS_LOAD_IMPL(64, p) \ } \ } #define EASTL_ATOMIC_RAW_STORE_TMPL(MemOrderType, MemOrderMacro) \ template \ void atomic_raw_store(T* ptr, T desired, 
MemOrderType) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_IMPL(STORE_, MemOrderMacro) \ } #define EASTL_ATOMIC_RAW_EXCHANGE_TMPL(MemOrderType, MemOrderMacro) \ template \ T atomic_raw_exchange(T* ptr, T desired, MemOrderType) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_IMPL(EXCHANGE_, MemOrderMacro) \ } #define EASTL_ATOMIC_RAW_CMPXCHG1_TMPL(MemOrderType, MemOrderMacro) \ template \ bool atomic_raw_compare_exchange_weak(T* ptr, T& expected, T desired, MemOrderType) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_WITH_ALIAS_IMPL(CMPXCHG_, CMPXCHG_WEAK_, MemOrderMacro) \ } \ template \ bool atomic_raw_compare_exchange_strong(T* ptr, T& expected, T desired, MemOrderType) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_WITH_ALIAS_IMPL(CMPXCHG_, CMPXCHG_STRONG_, MemOrderMacro) \ } #define EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(MemOrderType1, MemOrderType2, MemOrderMacro) \ template \ bool atomic_raw_compare_exchange_weak(T* ptr, T& expected, T desired, MemOrderType1, MemOrderType2) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_WITH_ALIAS_IMPL(CMPXCHG_, CMPXCHG_WEAK_, MemOrderMacro) \ } \ template \ bool atomic_raw_compare_exchange_strong(T* ptr, T& expected, T desired, MemOrderType1, MemOrderType2) \ { \ EASTL_ATOMIC_RAW_SIZE_DEPENDANT_OPERATION_WITH_ALIAS_IMPL(CMPXCHG_, CMPXCHG_STRONG_, MemOrderMacro) \ } #define EASTL_ATOMIC_RAW_INTEGRAL_FETCH_ORDER_TMPL(FuncName, MacroFnName, MemOrderType, MemOrderMacro) \ template \ T EA_PREPROCESSOR_JOIN(atomic_raw_, FuncName)(T * p, T arg, MemOrderType) \ { \ static_assert(eastl::is_integral_v && !eastl::is_same_v, \ "This API is only enabled for integral non boolean types."); \ if constexpr (sizeof(T) == 1) \ { \ EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(MacroFnName, MemOrderMacro), 8, p, T); \ } \ else if constexpr (sizeof(T) == 2) \ { \ EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(MacroFnName, MemOrderMacro), 16, p, \ T); \ } \ else if constexpr (sizeof(T) == 4) \ { \ 
EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(MacroFnName, MemOrderMacro), 32, p, \ T); \ } \ else if constexpr (sizeof(T) == 8) \ { \ EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(MacroFnName, MemOrderMacro), 64, p, \ T); \ } \ } #define EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(FuncName, MacroFnName) \ EASTL_ATOMIC_RAW_INTEGRAL_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_relaxed_s, RELAXED_) \ EASTL_ATOMIC_RAW_INTEGRAL_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_acquire_s, ACQUIRE_) \ EASTL_ATOMIC_RAW_INTEGRAL_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_release_s, RELEASE_) \ EASTL_ATOMIC_RAW_INTEGRAL_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_acq_rel_s, ACQ_REL_) \ EASTL_ATOMIC_RAW_INTEGRAL_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_seq_cst_s, SEQ_CST_) // Ptr version of the fetch functions. #define EASTL_ATOMIC_RAW_POINTER_FETCH_FUNC_IMPL(FetchOp, BitCount, ptr) \ using ptr_integral_type = EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(int, BitCount), _t); \ EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(T); \ EASTL_ATOMIC_POINTER_FUNC_IMPL(FetchOp, BitCount, ptr); #define EASTL_ATOMIC_RAW_PTR_FETCH_ORDER_TMPL(FuncName, MacroFnName, MemOrderType, MemOrderMacro) \ template \ T* EA_PREPROCESSOR_JOIN(atomic_raw_, FuncName)(T * *p, ptrdiff_t arg, MemOrderType) \ { \ if constexpr (sizeof(T*) == 4) \ { \ EASTL_ATOMIC_RAW_POINTER_FETCH_FUNC_IMPL(EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(MacroFnName, MemOrderMacro), \ 32, p) \ } \ else if constexpr (sizeof(T*) == 8) \ { \ EASTL_ATOMIC_RAW_POINTER_FETCH_FUNC_IMPL(EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(MacroFnName, MemOrderMacro), \ 64, p) \ } \ } #define EASTL_ATOMIC_RAW_PTR_FETCH_FUNC_TMPL(FuncName, MacroFnName) \ EASTL_ATOMIC_RAW_PTR_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_relaxed_s, RELAXED_) \ EASTL_ATOMIC_RAW_PTR_FETCH_ORDER_TMPL(FuncName, MacroFnName, 
internal::memory_order_acquire_s, ACQUIRE_) \ EASTL_ATOMIC_RAW_PTR_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_release_s, RELEASE_) \ EASTL_ATOMIC_RAW_PTR_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_acq_rel_s, ACQ_REL_) \ EASTL_ATOMIC_RAW_PTR_FETCH_ORDER_TMPL(FuncName, MacroFnName, internal::memory_order_seq_cst_s, SEQ_CST_) namespace eastl { EASTL_ATOMIC_RAW_LOAD_TMPL(internal::memory_order_relaxed_s, RELAXED_) EASTL_ATOMIC_RAW_LOAD_TMPL(internal::memory_order_acquire_s, ACQUIRE_) EASTL_ATOMIC_RAW_LOAD_TMPL(internal::memory_order_seq_cst_s, SEQ_CST_) // Special memory order for pointers EASTL_ATOMIC_RAW_POINTER_READ_DEPENDS_LOAD_TMPL() EASTL_ATOMIC_RAW_STORE_TMPL(internal::memory_order_relaxed_s, RELAXED_) EASTL_ATOMIC_RAW_STORE_TMPL(internal::memory_order_release_s, RELEASE_) EASTL_ATOMIC_RAW_STORE_TMPL(internal::memory_order_seq_cst_s, SEQ_CST_) EASTL_ATOMIC_RAW_EXCHANGE_TMPL(internal::memory_order_relaxed_s, RELAXED_) EASTL_ATOMIC_RAW_EXCHANGE_TMPL(internal::memory_order_acquire_s, ACQUIRE_) EASTL_ATOMIC_RAW_EXCHANGE_TMPL(internal::memory_order_release_s, RELEASE_) EASTL_ATOMIC_RAW_EXCHANGE_TMPL(internal::memory_order_acq_rel_s, ACQ_REL_) EASTL_ATOMIC_RAW_EXCHANGE_TMPL(internal::memory_order_seq_cst_s, SEQ_CST_) EASTL_ATOMIC_RAW_CMPXCHG1_TMPL(internal::memory_order_relaxed_s, RELAXED_) EASTL_ATOMIC_RAW_CMPXCHG1_TMPL(internal::memory_order_acquire_s, ACQUIRE_) EASTL_ATOMIC_RAW_CMPXCHG1_TMPL(internal::memory_order_release_s, RELEASE_) EASTL_ATOMIC_RAW_CMPXCHG1_TMPL(internal::memory_order_acq_rel_s, ACQ_REL_) EASTL_ATOMIC_RAW_CMPXCHG1_TMPL(internal::memory_order_seq_cst_s, SEQ_CST_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_relaxed_s, internal::memory_order_relaxed_s, RELAXED_RELAXED_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_acquire_s, internal::memory_order_relaxed_s, ACQUIRE_RELAXED_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_acquire_s, internal::memory_order_acquire_s, ACQUIRE_ACQUIRE_) 
EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_release_s, internal::memory_order_relaxed_s, RELEASE_RELAXED_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_acq_rel_s, internal::memory_order_relaxed_s, ACQ_REL_RELAXED_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_acq_rel_s, internal::memory_order_acquire_s, ACQ_REL_ACQUIRE_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_seq_cst_s, internal::memory_order_relaxed_s, SEQ_CST_RELAXED_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_seq_cst_s, internal::memory_order_acquire_s, SEQ_CST_ACQUIRE_) EASTL_ATOMIC_RAW_CMPXCHG2_TMPL(internal::memory_order_seq_cst_s, internal::memory_order_seq_cst_s, SEQ_CST_SEQ_CST_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(fetch_add, FETCH_ADD_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(add_fetch, ADD_FETCH_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(fetch_sub, FETCH_SUB_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(sub_fetch, SUB_FETCH_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(fetch_and, FETCH_AND_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(and_fetch, AND_FETCH_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(fetch_or, FETCH_OR_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(or_fetch, OR_FETCH_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(fetch_xor, FETCH_XOR_) EASTL_ATOMIC_RAW_INTEGRAL_FETCH_FUNC_TMPL(xor_fetch, XOR_FETCH_) EASTL_ATOMIC_RAW_PTR_FETCH_FUNC_TMPL(fetch_add, FETCH_ADD_) EASTL_ATOMIC_RAW_PTR_FETCH_FUNC_TMPL(add_fetch, ADD_FETCH_) EASTL_ATOMIC_RAW_PTR_FETCH_FUNC_TMPL(fetch_sub, FETCH_SUB_) EASTL_ATOMIC_RAW_PTR_FETCH_FUNC_TMPL(sub_fetch, SUB_FETCH_) } // namespace eastl ================================================ FILE: include/EASTL/bit.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// #ifndef EASTL_BIT_H #define EASTL_BIT_H #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once #endif #include #include #include #include // memcpy #if defined(EA_COMPILER_MSVC) && !defined(__clang__) #include #endif namespace eastl { // eastl::bit_cast // Obtains a value of type To by reinterpreting the object representation of 'from'. // Every bit in the value representation of the returned To object is equal to the // corresponding bit in the object representation of 'from'. // // In order for bit_cast to be constexpr, the compiler needs to explicitly support // it by providing the __builtin_bit_cast builtin. If that builtin is not available, // then we memcpy into aligned storage at runtime and return that instead. // // Both types To and From must be equal in size, and must be trivially copyable. #if defined(EASTL_CONSTEXPR_BIT_CAST_SUPPORTED) && EASTL_CONSTEXPR_BIT_CAST_SUPPORTED template::value && eastl::is_trivially_copyable::value > > constexpr To bit_cast(const From& from) noexcept { return __builtin_bit_cast(To, from); } #else template::value && eastl::is_trivially_copyable::value > > inline To bit_cast(const From& from) noexcept { typename eastl::aligned_storage::type to; ::memcpy(eastl::addressof(to), eastl::addressof(from), sizeof(To)); return reinterpret_cast(to); } #endif // EASTL_CONSTEXPR_BIT_CAST_SUPPORTED namespace internal { constexpr int countl_zero64(uint64_t x) noexcept { if (x) { int n = 0; if (x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } if (x & 0xFFFF0000) { n += 16; x >>= 16; } if (x & 0xFFFFFF00) { n += 8; x >>= 8; } if (x & 0xFFFFFFF0) { n += 4; x >>= 4; } if (x & 0xFFFFFFFC) { n += 2; x >>= 2; } if (x & 0xFFFFFFFE) { n += 1; } return 63 - n; } return 64; } // Count leading zeroes in an integer. // // todo: consolidate with EA::StdC::CountLeading0Bits() and bitset's GetLastBit() + 1. 
// this implementation has these improvements: // - has overloads for all unsigned integral types. CountLeading0Bits() may not compile with ambiguous overloads because it doesn't match all unsigned integral types. // - supports 128 bit types. // - is noexcept. // - is constexpr, if available. #if defined(EA_COMPILER_MSVC) && !defined(__clang__) // MSVC overloads are not constexpr because _BitScanReverse is not constexpr. inline int countl_zero(unsigned char x) noexcept { unsigned long index; return _BitScanReverse(&index, static_cast(x)) ? (sizeof(unsigned char) * CHAR_BIT - 1 - index) : (sizeof(unsigned char) * CHAR_BIT); } inline int countl_zero(unsigned short x) noexcept { unsigned long index; return _BitScanReverse(&index, static_cast(x)) ? (sizeof(unsigned short) * CHAR_BIT - 1 - index) : (sizeof(unsigned short) * CHAR_BIT); } inline int countl_zero(unsigned int x) noexcept { unsigned long index; return _BitScanReverse(&index, static_cast(x)) ? (sizeof(unsigned int) * CHAR_BIT - 1 - index) : (sizeof(unsigned int) * CHAR_BIT); } inline int countl_zero(unsigned long x) noexcept { unsigned long index; return _BitScanReverse(&index, x) ? (sizeof(unsigned long) * CHAR_BIT - 1 - index) : (sizeof(unsigned long) * CHAR_BIT); } #if (EA_PLATFORM_PTR_SIZE == 8) inline int countl_zero(unsigned long long x) noexcept { unsigned long index; return _BitScanReverse64(&index, x) ? (sizeof(unsigned long long) * CHAR_BIT - 1 - index) : (sizeof(unsigned long long) * CHAR_BIT); } #else inline int countl_zero(unsigned long long x) noexcept { return countl_zero64(static_cast(x)); } #endif #elif defined(__GNUC__) || defined(__clang__) // __builtin_clz constexpr inline int countl_zero(unsigned char x) noexcept { constexpr auto diff = eastl::numeric_limits::digits - eastl::numeric_limits::digits; return x ? 
(__builtin_clz(static_cast(x)) - diff) : (sizeof(unsigned char) * CHAR_BIT); } constexpr inline int countl_zero(unsigned short x) noexcept { constexpr auto diff = eastl::numeric_limits::digits - eastl::numeric_limits::digits; return x ? (__builtin_clz(static_cast(x)) - diff) : (sizeof(unsigned short) * CHAR_BIT); } constexpr inline int countl_zero(unsigned int x) noexcept { return x ? __builtin_clz(x) : (sizeof(unsigned int) * CHAR_BIT); } // __builtin_clzl constexpr inline int countl_zero(unsigned long x) noexcept { return x ? __builtin_clzl(x) : (sizeof(unsigned long) * CHAR_BIT); } // __builtin_clzll #if (EA_PLATFORM_PTR_SIZE == 8) constexpr inline int countl_zero(unsigned long long x) noexcept { return x ? __builtin_clzll(x) : (sizeof(unsigned long long) * CHAR_BIT); } #else constexpr inline int countl_zero(unsigned long long x) noexcept { return countl_zero64(static_cast(x)); } #endif #if EASTL_INT128_SUPPORTED // todo: once we are using Clang 19.1.0 and GCC ??? use __builtin_clzg(x) constexpr inline int countl_zero(eastl_uint128_t x) noexcept { const int first64bits = countl_zero(static_cast(x >> 64)); return first64bits == 64 ? 
(64 + countl_zero(static_cast(x))) : first64bits; } #endif #else // not MSVC, clang or GCC template && sizeof(T) < 4, bool > = true> constexpr int countl_zero(const T num) noexcept { constexpr auto diff = 32 - eastl::numeric_limits::digits; return countl_zero(static_cast(num)) - diff; } template && sizeof(T) == 4, bool> = true> constexpr int countl_zero(T x) noexcept { if (x) { int n = 0; if (x <= 0x0000FFFF) { n += 16; x <<= 16; } if (x <= 0x00FFFFFF) { n += 8; x <<= 8; } if (x <= 0x0FFFFFFF) { n += 4; x <<= 4; } if (x <= 0x3FFFFFFF) { n += 2; x <<= 2; } if (x <= 0x7FFFFFFF) { n += 1; } return n; } return 32; } template && sizeof(T) == 8, bool> = true> constexpr int countl_zero(T x) noexcept { return countl_zero64(static_cast(x)); } #if EASTL_INT128_SUPPORTED constexpr inline int countl_zero(eastl_uint128_t x) noexcept { if (x) { int n = 0; if (x & (~eastl_uint128_t(0) << 64)) { n += 64; x >>= 64; } if (x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } if (x & 0xFFFF0000) { n += 16; x >>= 16; } if (x & 0xFFFFFF00) { n += 8; x >>= 8; } if (x & 0xFFFFFFF0) { n += 4; x >>= 4; } if (x & 0xFFFFFFFC) { n += 2; x >>= 2; } if (x & 0xFFFFFFFE) { n += 1; } return 127 - n; } return 128; } #endif #endif } // namespace internal template >> constexpr int countl_zero(T x) noexcept { return internal::countl_zero(x); } template >> constexpr bool has_single_bit(const T num) noexcept { return num != 0 && (num & (num - 1)) == 0; } template >> constexpr T bit_ceil(const T num) noexcept { if (num <= 1U) { return T(1); } const auto shift = eastl::numeric_limits::digits - eastl::countl_zero(static_cast(num - 1)); return static_cast(T(1) << shift); } template >> constexpr T bit_floor(const T num) noexcept { if (num == 0) { return T(0); } const auto shift = eastl::numeric_limits::digits - eastl::countl_zero(num) - 1; return static_cast(T(1) << shift); } template >> constexpr int bit_width(const T num) noexcept { return eastl::numeric_limits::digits - eastl::countl_zero(num); } 
namespace internal { const static char kBitsPerUint16[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; template constexpr int popcount_non_intrinsic(T num) noexcept { int n = 0; for (T w = num; w; w >>= 4) n += kBitsPerUint16[w & 0xF]; return n; } #if defined(EA_COMPILER_MSVC) && !defined(__clang__) // __popcnt is not constexpr // __popcnt16 inline int popcount(unsigned char num) noexcept { return __popcnt16(static_cast(num)); } inline int popcount(unsigned short num) noexcept { return __popcnt16(num); } // __popcnt inline int popcount(unsigned int num) noexcept { return __popcnt(num); } #if defined(EA_PROCESSOR_X86_64) // __popcnt64 inline int popcount(unsigned long num) noexcept { return static_cast(__popcnt64(num)); } inline int popcount(unsigned long long num) noexcept { return static_cast(__popcnt64(num)); } #else // todo: is it better to use __popcnt() or the fallback implementation? inline int popcount(unsigned long num) noexcept { return popcount_non_intrinsic(num); } inline int popcount(unsigned long long num) noexcept { return popcount_non_intrinsic(num); } #endif #elif defined(__GNUC__) || defined(__clang__) // __builtin_popcount constexpr inline int popcount(unsigned char num) noexcept { return __builtin_popcount(num); } constexpr inline int popcount(unsigned short num) noexcept { return __builtin_popcount(num); } constexpr inline int popcount(unsigned int num) noexcept { return __builtin_popcount(num); } // __builtin_popcountl constexpr inline int popcount(unsigned long num) noexcept { return __builtin_popcountl(num); } // __builtin_popcountll constexpr inline int popcount(unsigned long long num) noexcept { return __builtin_popcountll(num); } #endif #if EASTL_INT128_SUPPORTED // todo: once we are using Clang 19.1.0 and GCC ??? 
use __builtin_popcountg(num) constexpr inline int popcount(eastl_uint128_t num) noexcept { return popcount(static_cast(num >> 64)) + popcount(static_cast(num)); } #endif } // namespace internal template >> constexpr int popcount(T x) noexcept { return internal::popcount(x); } } // namespace eastl #endif // EASTL_BIT_H ================================================ FILE: include/EASTL/bitset.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // This file implements a bitset much like the C++ std::bitset class. // The primary distinctions between this bitset and std::bitset are: // - bitset is more efficient than some other std::bitset implementations, // notably the bitset that comes with Microsoft and other 1st party platforms. // - bitset is savvy to an environment that doesn't have exception handling, // as is sometimes the case with console or embedded environments. // - bitset is savvy to environments in which 'unsigned long' is not the // most efficient integral data type. std::bitset implementations use // unsigned long, even if it is an inefficient integer type. // - bitset removes as many function calls as practical, in order to allow // debug builds to run closer in speed and code footprint to release builds. // - bitset doesn't support string functionality. We can add this if // it is deemed useful. // /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_BITSET_H #define EASTL_BITSET_H #include #include #include EA_DISABLE_ALL_VC_WARNINGS(); #include #include EA_RESTORE_ALL_VC_WARNINGS(); #if EASTL_EXCEPTIONS_ENABLED EA_DISABLE_ALL_VC_WARNINGS(); #include // std::out_of_range, std::length_error. 
EA_RESTORE_ALL_VC_WARNINGS(); #endif EA_DISABLE_VC_WARNING(4127); // Conditional expression is constant #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { // To consider: Enable this for backwards compatibility with any user code that might be using BitsetWordType: // #define BitsetWordType EASTL_BITSET_WORD_TYPE_DEFAULT /// BITSET_WORD_COUNT /// /// Defines the number of words we use, based on the number of bits. /// nBitCount refers to the number of bits in a bitset. /// WordType refers to the type of integer word which stores bitet data. By default it is BitsetWordType. /// /// Note: for nBitCount == 0, returns 1! #if !defined(__GNUC__) || (__GNUC__ >= 3) // GCC 2.x can't handle the simpler declaration below. #define BITSET_WORD_COUNT(nBitCount, WordType) (nBitCount == 0 ? 1 : ((nBitCount - 1) / (8 * sizeof(WordType)) + 1)) #else #define BITSET_WORD_COUNT(nBitCount, WordType) ((nBitCount - 1) / (8 * sizeof(WordType)) + 1) #endif /// EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING /// Before GCC 4.7 the '-Warray-bounds' buggy and was very likely to issue false positives for loops that are /// difficult to evaluate. /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=45978 /// #if defined(__GNUC__) && (EA_COMPILER_VERSION > 4007) && defined(EA_PLATFORM_ANDROID) // Earlier than GCC 4.7 #define EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING 1 #else #define EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING 0 #endif template class bitset; namespace detail { template struct is_word_type : bool_constant && !is_volatile_v && !is_same_v && is_integral_v && is_unsigned_v> {}; template constexpr bool is_word_type_v = is_word_type::value; // slices the min(N, UInt) lowest significant bits from value. template eastl::enable_if_t> from_unsigned_integral(bitset& bs, UInt value) { constexpr size_t numWords = (N > 0) ? 
((N - 1) / (CHAR_BIT * sizeof(WordType)) + 1) : 0; // BITSET_WORD_COUNT(N, WordType) but 0 for N == 0 WordType* data = bs.data(); EA_CONSTEXPR_IF (numWords > 0) { // copy everything from value into our word array: constexpr size_t bytes_to_copy = eastl::min_alt(numWords * sizeof(WordType), sizeof(UInt)); memcpy(data, &value, bytes_to_copy); // zero any remaining elements in our array: memset(reinterpret_cast(data) + bytes_to_copy, 0, numWords * sizeof(WordType) - bytes_to_copy); // we may have copied bits into the final element that are unusable (ie. bit positions >= N). // zero these bits out, as this is an invariant for our implementation. EA_CONSTEXPR_IF (N % (CHAR_BIT * sizeof(WordType)) != 0) { constexpr WordType lastElemUsedBitsMask = (WordType(1) << (N % (CHAR_BIT * sizeof(WordType)))) - 1; data[numWords - 1] &= lastElemUsedBitsMask; } } else { data[0] = 0; // our bitset implementation has a single element even when N == 0. } } // This is here to work around the lack of `if constexpr` in C++14, so that calling // (WordType(1) << (CHAR_BIT * sizeof(UInt))) doesn't trigger warnings/errors when // sizeof(UInt) >= sizeof(WordType) template struct to_unsigned_integral_helper {}; template struct to_unsigned_integral_helper { static size_t copyWords(const WordType* data, UInt* result) { constexpr size_t bytes_to_copy = sizeof(UInt); memcpy(result, data, bytes_to_copy); // check remaining uncopied bits from the first word are zero: constexpr WordType lastElemOverflowBitsMask = static_cast(~((WordType(1) << (CHAR_BIT * sizeof(UInt))) - 1)); if ((data[0] & lastElemOverflowBitsMask) != 0) { #if EASTL_EXCEPTIONS_ENABLED throw std::overflow_error("target type cannot represent the full bitset."); #elif EASTL_ASSERT_ENABLED EA_CONSTEXPR_IF(bAssertOnOverflow) EASTL_FAIL_MSG("overflow_error"); #endif } return 1; } }; template struct to_unsigned_integral_helper { static size_t copyWords(const WordType* data, UInt* result) { constexpr size_t bytes_to_copy = 
eastl::min_alt(NumWords * sizeof(WordType), sizeof(UInt)); memcpy(result, data, bytes_to_copy); return bytes_to_copy / sizeof(WordType); } }; template eastl::enable_if_t, UInt> to_unsigned_integral(const bitset& bs) { constexpr size_t kNumWords = (N > 0) ? ((N - 1) / (CHAR_BIT * sizeof(WordType)) + 1) : 0; // BITSET_WORD_COUNT(N, WordType) but 0 for N == 0 EA_CONSTEXPR_IF (kNumWords > 0) { const WordType* data = bs.data(); UInt result = 0; const size_t numWordsCopied = to_unsigned_integral_helper::copyWords(data, &result); // check any remaining uncopied words are zero (don't contain any useful information). for (size_t wordIndex = numWordsCopied; wordIndex < kNumWords; ++wordIndex) { if (data[wordIndex] != 0) { #if EASTL_EXCEPTIONS_ENABLED throw std::overflow_error("target type cannot represent the full bitset."); #elif EASTL_ASSERT_ENABLED EA_CONSTEXPR_IF (bAssertOnOverflow) EASTL_FAIL_MSG("overflow_error"); #endif } } return result; } else { return 0; } } } // namespace detail /// BitsetBase /// /// This is a default implementation that works for any number of words. /// template // Templated on the number of words used to hold the bitset and the word type. struct BitsetBase { typedef WordType word_type; typedef BitsetBase this_type; #if EASTL_BITSET_SIZE_T typedef size_t size_type; #else typedef eastl_size_t size_type; #endif enum { kBitsPerWord = (8 * sizeof(word_type)), kBitsPerWordMask = (kBitsPerWord - 1), kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) }; public: // invariant: we keep any high bits in the last word that are unneeded set to 0 // so that our to_ulong() conversion can simply copy the words into the target type. 
word_type mWord[NW]; public: void operator&=(const this_type& x); void operator|=(const this_type& x); void operator^=(const this_type& x); void operator<<=(size_type n); void operator>>=(size_type n); void flip(); void set(); void set(size_type i, bool value); void reset(); bool operator==(const this_type& x) const; bool any() const; size_type count() const; word_type& DoGetWord(size_type i); word_type DoGetWord(size_type i) const; size_type DoFindFirst() const; size_type DoFindNext(size_type last_find) const; size_type DoFindLast() const; // Returns NW * kBitsPerWord (the bit count) if no bits are set. size_type DoFindPrev(size_type last_find) const; // Returns NW * kBitsPerWord (the bit count) if no bits are set. }; // class BitsetBase /// BitsetBase<1, WordType> /// /// This is a specialization for a bitset that fits within one word. /// template struct BitsetBase<1, WordType> { typedef WordType word_type; typedef BitsetBase<1, WordType> this_type; #if EASTL_BITSET_SIZE_T typedef size_t size_type; #else typedef eastl_size_t size_type; #endif enum { kBitsPerWord = (8 * sizeof(word_type)), kBitsPerWordMask = (kBitsPerWord - 1), kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) }; public: word_type mWord[1]; // Defined as an array of 1 so that bitset can treat this BitsetBase like others. public: void operator&=(const this_type& x); void operator|=(const this_type& x); void operator^=(const this_type& x); void operator<<=(size_type n); void operator>>=(size_type n); void flip(); void set(); void set(size_type i, bool value); void reset(); bool operator==(const this_type& x) const; bool any() const; size_type count() const; word_type& DoGetWord(size_type); word_type DoGetWord(size_type) const; size_type DoFindFirst() const; size_type DoFindNext(size_type last_find) const; size_type DoFindLast() const; // Returns 1 * kBitsPerWord (the bit count) if no bits are set. 
size_type DoFindPrev(size_type last_find) const; // Returns 1 * kBitsPerWord (the bit count) if no bits are set. }; // BitsetBase<1, WordType> /// BitsetBase<2, WordType> /// /// This is a specialization for a bitset that fits within two words. /// The difference here is that we avoid branching (ifs and loops). /// template struct BitsetBase<2, WordType> { typedef WordType word_type; typedef BitsetBase<2, WordType> this_type; #if EASTL_BITSET_SIZE_T typedef size_t size_type; #else typedef eastl_size_t size_type; #endif enum { kBitsPerWord = (8 * sizeof(word_type)), kBitsPerWordMask = (kBitsPerWord - 1), kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) }; public: word_type mWord[2]; public: void operator&=(const this_type& x); void operator|=(const this_type& x); void operator^=(const this_type& x); void operator<<=(size_type n); void operator>>=(size_type n); void flip(); void set(); void set(size_type i, bool value); void reset(); bool operator==(const this_type& x) const; bool any() const; size_type count() const; word_type& DoGetWord(size_type); word_type DoGetWord(size_type) const; size_type DoFindFirst() const; size_type DoFindNext(size_type last_find) const; size_type DoFindLast() const; // Returns 2 * kBitsPerWord (the bit count) if no bits are set. size_type DoFindPrev(size_type last_find) const; // Returns 2 * kBitsPerWord (the bit count) if no bits are set. }; // BitsetBase<2, WordType> /// bitset /// /// Implements a bitset much like the C++ std::bitset. /// /// As of this writing we don't implement a specialization of bitset<0>, /// as it is deemed an academic exercise that nobody would actually /// use and it would increase code space and provide little practical /// benefit. Note that this doesn't mean bitset<0> isn't supported; /// it means that our version of it isn't as efficient as it would be /// if a specialization was made for it. 
/// /// - N can be any unsigned (non-zero) value, though memory usage is /// linear with respect to N, so large values of N use large amounts of memory. /// - WordType must be a non-cv qualified unsigned integral other than bool. /// By default the WordType is the largest native register type that the /// target platform supports. /// template class bitset : private BitsetBase { public: static_assert(detail::is_word_type_v, "Word type must be a non-cv qualified, unsigned integral other than bool."); typedef BitsetBase base_type; typedef bitset this_type; typedef WordType word_type; typedef typename base_type::size_type size_type; enum { kBitsPerWord = (8 * sizeof(word_type)), kBitsPerWordMask = (kBitsPerWord - 1), kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))), kSize = N, // The number of bits the bitset holds kWordSize = sizeof(word_type), // The size of individual words the bitset uses to hold the bits. kWordCount = BITSET_WORD_COUNT(N, WordType) // The number of words the bitset uses to hold the bits. sizeof(bitset) == kWordSize * kWordCount. }; // internal implementation details. do not use. using base_type::mWord; using base_type::DoGetWord; using base_type::DoFindFirst; using base_type::DoFindNext; using base_type::DoFindLast; using base_type::DoFindPrev; using base_type::count; using base_type::any; public: /// reference /// /// A reference is a reference to a specific bit in the bitset. /// The C++ standard specifies that this be a nested class, /// though it is not clear if a non-nested reference implementation /// would be non-conforming. /// class reference { protected: friend class bitset; word_type* mpBitWord; size_type mnBitIndex; reference(){} // The C++ standard specifies that this is private. 
public: reference(const bitset& x, size_type i); reference& operator=(bool value); reference& operator=(const reference& x); bool operator~() const; operator bool() const // Defined inline because CodeWarrior fails to be able to compile it outside. { return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) != 0; } reference& flip(); }; public: friend class reference; bitset(); #if EA_IS_ENABLED(EASTL_DEPRECATIONS_FOR_2024_SEPT) // note: this constructor will only copy the minimum of N or unsigned long long's size least significant bits. bitset(unsigned long long value); #else bitset(uint32_t value); #endif // We don't define copy constructor and operator= because // the compiler-generated versions will suffice. this_type& operator&=(const this_type& x); this_type& operator|=(const this_type& x); this_type& operator^=(const this_type& x); this_type& operator<<=(size_type n); this_type& operator>>=(size_type n); this_type& set(); this_type& set(size_type i, bool value = true); this_type& reset(); this_type& reset(size_type i); this_type& flip(); this_type& flip(size_type i); this_type operator~() const; reference operator[](size_type i); bool operator[](size_type i) const; const word_type* data() const; word_type* data(); // Deprecated: use the bitset(unsigned long long) constructor instead. // this was a workaround for when our constructor was defined as bitset(uint32_t) and could cause a narrowing conversion. 
EASTL_REMOVE_AT_2024_SEPT void from_uint32(uint32_t value); EASTL_REMOVE_AT_2024_SEPT void from_uint64(uint64_t value); /// to_xxx() /// /// Not recommended: Use one of /// as_xxx() which is a compile time error if the target type cannot represent the entire bitset, or /// to_xxx_assert_convertible() which is the standard conformant version of this function, or /// to_xxx_no_assert_convertible() which has the same behaviour, explicit naming /// /// Different from the standard: /// Does *NOT* assert that the bitset can be represented as the target integer type (has bits set outside the target type). /// However, if exceptions are enabled, it does throw an exception if the bitset cannot be represented as the target integer type. unsigned long to_ulong() const; uint32_t to_uint32() const; uint64_t to_uint64() const; /// to_xxx_assert_convertible() /// /// Equivalent to the standard library's to_ulong() / to_ullong(). /// Asserts / throws an exception if the bitset cannot be represented as the target integer type. uint32_t to_uint32_assert_convertible() const { return detail::to_unsigned_integral(*this); } uint64_t to_uint64_assert_convertible() const { return detail::to_unsigned_integral(*this); } unsigned long to_ulong_assert_convertible() const { return detail::to_unsigned_integral(*this); } unsigned long long to_ullong_assert_convertible() const { return detail::to_unsigned_integral(*this); } /// to_xxx_no_assert_convertible() /// /// Prefer to_xxx_assert_convertible() instead of these functions. /// /// Different from the standard: /// Does *NOT* assert that the bitset can be represented as the target integer type (has bits set outside the target type). /// However, if exceptions are enabled, it does throw an exception if the bitset cannot be represented as the target integer type. 
uint32_t to_uint32_no_assert_convertible() const { return detail::to_unsigned_integral(*this); } uint64_t to_uint64_no_assert_convertible() const { return detail::to_unsigned_integral(*this); } unsigned long to_ulong_no_assert_convertible() const { return detail::to_unsigned_integral(*this); } unsigned long long to_ullong_no_assert_convertible() const { return detail::to_unsigned_integral(*this); } /// as_uint() / as_xxx() /// /// Extension to the standard: Cast to a unsigned integral that can represent the entire bitset. /// If the target type cannot represent the entire bitset, then issue a compile error (overload does not exist). /// Never throws / asserts. template eastl::enable_if_t && N <= (CHAR_BIT * sizeof(UInt)), UInt> as_uint() const noexcept { return detail::to_unsigned_integral(*this); } template eastl::enable_if_t as_uint32() const noexcept { return to_uint32_assert_convertible(); } template eastl::enable_if_t as_uint64() const noexcept { return to_uint64_assert_convertible(); } template eastl::enable_if_t as_ulong() const noexcept { return to_ulong_assert_convertible(); } template eastl::enable_if_t as_ullong() const noexcept { return to_ullong_assert_convertible(); } //size_type count() const; // We inherit this from the base class. size_type size() const; bool operator==(const this_type& x) const; #if !defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) bool operator!=(const this_type& x) const; #endif bool test(size_type i) const; //bool any() const; // We inherit this from the base class. bool all() const; bool none() const; this_type operator<<(size_type n) const; this_type operator>>(size_type n) const; // Finds the index of the first "on" bit, returns kSize if none are set. size_type find_first() const; // Finds the index of the next "on" bit after last_find, returns kSize if none are set. size_type find_next(size_type last_find) const; // Finds the index of the last "on" bit, returns kSize if none are set. 
size_type find_last() const; // Finds the index of the last "on" bit before last_find, returns kSize if none are set. size_type find_prev(size_type last_find) const; }; // bitset /// BitsetCountBits /// /// This is a fast trick way to count bits without branches nor memory accesses. /// /// todo: Use bit.h's popcount instead? template eastl::enable_if_t && sizeof(UInt64) == 8, uint32_t> BitsetCountBits(UInt64 x) { // GCC 3.x's implementation of UINT64_C is broken and fails to deal with // the code below correctly. So we make a workaround for it. Earlier and // later versions of GCC don't have this bug. #if defined(__GNUC__) && (__GNUC__ == 3) x = x - ((x >> 1) & 0x5555555555555555ULL); x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; return (uint32_t)((x * 0x0101010101010101ULL) >> 56); #else x = x - ((x >> 1) & UINT64_C(0x5555555555555555)); x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333)); x = (x + (x >> 4)) & UINT64_C(0x0F0F0F0F0F0F0F0F); return (uint32_t)((x * UINT64_C(0x0101010101010101)) >> 56); #endif } template eastl::enable_if_t && sizeof(UInt32) == 4, uint32_t> BitsetCountBits(UInt32 x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; return (uint32_t)((x * 0x01010101) >> 24); } template eastl::enable_if_t< detail::is_word_type_v && sizeof(SmallUInt) < 4, uint32_t> BitsetCountBits(SmallUInt x) { return BitsetCountBits((uint32_t)x); } // const static char kBitsPerUint16[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; #define EASTL_BITSET_COUNT_STRING "\0\1\1\2\1\2\2\3\1\2\2\3\2\3\3\4" template eastl::enable_if_t && sizeof(UInt8) == 1, uint32_t> GetFirstBit(UInt8 x) { if(x) { uint32_t n = 1; if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } if((x & 0x00000003) == 0) { n += 2; x >>= 2; } return (uint32_t)(n - (x & 1)); } return 8; } // To do: Update this to use VC++ _BitScanForward, 
_BitScanForward64; // GCC __builtin_ctz, __builtin_ctzl. // VC++ __lzcnt16, __lzcnt, __lzcnt64 requires recent CPUs (2013+) and probably can't be used. // http://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29#New_features template eastl::enable_if_t && sizeof(UInt16) == 2, uint32_t> GetFirstBit(UInt16 x) { if(x) { uint32_t n = 1; if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } if((x & 0x00000003) == 0) { n += 2; x >>= 2; } return (uint32_t)(n - (x & 1)); } return 16; } template eastl::enable_if_t && sizeof(UInt32) == 4, uint32_t> GetFirstBit(UInt32 x) { #if defined(EA_COMPILER_MSVC) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) // This has been benchmarked as significantly faster than the generic code below. unsigned char isNonZero; unsigned long index; isNonZero = _BitScanForward(&index, x); return isNonZero ? (int)index : 32; #elif (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && !defined(EA_COMPILER_EDG) if (x) return __builtin_ctz(x); return 32; #else if(x) { uint32_t n = 1; if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } if((x & 0x00000003) == 0) { n += 2; x >>= 2; } return (n - (x & 1)); } return 32; #endif } template eastl::enable_if_t && sizeof(UInt64) == 8, uint32_t> GetFirstBit(UInt64 x) { #if defined(EA_COMPILER_MSVC) && defined(EA_PROCESSOR_X86_64) // This has been benchmarked as significantly faster than the generic code below. unsigned char isNonZero; unsigned long index; isNonZero = _BitScanForward64(&index, x); return isNonZero ? 
(int)index : 64; #elif (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && !defined(EA_COMPILER_EDG) if (x) return __builtin_ctzll(x); return 64; #else if(x) { uint32_t n = 1; if((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } if((x & 0x00000003) == 0) { n += 2; x >>= 2; } return (n - ((uint32_t)x & 1)); } return 64; #endif } #if EASTL_INT128_SUPPORTED inline uint32_t GetFirstBit(eastl_uint128_t x) { if(x) { uint32_t n = 1; if((x & UINT64_C(0xFFFFFFFFFFFFFFFF)) == 0) { n += 64; x >>= 64; } if((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } if((x & 0x00000003) == 0) { n += 2; x >>= 2; } return (n - ((uint32_t)x & 1)); } return 128; } #endif template eastl::enable_if_t && sizeof(UInt8) == 1, uint32_t> GetLastBit(UInt8 x) { if(x) { uint32_t n = 0; if(x & 0xFFF0) { n += 4; x >>= 4; } if(x & 0xFFFC) { n += 2; x >>= 2; } if(x & 0xFFFE) { n += 1; } return n; } return 8; } template eastl::enable_if_t && sizeof(UInt16) == 2, uint32_t> GetLastBit(UInt16 x) { if(x) { uint32_t n = 0; if(x & 0xFF00) { n += 8; x >>= 8; } if(x & 0xFFF0) { n += 4; x >>= 4; } if(x & 0xFFFC) { n += 2; x >>= 2; } if(x & 0xFFFE) { n += 1; } return n; } return 16; } template eastl::enable_if_t && sizeof(UInt32) == 4, uint32_t> GetLastBit(UInt32 x) { #if defined(EA_COMPILER_MSVC) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) // This has been benchmarked as significantly faster than the generic code below. unsigned char isNonZero; unsigned long index; isNonZero = _BitScanReverse(&index, x); return isNonZero ? 
(int)index : 32; #elif (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && !defined(EA_COMPILER_EDG) if (x) return 31 - __builtin_clz(x); return 32; #else if(x) { uint32_t n = 0; if(x & 0xFFFF0000) { n += 16; x >>= 16; } if(x & 0xFFFFFF00) { n += 8; x >>= 8; } if(x & 0xFFFFFFF0) { n += 4; x >>= 4; } if(x & 0xFFFFFFFC) { n += 2; x >>= 2; } if(x & 0xFFFFFFFE) { n += 1; } return n; } return 32; #endif } template eastl::enable_if_t && sizeof(UInt64) == 8, uint32_t> GetLastBit(UInt64 x) { #if defined(EA_COMPILER_MSVC) && defined(EA_PROCESSOR_X86_64) // This has been benchmarked as significantly faster than the generic code below. unsigned char isNonZero; unsigned long index; isNonZero = _BitScanReverse64(&index, x); return isNonZero ? (int)index : 64; #elif (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && !defined(EA_COMPILER_EDG) if (x) return 63 - __builtin_clzll(x); return 64; #else if(x) { uint32_t n = 0; if(x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } if(x & 0xFFFF0000) { n += 16; x >>= 16; } if(x & 0xFFFFFF00) { n += 8; x >>= 8; } if(x & 0xFFFFFFF0) { n += 4; x >>= 4; } if(x & 0xFFFFFFFC) { n += 2; x >>= 2; } if(x & 0xFFFFFFFE) { n += 1; } return n; } return 64; #endif } #if EASTL_INT128_SUPPORTED inline uint32_t GetLastBit(eastl_uint128_t x) { if(x) { uint32_t n = 0; eastl_uint128_t mask(UINT64_C(0xFFFFFFFFFFFFFFFF)); // There doesn't seem to exist compiler support for INT128_C() by any compiler. EAStdC's int128_t supports it though. 
mask <<= 64; if(x & mask) { n += 64; x >>= 64; } if(x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } if(x & UINT64_C(0x00000000FFFF0000)) { n += 16; x >>= 16; } if(x & UINT64_C(0x00000000FFFFFF00)) { n += 8; x >>= 8; } if(x & UINT64_C(0x00000000FFFFFFF0)) { n += 4; x >>= 4; } if(x & UINT64_C(0x00000000FFFFFFFC)) { n += 2; x >>= 2; } if(x & UINT64_C(0x00000000FFFFFFFE)) { n += 1; } return n; } return 128; } #endif /////////////////////////////////////////////////////////////////////////// // BitsetBase // // We tried two forms of array access here: // for(word_type *pWord(mWord), *pWordEnd(mWord + NW); pWord < pWordEnd; ++pWord) // *pWord = ... // and // for(size_t i = 0; i < NW; i++) // mWord[i] = ... // // For our tests (~NW < 16), the latter (using []) access resulted in faster code. /////////////////////////////////////////////////////////////////////////// template inline void BitsetBase::operator&=(const this_type& x) { for(size_t i = 0; i < NW; i++) mWord[i] &= x.mWord[i]; } template inline void BitsetBase::operator|=(const this_type& x) { for(size_t i = 0; i < NW; i++) mWord[i] |= x.mWord[i]; } template inline void BitsetBase::operator^=(const this_type& x) { for(size_t i = 0; i < NW; i++) mWord[i] ^= x.mWord[i]; } template inline void BitsetBase::operator<<=(size_type n) { const size_type nWordShift = (size_type)(n >> kBitsPerWordShift); if(nWordShift) { for(int i = (int)(NW - 1); i >= 0; --i) mWord[i] = (nWordShift <= (size_type)i) ? mWord[i - nWordShift] : (word_type)0; } if(n &= kBitsPerWordMask) { for(size_t i = (NW - 1); i > 0; --i) mWord[i] = (word_type)((mWord[i] << n) | (mWord[i - 1] >> (kBitsPerWord - n))); mWord[0] <<= n; } // We let the parent class turn off any upper bits. } template inline void BitsetBase::operator>>=(size_type n) { const size_type nWordShift = (size_type)(n >> kBitsPerWordShift); if(nWordShift) { for(size_t i = 0; i < NW; ++i) mWord[i] = ((nWordShift < (NW - i)) ? 
mWord[i + nWordShift] : (word_type)0); } if(n &= kBitsPerWordMask) { for(size_t i = 0; i < (NW - 1); ++i) mWord[i] = (word_type)((mWord[i] >> n) | (mWord[i + 1] << (kBitsPerWord - n))); mWord[NW - 1] >>= n; } } template inline void BitsetBase::flip() { for(size_t i = 0; i < NW; i++) mWord[i] = ~mWord[i]; // We let the parent class turn off any upper bits. } template inline void BitsetBase::set() { for(size_t i = 0; i < NW; i++) mWord[i] = static_cast(~static_cast(0)); // We let the parent class turn off any upper bits. } template inline void BitsetBase::set(size_type i, bool value) { if(value) mWord[i >> kBitsPerWordShift] |= (static_cast(1) << (i & kBitsPerWordMask)); else mWord[i >> kBitsPerWordShift] &= ~(static_cast(1) << (i & kBitsPerWordMask)); } template inline void BitsetBase::reset() { if(NW > 16) // This is a constant expression and should be optimized away. { // This will be fastest if compiler intrinsic function optimizations are enabled. memset(mWord, 0, sizeof(mWord)); } else { for(size_t i = 0; i < NW; i++) mWord[i] = 0; } } template inline bool BitsetBase::operator==(const this_type& x) const { for(size_t i = 0; i < NW; i++) { if(mWord[i] != x.mWord[i]) return false; } return true; } template inline bool BitsetBase::any() const { for(size_t i = 0; i < NW; i++) { if(mWord[i]) return true; } return false; } template inline typename BitsetBase::size_type BitsetBase::count() const { size_type n = 0; for(size_t i = 0; i < NW; i++) n += popcount(mWord[i]); return n; } template inline typename BitsetBase::word_type& BitsetBase::DoGetWord(size_type i) { return mWord[i >> kBitsPerWordShift]; } template inline typename BitsetBase::word_type BitsetBase::DoGetWord(size_type i) const { return mWord[i >> kBitsPerWordShift]; } template inline typename BitsetBase::size_type BitsetBase::DoFindFirst() const { for(size_type word_index = 0; word_index < NW; ++word_index) { const size_type fbiw = GetFirstBit(mWord[word_index]); if(fbiw != kBitsPerWord) return 
(word_index * kBitsPerWord) + fbiw; } return (size_type)NW * kBitsPerWord; } #if EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING EA_DISABLE_GCC_WARNING(-Warray-bounds) #endif template inline typename BitsetBase::size_type BitsetBase::DoFindNext(size_type last_find) const { // Start looking from the next bit. ++last_find; // Set initial state based on last find. size_type word_index = static_cast(last_find >> kBitsPerWordShift); size_type bit_index = static_cast(last_find & kBitsPerWordMask); // To do: There probably is a more elegant way to write looping below. if(word_index < NW) { // Mask off previous bits of the word so our search becomes a "find first". word_type this_word = mWord[word_index] & (static_cast(~0) << bit_index); for(;;) { const size_type fbiw = GetFirstBit(this_word); if(fbiw != kBitsPerWord) return (word_index * kBitsPerWord) + fbiw; if(++word_index < NW) this_word = mWord[word_index]; else break; } } return (size_type)NW * kBitsPerWord; } #if EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING EA_RESTORE_GCC_WARNING() #endif template inline typename BitsetBase::size_type BitsetBase::DoFindLast() const { for(size_type word_index = (size_type)NW; word_index > 0; --word_index) { const size_type lbiw = GetLastBit(mWord[word_index - 1]); if(lbiw != kBitsPerWord) return ((word_index - 1) * kBitsPerWord) + lbiw; } return (size_type)NW * kBitsPerWord; } template inline typename BitsetBase::size_type BitsetBase::DoFindPrev(size_type last_find) const { if(last_find > 0) { // Set initial state based on last find. size_type word_index = static_cast(last_find >> kBitsPerWordShift); size_type bit_index = static_cast(last_find & kBitsPerWordMask); // Mask off subsequent bits of the word so our search becomes a "find last". // We do two shifts here because it's undefined behaviour to right shift greater than or equal to the number of bits in the integer. // // Note: operator~() is an arithmetic operator and performs integral promotions, ie. 
small integrals are promoted to an int. // Because the promotion is before applying operator~() we need to cast back to our word type otherwise we end up with extraneous set bits. word_type mask = (static_cast(~static_cast(0)) >> (kBitsPerWord - 1 - bit_index)) >> 1; word_type this_word = mWord[word_index] & mask; for(;;) { const size_type lbiw = GetLastBit(this_word); if(lbiw != kBitsPerWord) return (word_index * kBitsPerWord) + lbiw; if(word_index > 0) this_word = mWord[--word_index]; else break; } } return (size_type)NW * kBitsPerWord; } /////////////////////////////////////////////////////////////////////////// // BitsetBase<1, WordType> /////////////////////////////////////////////////////////////////////////// template inline void BitsetBase<1, WordType>::operator&=(const this_type& x) { mWord[0] &= x.mWord[0]; } template inline void BitsetBase<1, WordType>::operator|=(const this_type& x) { mWord[0] |= x.mWord[0]; } template inline void BitsetBase<1, WordType>::operator^=(const this_type& x) { mWord[0] ^= x.mWord[0]; } template inline void BitsetBase<1, WordType>::operator<<=(size_type n) { mWord[0] <<= n; // We let the parent class turn off any upper bits. } template inline void BitsetBase<1, WordType>::operator>>=(size_type n) { mWord[0] >>= n; } template inline void BitsetBase<1, WordType>::flip() { mWord[0] = ~mWord[0]; // We let the parent class turn off any upper bits. } template inline void BitsetBase<1, WordType>::set() { mWord[0] = static_cast(~static_cast(0)); // We let the parent class turn off any upper bits. 
} template inline void BitsetBase<1, WordType>::set(size_type i, bool value) { if(value) mWord[0] |= (static_cast(1) << i); else mWord[0] &= ~(static_cast(1) << i); } template inline void BitsetBase<1, WordType>::reset() { mWord[0] = 0; } template inline bool BitsetBase<1, WordType>::operator==(const this_type& x) const { return mWord[0] == x.mWord[0]; } template inline bool BitsetBase<1, WordType>::any() const { return mWord[0] != 0; } template inline typename BitsetBase<1, WordType>::size_type BitsetBase<1, WordType>::count() const { return popcount(mWord[0]); } template inline typename BitsetBase<1, WordType>::word_type& BitsetBase<1, WordType>::DoGetWord(size_type) { return mWord[0]; } template inline typename BitsetBase<1, WordType>::word_type BitsetBase<1, WordType>::DoGetWord(size_type) const { return mWord[0]; } template inline typename BitsetBase<1, WordType>::size_type BitsetBase<1, WordType>::DoFindFirst() const { return GetFirstBit(mWord[0]); } template inline typename BitsetBase<1, WordType>::size_type BitsetBase<1, WordType>::DoFindNext(size_type last_find) const { if(++last_find < kBitsPerWord) { // Mask off previous bits of word so our search becomes a "find first". const word_type this_word = mWord[0] & (static_cast(~0) << last_find); return GetFirstBit(this_word); } return kBitsPerWord; } template inline typename BitsetBase<1, WordType>::size_type BitsetBase<1, WordType>::DoFindLast() const { return GetLastBit(mWord[0]); } template inline typename BitsetBase<1, WordType>::size_type BitsetBase<1, WordType>::DoFindPrev(size_type last_find) const { if(last_find > 0) { // Mask off the bits at and above last_find so our search becomes a "find last". 
const word_type this_word = mWord[0] & (static_cast(~static_cast(0)) >> (kBitsPerWord - last_find)); return GetLastBit(this_word); } return kBitsPerWord; } /////////////////////////////////////////////////////////////////////////// // BitsetBase<2, WordType> /////////////////////////////////////////////////////////////////////////// template inline void BitsetBase<2, WordType>::operator&=(const this_type& x) { mWord[0] &= x.mWord[0]; mWord[1] &= x.mWord[1]; } template inline void BitsetBase<2, WordType>::operator|=(const this_type& x) { mWord[0] |= x.mWord[0]; mWord[1] |= x.mWord[1]; } template inline void BitsetBase<2, WordType>::operator^=(const this_type& x) { mWord[0] ^= x.mWord[0]; mWord[1] ^= x.mWord[1]; } template inline void BitsetBase<2, WordType>::operator<<=(size_type n) { if(n) // to avoid a shift by kBitsPerWord, which is undefined { if(EASTL_UNLIKELY(n >= kBitsPerWord)) // parent expected to handle high bits and n >= 64 { mWord[1] = mWord[0]; mWord[0] = 0; n -= kBitsPerWord; } mWord[1] = (mWord[1] << n) | (mWord[0] >> (kBitsPerWord - n)); // Intentionally use | instead of +. mWord[0] <<= n; // We let the parent class turn off any upper bits. } } template inline void BitsetBase<2, WordType>::operator>>=(size_type n) { if(n) // to avoid a shift by kBitsPerWord, which is undefined { if(EASTL_UNLIKELY(n >= kBitsPerWord)) // parent expected to handle n >= 64 { mWord[0] = mWord[1]; mWord[1] = 0; n -= kBitsPerWord; } mWord[0] = (mWord[0] >> n) | (mWord[1] << (kBitsPerWord - n)); // Intentionally use | instead of +. mWord[1] >>= n; } } template inline void BitsetBase<2, WordType>::flip() { mWord[0] = ~mWord[0]; mWord[1] = ~mWord[1]; // We let the parent class turn off any upper bits. 
}


	// Sets every bit of both words, including any bits beyond the logical size.
	template
	inline void BitsetBase<2, WordType>::set()
	{
		EA_DISABLE_VC_WARNING(4245); // '=': conversion from 'int' to 'unsigned short', signed/unsigned mismatch
		// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4245?view=msvc-170
		// MSVC incorrectly believes 0 is a negative value.
		mWord[0] = ~static_cast(0);
		mWord[1] = ~static_cast(0);
		EA_RESTORE_VC_WARNING();
		// We let the parent class turn off any upper bits.
	}


	// Sets or clears bit i. The word is selected by i >> kBitsPerWordShift and the
	// bit within it by i & kBitsPerWordMask; no range check is done here.
	template
	inline void BitsetBase<2, WordType>::set(size_type i, bool value)
	{
		if(value)
			mWord[i >> kBitsPerWordShift] |=  (static_cast(1) << (i & kBitsPerWordMask));
		else
			mWord[i >> kBitsPerWordShift] &= ~(static_cast(1) << (i & kBitsPerWordMask));
	}


	// Clears every bit of both words.
	template
	inline void BitsetBase<2, WordType>::reset()
	{
		mWord[0] = 0;
		mWord[1] = 0;
	}


	// Equality requires both words to match.
	template
	inline bool BitsetBase<2, WordType>::operator==(const this_type& x) const
	{
		return (mWord[0] == x.mWord[0]) && (mWord[1] == x.mWord[1]);
	}


	// Returns true if at least one bit is set in either word.
	template
	inline bool BitsetBase<2, WordType>::any() const
	{
		// Or with two branches: { return (mWord[0] != 0) || (mWord[1] != 0); }
		return (mWord[0] | mWord[1]) != 0;
	}


	// Returns the number of set bits across both words.
	template
	inline typename BitsetBase<2, WordType>::size_type
	BitsetBase<2, WordType>::count() const
	{
		return popcount(mWord[0]) + popcount(mWord[1]);
	}


	// Returns a reference to the word that holds bit i.
	template
	inline typename BitsetBase<2, WordType>::word_type&
	BitsetBase<2, WordType>::DoGetWord(size_type i)
	{
		return mWord[i >> kBitsPerWordShift];
	}


	// Const overload: returns the word that holds bit i by value.
	template
	inline typename BitsetBase<2, WordType>::word_type
	BitsetBase<2, WordType>::DoGetWord(size_type i) const
	{
		return mWord[i >> kBitsPerWordShift];
	}


	// Returns the index of the lowest set bit, or 2 * kBitsPerWord if no bit is set.
	template
	inline typename BitsetBase<2, WordType>::size_type
	BitsetBase<2, WordType>::DoFindFirst() const
	{
		size_type fbiw = GetFirstBit(mWord[0]);

		if(fbiw != kBitsPerWord)
			return fbiw;

		fbiw = GetFirstBit(mWord[1]);

		if(fbiw != kBitsPerWord)
			return kBitsPerWord + fbiw;

		return 2 * kBitsPerWord;
	}


	// Returns the index of the first set bit strictly after last_find,
	// or 2 * kBitsPerWord if there is none.
	template
	inline typename BitsetBase<2, WordType>::size_type
	BitsetBase<2, WordType>::DoFindNext(size_type last_find) const
	{
		// If the last find was in the first word, we must check it and then possibly the second.
		if(++last_find < (size_type)kBitsPerWord)
		{
			// Mask off previous bits of word so our search becomes a "find first".
			word_type this_word = mWord[0] & (static_cast(~0) << last_find);

			// Step through words.
			size_type fbiw = GetFirstBit(this_word);

			if(fbiw != kBitsPerWord)
				return fbiw;

			fbiw = GetFirstBit(mWord[1]);

			if(fbiw != kBitsPerWord)
				return kBitsPerWord + fbiw;
		}
		else if(last_find < (size_type)(2 * kBitsPerWord))
		{
			// The last find was in the second word, remove the bit count of the first word from the find.
			last_find -= kBitsPerWord;

			// Mask off previous bits of word so our search becomes a "find first".
			word_type this_word = mWord[1] & (static_cast(~0) << last_find);

			const size_type fbiw = GetFirstBit(this_word);

			if(fbiw != kBitsPerWord)
				return kBitsPerWord + fbiw;
		}

		return 2 * kBitsPerWord;
	}


	// Returns the index of the highest set bit, or 2 * kBitsPerWord if no bit is set.
	template
	inline typename BitsetBase<2, WordType>::size_type
	BitsetBase<2, WordType>::DoFindLast() const
	{
		size_type lbiw = GetLastBit(mWord[1]);

		if(lbiw != kBitsPerWord)
			return kBitsPerWord + lbiw;

		lbiw = GetLastBit(mWord[0]);

		if(lbiw != kBitsPerWord)
			return lbiw;

		return 2 * kBitsPerWord;
	}


	// Returns the index of the last set bit strictly below last_find,
	// or 2 * kBitsPerWord if there is none.
	// NOTE(review): the shift counts below are (kBitsPerWord - last_find), so last_find is
	// presumably expected to be <= 2 * kBitsPerWord -- confirm against callers.
	template
	inline typename BitsetBase<2, WordType>::size_type
	BitsetBase<2, WordType>::DoFindPrev(size_type last_find) const
	{
		// If the last find was in the second word, we must check it and then possibly the first.
		if(last_find > (size_type)kBitsPerWord)
		{
			// This has the same effect as last_find %= kBitsPerWord in our case.
			last_find -= kBitsPerWord;

			// Mask off the bits at and above last_find so our search becomes a "find last".
			word_type this_word = mWord[1] & (static_cast(~static_cast(0)) >> (kBitsPerWord - last_find));

			// Step through words.
			size_type lbiw = GetLastBit(this_word);

			if(lbiw != kBitsPerWord)
				return kBitsPerWord + lbiw;

			lbiw = GetLastBit(mWord[0]);

			if(lbiw != kBitsPerWord)
				return lbiw;
		}
		else if(last_find != 0)
		{
			// Mask off the bits at and above last_find so our search becomes a "find last".
word_type this_word = mWord[0] & (static_cast(~static_cast(0)) >> (kBitsPerWord - last_find));

			const size_type lbiw = GetLastBit(this_word);

			if(lbiw != kBitsPerWord)
				return lbiw;
		}

		// Nothing found below last_find.
		return 2 * kBitsPerWord;
	}



	///////////////////////////////////////////////////////////////////////////
	// bitset::reference
	//
	// Proxy for a single bit: stores a pointer to the word and the bit's index within it.
	///////////////////////////////////////////////////////////////////////////

	// Binds the proxy to bit i of x: mpBitWord points at the word returned by DoGetWord(i)
	// and mnBitIndex is the position of the bit inside that word.
	template
	inline bitset::reference::reference(const bitset& x, size_type i)
		: mpBitWord(&const_cast(x).DoGetWord(i)),
		  mnBitIndex(i & kBitsPerWordMask)
	{   // We have an issue here because the above is casting away the const-ness of the source bitset.
		// Empty
	}


	// Writes value into the referenced bit and returns this proxy.
	template
	inline typename bitset::reference&
	bitset::reference::operator=(bool value)
	{
		if(value)
			*mpBitWord |=  (static_cast(1) << (mnBitIndex & kBitsPerWordMask));
		else
			*mpBitWord &= ~(static_cast(1) << (mnBitIndex & kBitsPerWordMask));
		return *this;
	}


	// Copies the bit value referenced by x into the bit referenced by this proxy.
	// The proxy itself is not rebound; only the target bit changes.
	template
	inline typename bitset::reference&
	bitset::reference::operator=(const reference& x)
	{
		if(*x.mpBitWord & (static_cast(1) << (x.mnBitIndex & kBitsPerWordMask)))
			*mpBitWord |=  (static_cast(1) << (mnBitIndex & kBitsPerWordMask));
		else
			*mpBitWord &= ~(static_cast(1) << (mnBitIndex & kBitsPerWordMask));
		return *this;
	}


	// Returns the logical inverse of the referenced bit without modifying it.
	template
	inline bool bitset::reference::operator~() const
	{
		return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) == 0;
	}


	//Defined inline in the class because Metrowerks fails to be able to compile it here.
//template //inline bitset::reference::operator bool() const //{ // return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) != 0; //} template inline typename bitset::reference& bitset::reference::flip() { *mpBitWord ^= static_cast(1) << (mnBitIndex & kBitsPerWordMask); return *this; } /////////////////////////////////////////////////////////////////////////// // bitset /////////////////////////////////////////////////////////////////////////// template inline bitset::bitset() { reset(); } EA_DISABLE_VC_WARNING(6313) #if EA_IS_ENABLED(EASTL_DEPRECATIONS_FOR_2024_SEPT) template inline bitset::bitset(unsigned long long value) { detail::from_unsigned_integral(*this, value); } #else template inline bitset::bitset(uint32_t value) { detail::from_unsigned_integral(*this, value); } #endif EA_RESTORE_VC_WARNING() template inline typename bitset::this_type& bitset::operator&=(const this_type& x) { base_type::operator&=(x); return *this; } template inline typename bitset::this_type& bitset::operator|=(const this_type& x) { base_type::operator|=(x); return *this; } template inline typename bitset::this_type& bitset::operator^=(const this_type& x) { base_type::operator^=(x); return *this; } template inline typename bitset::this_type& bitset::operator<<=(size_type n) { if(EASTL_LIKELY((intptr_t)n < (intptr_t)N)) { EA_DISABLE_VC_WARNING(6313) base_type::operator<<=(n); if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. 
EA_RESTORE_VC_WARNING()
		}
		else
			base_type::reset();
		return *this;
	}


	// In-place right shift by n bits. A shift of N or more clears the whole bitset.
	template
	inline typename bitset::this_type&
	bitset::operator>>=(size_type n)
	{
		if(EASTL_LIKELY(n < N))
			base_type::operator>>=(n);
		else
			base_type::reset();
		return *this;
	}


	// Sets all N bits, then clears the unused high bits of the last word.
	template
	inline typename bitset::this_type&
	bitset::set()
	{
		base_type::set(); // This sets all bits.
		if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32.
			mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly.
		return *this;
	}


	// Sets bit i to value. For i >= N the bitset is left unchanged: the call fails an
	// assertion when asserts are enabled and throws std::out_of_range when exceptions
	// are enabled; with both disabled it is a no-op.
	template
	inline typename bitset::this_type&
	bitset::set(size_type i, bool value)
	{
		if(i < N)
			base_type::set(i, value);
		else
		{
			#if EASTL_ASSERT_ENABLED
				if(EASTL_UNLIKELY(!(i < N)))
					EASTL_FAIL_MSG("bitset::set -- out of range");
			#endif

			#if EASTL_EXCEPTIONS_ENABLED
				throw std::out_of_range("bitset::set");
			#endif
		}

		return *this;
	}


	// Clears every bit.
	template
	inline typename bitset::this_type&
	bitset::reset()
	{
		base_type::reset();
		return *this;
	}


	// Clears bit i. Out-of-range handling is the same as in set(i, value).
	template
	inline typename bitset::this_type&
	bitset::reset(size_type i)
	{
		if(EASTL_LIKELY(i < N))
			DoGetWord(i) &= ~(static_cast(1) << (i & kBitsPerWordMask));
		else
		{
			#if EASTL_ASSERT_ENABLED
				if(EASTL_UNLIKELY(!(i < N)))
					EASTL_FAIL_MSG("bitset::reset -- out of range");
			#endif

			#if EASTL_EXCEPTIONS_ENABLED
				throw std::out_of_range("bitset::reset");
			#endif
		}

		return *this;
	}


	// Inverts all N bits, then clears the unused high bits of the last word.
	template
	inline typename bitset::this_type&
	bitset::flip()
	{
		EA_DISABLE_VC_WARNING(6313)
		base_type::flip();
		if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32.
			mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly.
return *this; EA_RESTORE_VC_WARNING() } template inline typename bitset::this_type& bitset::flip(size_type i) { if(EASTL_LIKELY(i < N)) DoGetWord(i) ^= (static_cast(1) << (i & kBitsPerWordMask)); else { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(!(i < N))) EASTL_FAIL_MSG("bitset::flip -- out of range"); #endif #if EASTL_EXCEPTIONS_ENABLED throw std::out_of_range("bitset::flip"); #endif } return *this; } template inline typename bitset::this_type bitset::operator~() const { return this_type(*this).flip(); } template inline typename bitset::reference bitset::operator[](size_type i) { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(!(i < N))) EASTL_FAIL_MSG("bitset::operator[] -- out of range"); #endif return reference(*this, i); } template inline bool bitset::operator[](size_type i) const { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(!(i < N))) EASTL_FAIL_MSG("bitset::operator[] -- out of range"); #endif return (DoGetWord(i) & (static_cast(1) << (i & kBitsPerWordMask))) != 0; } template inline const typename bitset::word_type* bitset::data() const { return base_type::mWord; } template inline typename bitset::word_type* bitset::data() { return base_type::mWord; } template inline void bitset::from_uint32(uint32_t value) { detail::from_unsigned_integral(*this, value); } template inline void bitset::from_uint64(uint64_t value) { detail::from_unsigned_integral(*this, value); } template inline unsigned long bitset::to_ulong() const { return detail::to_unsigned_integral(*this); } template inline uint32_t bitset::to_uint32() const { return detail::to_unsigned_integral(*this); } template inline uint64_t bitset::to_uint64() const { return detail::to_unsigned_integral(*this); } // template // inline typename bitset::size_type // bitset::count() const // { // return base_type::count(); // } template inline typename bitset::size_type bitset::size() const { return (size_type)N; } template inline bool bitset::operator==(const this_type& x) const { return base_type::operator==(x); } #if 
!defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) template inline bool bitset::operator!=(const this_type& x) const { return !base_type::operator==(x); } #endif template inline bool bitset::test(size_type i) const { if(EASTL_UNLIKELY(i < N)) return (DoGetWord(i) & (static_cast(1) << (i & kBitsPerWordMask))) != 0; #if EASTL_ASSERT_ENABLED EASTL_FAIL_MSG("bitset::test -- out of range"); #endif #if EASTL_EXCEPTIONS_ENABLED throw std::out_of_range("bitset::test"); #else return false; #endif } // template // inline bool bitset::any() const // { // return base_type::any(); // } template inline bool bitset::all() const { return count() == size(); } template inline bool bitset::none() const { return !base_type::any(); } template inline typename bitset::this_type bitset::operator<<(size_type n) const { return this_type(*this).operator<<=(n); } template inline typename bitset::this_type bitset::operator>>(size_type n) const { return this_type(*this).operator>>=(n); } template inline typename bitset::size_type bitset::find_first() const { const size_type i = base_type::DoFindFirst(); if(i < kSize) return i; // Else i could be the base type bit count, so we clamp it to our size. return kSize; } template inline typename bitset::size_type bitset::find_next(size_type last_find) const { const size_type i = base_type::DoFindNext(last_find); if(i < kSize) return i; // Else i could be the base type bit count, so we clamp it to our size. return kSize; } template inline typename bitset::size_type bitset::find_last() const { const size_type i = base_type::DoFindLast(); if(i < kSize) return i; // Else i could be the base type bit count, so we clamp it to our size. return kSize; } template inline typename bitset::size_type bitset::find_prev(size_type last_find) const { const size_type i = base_type::DoFindPrev(last_find); if(i < kSize) return i; // Else i could be the base type bit count, so we clamp it to our size. 
return kSize;
	}



	///////////////////////////////////////////////////////////////////////////
	// global operators
	///////////////////////////////////////////////////////////////////////////

	// Returns the bitwise AND of a and b as a new bitset.
	template
	inline bitset operator&(const bitset& a, const bitset& b)
	{
		// We get better inlining when we don't declare temporary variables.
		return bitset(a).operator&=(b);
	}


	// Returns the bitwise OR of a and b as a new bitset.
	template
	inline bitset operator|(const bitset& a, const bitset& b)
	{
		return bitset(a).operator|=(b);
	}


	// Returns the bitwise XOR of a and b as a new bitset.
	template
	inline bitset operator^(const bitset& a, const bitset& b)
	{
		return bitset(a).operator^=(b);
	}


} // namespace eastl


EA_RESTORE_VC_WARNING();


#endif // Header include guard

================================================
FILE: include/EASTL/bitvector.h
================================================
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
/////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
// Implements a bit vector, which is essentially a vector of bool but which
// uses bits instead of bytes. It is thus similar to the original std::vector.
///////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
// Note: This code is not yet complete: it isn't tested and doesn't yet
//       support containers other than vector.
///////////////////////////////////////////////////////////////////////////////


#ifndef EASTL_BITVECTOR_H
#define EASTL_BITVECTOR_H


// NOTE(review): the header names of the #include directives below are missing in this copy of the file.
#include
#include
#include
#include
#include

#if EASTL_EXCEPTIONS_ENABLED
#include
#endif

EA_DISABLE_VC_WARNING(4480); // nonstandard extension used: specifying underlying type for enum

#if defined(EA_PRAGMA_ONCE_SUPPORTED)
	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
#endif



namespace eastl
{

	/// EASTL_BITVECTOR_DEFAULT_NAME
	///
	/// Defines a default container name in the absence of a user-provided name.
	///
	#ifndef EASTL_BITVECTOR_DEFAULT_NAME
		#define EASTL_BITVECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " bitvector" // Unless the user overrides something, this is "EASTL bitvector".
	#endif

	/// EASTL_BITVECTOR_DEFAULT_ALLOCATOR
	///
	#ifndef EASTL_BITVECTOR_DEFAULT_ALLOCATOR
		#define EASTL_BITVECTOR_DEFAULT_ALLOCATOR allocator_type(EASTL_BITVECTOR_DEFAULT_NAME)
	#endif



	/// BitvectorWordType
	/// Defines the integral data type used by bitvector.
	typedef EASTL_BITSET_WORD_TYPE_DEFAULT BitvectorWordType;


	template
	class bitvector_const_iterator;


	/// bitvector_reference
	///
	/// Proxy that stands in for a single bit: it stores a pointer to the word
	/// that holds the bit and the bit's index within that word.
	///
	template
	class bitvector_reference
	{
	public:
		typedef eastl_size_t size_type;
		bitvector_reference(Element* ptr, eastl_size_t i);
		bitvector_reference(const bitvector_reference& other);

		// Both assignments write to the referenced bit; they do not rebind the proxy.
		bitvector_reference& operator=(bool value);
		bitvector_reference& operator=(const bitvector_reference& rhs);

		// Reads the referenced bit.
		operator bool() const // Defined here because some compilers fail otherwise.
			{ return (*mpBitWord & (Element(1) << mnBitIndex)) != 0; }

	protected:
		friend class bitvector_const_iterator;

		Element*  mpBitWord;   // Word that contains the referenced bit.
		size_type mnBitIndex;  // Index of the referenced bit within *mpBitWord.

		// Leaves both members uninitialized.
		bitvector_reference() {}

		// Rebinds the proxy to the same word/bit as rhs (unlike operator=, which copies the bit value).
		void CopyFrom(const bitvector_reference& rhs);
	};



	/// bitvector_const_iterator
	///
	/// Read-only random-access iterator over bits. The position is held in a
	/// bitvector_reference (word pointer + bit index).
	///
	template
	class bitvector_const_iterator
	{
	public:
		typedef eastl::random_access_iterator_tag iterator_category;
		typedef bitvector_const_iterator         this_type;
		typedef bool                                      value_type;
		typedef bitvector_reference              reference_type;
		typedef ptrdiff_t                                 difference_type;
		typedef Element                                   element_type;
		typedef element_type*                             pointer;           // This is wrong. It needs to be something that acts as a pointer to a bit.
		typedef element_type&                             reference;         // This is not right. It needs to be something that acts as a pointer to a bit.
typedef eastl_size_t                              size_type;

	protected:
		reference_type mReference;   // Current position: word pointer plus bit index.

		enum
		{
			kBitCount = (8 * sizeof(Element))   // Number of bits in one Element word.
		};

	public:
		// Dereference and indexed access return the bit value, not a proxy.
		bool operator*() const;
		bool operator[](difference_type n) const;

		bitvector_const_iterator();
		bitvector_const_iterator(const element_type* p, eastl_size_t i);
		bitvector_const_iterator(const reference_type& referenceType);
		bitvector_const_iterator(const bitvector_const_iterator& other);

		bitvector_const_iterator& operator++();
		bitvector_const_iterator  operator++(int);
		bitvector_const_iterator& operator--();
		bitvector_const_iterator  operator--(int);

		bitvector_const_iterator& operator+=(difference_type dist);
		bitvector_const_iterator& operator-=(difference_type dist);
		bitvector_const_iterator  operator+ (difference_type dist) const;
		bitvector_const_iterator  operator- (difference_type dist) const;

		// Distance in bits between two iterators.
		difference_type operator-(const this_type& rhs) const;

		bitvector_const_iterator& operator= (const this_type& rhs);

		bool operator==(const this_type& rhs) const;
		bool operator!=(const this_type& rhs) const;

		bool operator< (const this_type& rhs) const;
		bool operator<=(const this_type& rhs) const;
		bool operator> (const this_type& rhs) const;
		bool operator>=(const this_type& rhs) const;

		// Returns a combination of eastl::isf_* flags describing this iterator relative to
		// the word range [pStart, pEnd) whose last word has nExtraBits unused bits.
		int validate(const element_type* pStart, const element_type* pEnd, eastl_size_t nExtraBits) const;

	protected:
		template
		friend class bitvector;

		reference_type& get_reference_type() { return mReference; }
	};



	/// bitvector_iterator
	///
	/// Mutable counterpart of bitvector_const_iterator; dereferencing yields a
	/// bitvector_reference proxy instead of a bool.
	///
	template
	class bitvector_iterator : public bitvector_const_iterator
	{
	public:
		typedef eastl::random_access_iterator_tag iterator_category;
		typedef bitvector_iterator                        this_type;
		typedef bitvector_const_iterator         base_type;
		typedef bool                                      value_type;
		typedef bitvector_reference              reference_type;
		typedef ptrdiff_t                                 difference_type;
		typedef Element                                   element_type;
		typedef element_type*                             pointer;           // This is wrong. It needs to be something that acts as a pointer to a bit.
		typedef element_type&                             reference;         // This is not right. It needs to be something that acts as a pointer to a bit.
public:
		// Dereference and indexed access return a writable bit proxy.
		reference_type operator*() const;
		reference_type operator[](difference_type n) const;

		bitvector_iterator();
		bitvector_iterator(element_type* p, eastl_size_t i);
		bitvector_iterator(reference_type& referenceType);

		// The in-place movement operators forward to the base class and return the derived type.
		bitvector_iterator& operator++()    { base_type::operator++(); return *this; }
		bitvector_iterator& operator--()    { base_type::operator--(); return *this; }
		bitvector_iterator  operator++(int);
		bitvector_iterator  operator--(int);

		bitvector_iterator& operator+=(difference_type dist) { base_type::operator+=(dist); return *this; }
		bitvector_iterator& operator-=(difference_type dist) { base_type::operator-=(dist); return *this; }
		bitvector_iterator  operator+ (difference_type dist) const;
		bitvector_iterator  operator- (difference_type dist) const;

		// We need this here because we are overloading operator-, so for some reason the
		// other overload of the function can't be found unless it's explicitly specified.
		difference_type operator-(const base_type& rhs) const { return base_type::operator-(rhs); }
	};



	/// bitvector
	///
	/// Implements an array of bits treated as boolean values.
	/// bitvector is similar to vector but uses bits instead of bytes and
	/// allows the user to use other containers such as deque instead of vector.
	/// bitvector is different from bitset in that bitset is less flexible but
	/// uses less memory and has higher performance.
	///
	/// To consider: Rename the Element template parameter to WordType, for
	/// consistency with bitset.
///
	template >
	class bitvector
	{
	public:
		typedef bitvector            this_type;
		typedef bool                                            value_type;
		typedef bitvector_reference                    reference;
		typedef bool                                            const_reference;
		typedef bitvector_iterator                     iterator;
		typedef bitvector_const_iterator               const_iterator;
		typedef eastl::reverse_iterator               reverse_iterator;
		typedef eastl::reverse_iterator         const_reverse_iterator;
		typedef Allocator                                       allocator_type;
		typedef Element                                         element_type;
		typedef Container                                       container_type;
		typedef eastl_size_t                                    size_type;
		typedef ptrdiff_t                                       difference_type;

		#if defined(_MSC_VER) && (_MSC_VER >= 1400) && (_MSC_VER <= 1600) && !EASTL_STD_CPP_ONLY  // _MSC_VER of 1400 means VS2005, 1600 means VS2010. VS2012 generates errors with usage of enum:size_type.
			enum : size_type {                      // Use Microsoft enum language extension, allowing for smaller debug symbols than using a static const. Users have been affected by this.
				npos     = container_type::npos,
				kMaxSize = container_type::kMaxSize
			};
		#else
			static const size_type npos     = container_type::npos;      /// 'npos' means non-valid position or simply non-position.
			static const size_type kMaxSize = container_type::kMaxSize;  /// -1 is reserved for 'npos'. It also happens to be slightly beneficial that kMaxSize is a value less than -1, as it helps us deal with potential integer wraparound issues.
		#endif

		enum
		{
			kBitCount = 8 * sizeof(Element)   // Number of bits in one Element word.
		};

	protected:
		container_type mContainer;     // Underlying container of Element words.
		size_type      mFreeBitCount;  // Unused bits in the last word of mContainer.

	public:
		// Construction.
		bitvector();
		explicit bitvector(const allocator_type& allocator);
		explicit bitvector(size_type n, const allocator_type& allocator = EASTL_BITVECTOR_DEFAULT_ALLOCATOR);
		bitvector(size_type n, value_type value, const allocator_type& allocator = EASTL_BITVECTOR_DEFAULT_ALLOCATOR);

		template
		bitvector(InputIterator first, InputIterator last);

		void swap(this_type& x);

		template
		void assign(InputIterator first, InputIterator last);

		// Iteration.
		iterator       begin() EA_NOEXCEPT;
		const_iterator begin() const EA_NOEXCEPT;
		const_iterator cbegin() const EA_NOEXCEPT;

		iterator       end() EA_NOEXCEPT;
		const_iterator end() const EA_NOEXCEPT;
		const_iterator cend() const EA_NOEXCEPT;

		reverse_iterator       rbegin() EA_NOEXCEPT;
		const_reverse_iterator rbegin() const EA_NOEXCEPT;
		const_reverse_iterator crbegin() const EA_NOEXCEPT;

		reverse_iterator       rend() EA_NOEXCEPT;
		const_reverse_iterator rend() const EA_NOEXCEPT;
		const_reverse_iterator crend() const EA_NOEXCEPT;

		// Size and capacity.
		bool      empty() const EA_NOEXCEPT;
		size_type size() const EA_NOEXCEPT;
		size_type capacity() const EA_NOEXCEPT;

		void resize(size_type n, value_type value);
		void resize(size_type n);
		void reserve(size_type n);
		void set_capacity(size_type n = npos);  // Revises the capacity to the user-specified value. Resizes the container to match the capacity if the requested capacity n is less than the current size. If n == npos then the capacity is reallocated (if necessary) such that capacity == size.

		void push_back();
		void push_back(value_type value);
		void pop_back();

		// Element access.
		reference       front();
		const_reference front() const;
		reference       back();
		const_reference back() const;

		bool            test(size_type n, bool defaultValue) const; // Returns true if the bit index is < size() and set. Returns defaultValue if the bit is >= size().
		void            set(size_type n, bool value);               // Resizes the container to accommodate n if necessary.

		reference       at(size_type n);                    // throws an out_of_range exception if n is invalid.
		const_reference at(size_type n) const;

		reference       operator[](size_type n);            // behavior is undefined if n is invalid.
		const_reference operator[](size_type n) const;

		/*
		Work in progress:
		template  iterator       find_first();                                  // Finds the lowest "on" bit.
		template  iterator       find_next(const_iterator it);                  // Finds the next lowest "on" bit after it.
		template  iterator       find_last();                                   // Finds the index of the last "on" bit, returns size if none are set.
		template  iterator       find_prev(const_iterator it);                  // Finds the index of the last "on" bit before last_find, returns size if none are set.

		template  const_iterator find_first() const;                            // Finds the lowest "on" bit.
		template  const_iterator find_next(const_iterator it) const;            // Finds the next lowest "on" bit after it.
		template  const_iterator find_last() const;                             // Finds the index of the last "on" bit, returns size if none are set.
		template  const_iterator find_prev(const_iterator it) const;            // Finds the index of the last "on" bit before last_find, returns size if none are set.
		*/

		element_type*       data() EA_NOEXCEPT;
		const element_type* data() const EA_NOEXCEPT;

		// Modification.
		iterator insert(const_iterator position, value_type value);
		void     insert(const_iterator position, size_type n, value_type value);

		// template  Not yet implemented. See below for disabled definition.
		// void insert(const_iterator position, InputIterator first, InputIterator last);

		iterator erase(const_iterator position);
		iterator erase(const_iterator first, const_iterator last);

		reverse_iterator erase(const_reverse_iterator position);
		reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last);

		void clear();
		void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs.

		container_type&       get_container();
		const container_type& get_container() const;

		// Validation and queries.
		bool validate() const;
		int  validate_iterator(const_iterator i) const;

		bool any() const;
		bool all() const;
	};




	///////////////////////////////////////////////////////////////////////
	// bitvector_reference
	///////////////////////////////////////////////////////////////////////

	// Binds the proxy to bit i of the word at p.
	template
	bitvector_reference::bitvector_reference(Element* p, eastl_size_t i)
	  : mpBitWord(p),
		mnBitIndex(i)
	{
	}


	// Copy constructor: refers to the same word/bit as other.
	template
	bitvector_reference::bitvector_reference(const bitvector_reference& other)
	  : mpBitWord(other.mpBitWord),
		mnBitIndex(other.mnBitIndex)
	{
	}


	// Writes value into the referenced bit and returns this proxy.
	template
	bitvector_reference&
	bitvector_reference::operator=(bool value)
	{
		const Element mask = (Element)(Element(1) << mnBitIndex);

		if(value)
			*mpBitWord |= mask;
		else
			*mpBitWord &= ~mask;

		return *this;
	}


	// Copies the bit value read through rhs into the referenced bit; the proxy is not rebound.
	template
	bitvector_reference&
	bitvector_reference::operator=(const bitvector_reference& rhs)
	{
		return (*this = (bool)rhs);
	}


	// Rebinds this proxy to the word/bit that rhs refers to.
	template
	void bitvector_reference::CopyFrom(const bitvector_reference& rhs)
	{
		mpBitWord  = rhs.mpBitWord;
		mnBitIndex = rhs.mnBitIndex;
	}




	///////////////////////////////////////////////////////////////////////
	// bitvector_const_iterator
	///////////////////////////////////////////////////////////////////////

	// Default constructor: null word pointer, bit index 0.
	template
	bitvector_const_iterator::bitvector_const_iterator()
		: mReference(0, 0)
	{
	}


	// Positions the iterator at bit i of the word at p.
	template
	bitvector_const_iterator::bitvector_const_iterator(const Element* p, eastl_size_t i)
		: mReference(const_cast(p), i) // const_cast is safe here because we never let mReference leak and we don't modify it.
{
	}


	// Positions the iterator at the bit that the given reference refers to.
	template
	bitvector_const_iterator::bitvector_const_iterator(const reference_type& reference)
		: mReference(reference)
	{
	}


	// Copy constructor.
	template
	bitvector_const_iterator::bitvector_const_iterator(const bitvector_const_iterator& other)
		: mReference(other.mReference)
	{
	}


	// Pre-increment: advances one bit, moving to bit 0 of the next word after the last bit of a word.
	template
	bitvector_const_iterator&
	bitvector_const_iterator::operator++()
	{
		++mReference.mnBitIndex;

		if(mReference.mnBitIndex == kBitCount)
		{
			++mReference.mpBitWord;
			mReference.mnBitIndex = 0;
		}

		return *this;
	}


	// Pre-decrement: steps back one bit, moving to the last bit of the previous word from bit 0.
	template
	bitvector_const_iterator&
	bitvector_const_iterator::operator--()
	{
		if(mReference.mnBitIndex == 0)
		{
			--mReference.mpBitWord;
			mReference.mnBitIndex = kBitCount;
		}

		--mReference.mnBitIndex;
		return *this;
	}


	// Post-increment: returns the position before advancing.
	template
	bitvector_const_iterator
	bitvector_const_iterator::operator++(int)
	{
		bitvector_const_iterator copy(*this);
		++*this;
		return copy;
	}


	// Post-decrement: returns the position before stepping back.
	template
	bitvector_const_iterator
	bitvector_const_iterator::operator--(int)
	{
		bitvector_const_iterator copy(*this);
		--*this;
		return copy;
	}


	// Moves the iterator by n bits (n may be negative).
	// n is first made relative to bit 0 of the current word, then split into a
	// whole-word offset and a bit index within the destination word.
	template
	bitvector_const_iterator&
	bitvector_const_iterator::operator+=(difference_type n)
	{
		n += mReference.mnBitIndex;

		if(n >= difference_type(0))
		{
			mReference.mpBitWord  += n / kBitCount;
			mReference.mnBitIndex  = (size_type)(n % kBitCount);
		}
		else
		{
			// backwards is tricky
			// figure out how many full words backwards we need to move
			// n = [-1..-32] => 1
			// n = [-33..-64] => 2
			const size_type backwards = (size_type)(-n + kBitCount - 1);
			mReference.mpBitWord -= backwards / kBitCount;

			// -1 => 31; backwards = 32; 31 - (backwards % 32) = 31
			// -2 => 30; backwards = 33; 31 - (backwards % 32) = 30
			// -3 => 29; backwards = 34
			// ..
// -32 => 0; backwards = 63; 31 - (backwards % 32) = 0 // -33 => 31; backwards = 64; 31 - (backwards % 32) = 31 mReference.mnBitIndex = (kBitCount - 1) - (backwards % kBitCount); } return *this; } template bitvector_const_iterator& bitvector_const_iterator::operator-=(difference_type n) { return (*this += -n); } template bitvector_const_iterator bitvector_const_iterator::operator+(difference_type n) const { bitvector_const_iterator copy(*this); copy += n; return copy; } template bitvector_const_iterator bitvector_const_iterator::operator-(difference_type n) const { bitvector_const_iterator copy(*this); copy -= n; return copy; } template typename bitvector_const_iterator::difference_type bitvector_const_iterator::operator-(const this_type& rhs) const { return ((mReference.mpBitWord - rhs.mReference.mpBitWord) * kBitCount) + mReference.mnBitIndex - rhs.mReference.mnBitIndex; } template bool bitvector_const_iterator::operator==(const this_type& rhs) const { return (mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex == rhs.mReference.mnBitIndex); } template bool bitvector_const_iterator::operator!=(const this_type& rhs) const { return !(*this == rhs); } template bool bitvector_const_iterator::operator<(const this_type& rhs) const { return (mReference.mpBitWord < rhs.mReference.mpBitWord) || ((mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex < rhs.mReference.mnBitIndex)); } template bool bitvector_const_iterator::operator<=(const this_type& rhs) const { return (mReference.mpBitWord < rhs.mReference.mpBitWord) || ((mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex <= rhs.mReference.mnBitIndex)); } template bool bitvector_const_iterator::operator>(const this_type& rhs) const { return !(*this <= rhs); } template bool bitvector_const_iterator::operator>=(const this_type& rhs) const { return !(*this < rhs); } template bool bitvector_const_iterator::operator*() const { return mReference; } template 
bool bitvector_const_iterator::operator[](difference_type n) const { return *(*this + n); } template bitvector_const_iterator& bitvector_const_iterator::operator= (const this_type& rhs) { mReference.CopyFrom(rhs.mReference); return *this; } template int bitvector_const_iterator::validate(const Element* pStart, const Element* pEnd, eastl_size_t nExtraBits) const { const Element* const pCurrent = mReference.mpBitWord; if(pCurrent >= pStart) { if(nExtraBits == 0) { if(pCurrent == pEnd && mReference) return eastl::isf_valid | eastl::isf_current; else if(pCurrent < pEnd) return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; } else if(pCurrent == (pEnd - 1)) { const size_type bit = mReference.mnBitIndex; const size_type lastbit = kBitCount - nExtraBits; if(bit == lastbit) return eastl::isf_valid | eastl::isf_current; else if(bit < lastbit) return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; } else if(pCurrent < pEnd) { return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; } } return eastl::isf_none; } /////////////////////////////////////////////////////////////////////// // bitvector_iterator /////////////////////////////////////////////////////////////////////// template bitvector_iterator::bitvector_iterator() : base_type() { } template bitvector_iterator::bitvector_iterator(Element* p, eastl_size_t i) : base_type(p, i) { } template bitvector_iterator::bitvector_iterator(reference_type& reference) : base_type(reference) { } template typename bitvector_iterator::reference_type bitvector_iterator::operator*() const { return base_type::mReference; } template typename bitvector_iterator::reference_type bitvector_iterator::operator[](difference_type n) const { return *(*this + n); } template void MoveBits(bitvector_iterator start, bitvector_iterator end, bitvector_iterator dest) { // Slow implemenation; could optimize by moving a word at a time. 
if(dest <= start) { while(start != end) { *dest = *start; ++dest; ++start; } } else { // Need to move backwards dest += (end - start); while(start != end) { --dest; --end; *dest = *end; } } } template bitvector_iterator bitvector_iterator::operator++(int) { bitvector_iterator copy(*this); ++*this; return copy; } template bitvector_iterator bitvector_iterator::operator--(int) { bitvector_iterator copy(*this); --*this; return copy; } template bitvector_iterator bitvector_iterator::operator+(difference_type n) const { bitvector_iterator copy(*this); copy += n; return copy; } template bitvector_iterator bitvector_iterator::operator-(difference_type n) const { bitvector_iterator copy(*this); copy -= n; return copy; } /////////////////////////////////////////////////////////////////////// // bitvector /////////////////////////////////////////////////////////////////////// template template void bitvector::assign(InputIterator first, InputIterator last) { // To consider: We can maybe specialize this on bitvector_iterator to do a fast bitwise copy. // We can also specialize for random access iterators to figure out the size & reserve first. 
clear();

// Append one value at a time; no size is computed or reserved up front.
while(first != last)
{
	push_back(*first);
	++first;
}
}


// begin()/cbegin(): iterator at bit 0 of the first word of the container.
template typename bitvector::iterator bitvector::begin() EA_NOEXCEPT
	{ return iterator(mContainer.begin(), 0); }

template typename bitvector::const_iterator bitvector::begin() const EA_NOEXCEPT
	{ return const_iterator(mContainer.begin(), 0); }

template typename bitvector::const_iterator bitvector::cbegin() const EA_NOEXCEPT
	{ return const_iterator(mContainer.begin(), 0); }


// end()/cend(): the one-past-the-last-word position moved back by
// mFreeBitCount, i.e. by the number of unused bits in the last word.
template typename bitvector::iterator bitvector::end() EA_NOEXCEPT
	{ return iterator(mContainer.end(), 0) - mFreeBitCount; }

template typename bitvector::const_iterator bitvector::end() const EA_NOEXCEPT
	{ return const_iterator(mContainer.end(), 0) - mFreeBitCount; }

template typename bitvector::const_iterator bitvector::cend() const EA_NOEXCEPT
	{ return const_iterator(mContainer.end(), 0) - mFreeBitCount; }


// True when the underlying word container holds no words.
template bool bitvector::empty() const EA_NOEXCEPT
	{ return mContainer.empty(); }


// Number of bits in use: all bits of all words, minus the unused bits of the last word.
template typename bitvector::size_type bitvector::size() const EA_NOEXCEPT
	{ return (mContainer.size() * kBitCount) - mFreeBitCount; }


// Capacity in bits: the word capacity of the container times the bits per word.
template typename bitvector::size_type bitvector::capacity() const EA_NOEXCEPT
	{ return mContainer.capacity() * kBitCount; }


// Sets the container capacity to hold n bits (rounded up to whole words).
// npos is passed through to the container unchanged.
template void bitvector::set_capacity(size_type n)
{
	if(n == npos)
		mContainer.set_capacity(npos);
	else
		mContainer.set_capacity((n + kBitCount - 1) / kBitCount);
}


// Reverse iteration: rbegin wraps end(), rend wraps begin().
template typename bitvector::reverse_iterator bitvector::rbegin() EA_NOEXCEPT
	{ return reverse_iterator(end()); }

template typename bitvector::const_reverse_iterator bitvector::rbegin() const EA_NOEXCEPT
	{ return const_reverse_iterator(end()); }

template typename bitvector::const_reverse_iterator bitvector::crbegin() const EA_NOEXCEPT
	{ return const_reverse_iterator(end()); }

template typename bitvector::reverse_iterator bitvector::rend() EA_NOEXCEPT
	{ return reverse_iterator(begin()); }

template typename bitvector::const_reverse_iterator bitvector::rend() const EA_NOEXCEPT
	{ return const_reverse_iterator(begin()); }

template typename
bitvector::const_reverse_iterator bitvector::crend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } template typename bitvector::reference bitvector::front() { EASTL_ASSERT(!empty()); return reference(&mContainer[0], 0); } template typename bitvector::const_reference bitvector::front() const { EASTL_ASSERT(!empty()); // To consider: make a better solution to this than const_cast. return reference(const_cast(&mContainer[0]), 0); } template typename bitvector::reference bitvector::back() { EASTL_ASSERT(!empty()); return *(--end()); } template typename bitvector::const_reference bitvector::back() const { EASTL_ASSERT(!empty()); return *(--end()); } template void bitvector::push_back() { if(!mFreeBitCount) { mContainer.push_back(); mFreeBitCount = kBitCount; } --mFreeBitCount; } template void bitvector::push_back(value_type value) { push_back(); *--end() = value; } template void bitvector::pop_back() { EASTL_ASSERT(!empty()); if(++mFreeBitCount == kBitCount) { mContainer.pop_back(); mFreeBitCount = 0; } } template void bitvector::reserve(size_type n) { const size_type wordCount = (n + kBitCount - 1) / kBitCount; mContainer.reserve(wordCount); } template void bitvector::resize(size_type n) { const size_type wordCount = (n + kBitCount - 1) / kBitCount; const size_type extra = (wordCount * kBitCount) - n; mContainer.resize(wordCount); mFreeBitCount = extra; } template void bitvector::resize(size_type n, value_type value) { const size_type s = size(); if(n < s) resize(n); // Fill up to the end of a word size_type newbits = n - s; while(mFreeBitCount && newbits) { push_back(value); --newbits; } // Fill the rest a word at a time if(newbits) { element_type element(0); if(value) element = ~element; const size_type words = (n + kBitCount - 1) / kBitCount; const size_type extra = words * kBitCount - n; mContainer.resize(words, element); mFreeBitCount = extra; } } template bool bitvector::test(size_type n, bool defaultValue) const { if(n < size()) return *(begin() 
+ (difference_type)n);

return defaultValue;
}


// Sets bit n to value, first growing the bitvector to n + 1 bits when n is
// not less than size().
template void bitvector::set(size_type n, bool value)
{
	if(EASTL_UNLIKELY(n >= size()))
		resize(n + 1);

	*(begin() + (difference_type)n) = value;
}


template typename bitvector::reference bitvector::at(size_type n)
{
	// The difference between at and operator[] is that at signals
	// if the requested position is out of range by throwing an
	// out_of_range exception.

	#if EASTL_EXCEPTIONS_ENABLED
		if(EASTL_UNLIKELY(n >= size()))
			throw std::out_of_range("bitvector::at -- out of range");
	#elif EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(n >= size()))
			EASTL_FAIL_MSG("bitvector::at -- out of range");
	#endif

	return *(begin() + (difference_type)n);
}


// Const overload of at(); performs the same range check as the non-const version.
template typename bitvector::const_reference bitvector::at(size_type n) const
{
	#if EASTL_EXCEPTIONS_ENABLED
		if(EASTL_UNLIKELY(n >= size()))
			throw std::out_of_range("bitvector::at -- out of range");
	#elif EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(n >= size()))
			EASTL_FAIL_MSG("bitvector::at -- out of range");
	#endif

	return *(begin() + (difference_type)n);
}


// Unchecked access to bit n; no range test is performed here.
template typename bitvector::reference bitvector::operator[](size_type n)
{
	return *(begin() + (difference_type)n);
}


template typename bitvector::const_reference bitvector::operator[](size_type n) const
{
	return *(begin() + (difference_type)n);
}


/*
template
template
typename bitvector::iterator
bitvector::find_first()
{
	return begin();
}

template  iterator find_next(const_iterator it);
template  iterator find_last();
template  iterator find_prev(const_iterator it);

template  const_iterator find_first() const;
template  const_iterator find_next(const_iterator it) const;
template  const_iterator find_last() const;
template  const_iterator find_prev(const_iterator it) const;
*/


// Direct access to the underlying word container.
template inline typename bitvector::container_type& bitvector::get_container()
{
	return mContainer;
}


template inline const typename bitvector::container_type& bitvector::get_container() const
{
	return mContainer;
}


// Returns true when the container validates and mFreeBitCount is below kBitCount.
template bool bitvector::validate() const
{
if(!mContainer.validate())
	return false;

// A completely free last word is never kept, so the free-bit count must
// be strictly less than the number of bits per word.
if((unsigned)mFreeBitCount >= kBitCount)
	return false;

return true;
}


// Classifies iterator i against this container's word range; the free-bit
// count of the last word is passed as the number of extra bits.
template int bitvector::validate_iterator(const_iterator i) const
{
	return i.validate(mContainer.begin(), mContainer.end(), mFreeBitCount);
}


// Pointer to the underlying word storage, as returned by the container.
template typename bitvector::element_type* bitvector::data() EA_NOEXCEPT
{
	return mContainer.data();
}


template const typename bitvector::element_type* bitvector::data() const EA_NOEXCEPT
{
	return mContainer.data();
}


// Inserts a single bit with the given value before position and returns an
// iterator to the inserted bit.
template typename bitvector::iterator bitvector::insert(const_iterator position, value_type value)
{
	iterator iPosition(position.get_reference_type()); // This is just a non-const version of position.

	#if EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_valid) == 0)
			EASTL_FAIL_MSG("bitvector::insert -- invalid iterator");
	#endif

	// Save because we might reallocate
	const typename iterator::difference_type n = iPosition - begin();
	push_back();
	iPosition = begin() + n;

	// Shift [position, old end) up by one bit, then write the new bit.
	MoveBits(iPosition, --end(), ++iterator(iPosition));
	*iPosition = value;

	return iPosition;
}


// Inserts n bits with the given value before position.
template void bitvector::insert(const_iterator position, size_type n, value_type value)
{
	iterator iPosition(position.get_reference_type()); // This is just a non-const version of position.

	#if EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_valid) == 0)
			EASTL_FAIL_MSG("bitvector::insert -- invalid iterator");
	#endif

	// Save because we might reallocate.
	const typename iterator::difference_type p = iPosition - begin();
	resize(size() + n);
	iPosition = begin() + p;

	// Shift [position, old end) up by n bits to open the gap.
	iterator insert_end = iPosition + n;
	MoveBits(iPosition, end() - n, insert_end);

	// To do: Optimize this to word-at-a-time for large inserts
	while(iPosition != insert_end)
	{
		*iPosition = value;
		++iPosition;
	}
}


/*
The following is a placeholder for a future implementation. It turns out that a correct implementation of
insert(pos, first, last) is a non-trivial exercise that would take a few hours to implement and test.
The reasons why involve primarily the problem of handling the case where insertion source comes from
within the container itself, and the case that first and last (note they are templated) might not refer
to iterators but might instead refer to a value/count pair. The C++ Standard requires you to handle this case and
I (Paul Pedriana) believe that it applies even for a bitvector, given that bool is an integral type.
So you have to set up a compile-time type traits function chooser. See vector, for example.

template
template
void bitvector::insert(const_iterator position, InputIterator first, InputIterator last)
{
	iterator iPosition(position.get_reference_type()); // This is just a non-const version of position.

	// This implementation is probably broken due to not handling insertion into self.
	// To do: Make a more efficient version of this.
	difference_type distance = (iPosition - begin());

	while(first != last)
	{
		insert(iPosition, *first);
		iPosition = begin() + ++distance;
		++first;
	}
}
*/


// Erases the bit at position and returns an iterator to the same position,
// which now refers to the bit that followed the erased one.
template typename bitvector::iterator bitvector::erase(const_iterator position)
{
	iterator iPosition(position.get_reference_type()); // This is just a non-const version of position.

	#if EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_can_dereference) == 0)
			EASTL_FAIL_MSG("bitvector::erase -- invalid iterator");
	#endif

	// Shift everything after position down by one bit, then drop the last bit.
	MoveBits(++iterator(iPosition), end(), iPosition);
	resize(size() - 1);

	// Verify that no reallocation occurred.
	EASTL_ASSERT(validate_iterator(iPosition) & eastl::isf_valid);
	return iPosition;
}


// Erases the bits in [first, last) and returns an iterator to the position of first.
template typename bitvector::iterator bitvector::erase(const_iterator first, const_iterator last)
{
	iterator iFirst(first.get_reference_type()); // This is just a non-const version of first.
	iterator iLast(last.get_reference_type());   // This is just a non-const version of last.
#if EASTL_ASSERT_ENABLED
	if(EASTL_UNLIKELY(validate_iterator(iLast) & eastl::isf_valid) == 0)
		EASTL_FAIL_MSG("bitvector::erase -- invalid iterator");
#endif

// An empty range erases nothing; iFirst is returned unchanged.
if(!(iFirst == iLast))
{
	#if EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(validate_iterator(iFirst) & eastl::isf_can_dereference) == 0)
			EASTL_FAIL_MSG("bitvector::erase -- invalid iterator");
	#endif

	// Shift the tail [iLast, end()) down onto iFirst, then drop the
	// now-duplicated bits from the end.
	const size_type eraseCount = (size_type)(iLast - iFirst);
	MoveBits(iLast, end(), iFirst);
	resize(size() - eraseCount);

	// Verify that no reallocation occurred.
	#if EASTL_ASSERT_ENABLED
		if(EASTL_UNLIKELY(validate_iterator(iFirst) & eastl::isf_valid) == 0)
			EASTL_FAIL_MSG("bitvector::erase -- invalid iterator");
	#endif
}

return iFirst;
}


// Erases the element a reverse iterator refers to, by advancing a copy of it
// and erasing at the resulting base() position.
template typename bitvector::reverse_iterator bitvector::erase(const_reverse_iterator position)
{
	return reverse_iterator(erase((++position).base()));
}


// Erases the reverse range [first, last) by erasing the forward range
// [last.base(), first.base()).
template typename bitvector::reverse_iterator bitvector::erase(const_reverse_iterator first, const_reverse_iterator last)
{
	// Version which erases in order from first to last.
	// difference_type i(first.base() - last.base());
	// while(i--)
	//     first = erase(first);
	// return first;

	// Version which erases in order from last to first, but is slightly more efficient:
	return reverse_iterator(erase(last.base(), first.base()));
}


// Exchanges the word containers and the free-bit counts of the two bitvectors.
template void bitvector::swap(this_type& rhs)
{
	mContainer.swap(rhs.mContainer);
	eastl::swap(mFreeBitCount, rhs.mFreeBitCount);
}


// Resets the bitvector via the container's reset_lose_memory() and clears
// the free-bit count.
template void bitvector::reset_lose_memory()
{
	mContainer.reset_lose_memory(); // intentional memory leak.
mFreeBitCount = 0; } template void bitvector::clear() { mContainer.clear(); mFreeBitCount = 0; } template bitvector::bitvector() : mContainer(), mFreeBitCount(0) { } template bitvector::bitvector(const allocator_type& allocator) : mContainer(allocator), mFreeBitCount(0) { } template bitvector::bitvector(size_type n, const allocator_type& allocator) : mContainer((n + kBitCount - 1) / kBitCount, allocator) { mFreeBitCount = kBitCount - (n % kBitCount); if(mFreeBitCount == kBitCount) mFreeBitCount = 0; } template bitvector::bitvector(size_type n, value_type value, const allocator_type& allocator) : mContainer((n + kBitCount - 1) / kBitCount, value ? ~element_type(0) : element_type(0), allocator) { mFreeBitCount = kBitCount - (n % kBitCount); if(mFreeBitCount == kBitCount) mFreeBitCount = 0; } template template bitvector::bitvector(InputIterator first, InputIterator last) : mContainer(), mFreeBitCount(0) { assign(first, last); } template bool bitvector::any() const { if (mContainer.size() == 0) return false; for (eastl_size_t i = 0, count = mContainer.size() - 1; i < count; ++i) { if (mContainer[i] != 0) return true; } Element mask = mFreeBitCount == 0 ? (Element)-1 : ((Element(1) << Element(kBitCount - mFreeBitCount)) - 1); return (mContainer.back() & mask); } template bool bitvector::all() const { if (mContainer.size() == 0) return true; for (eastl_size_t i = 0, count = mContainer.size() - 1; i < count; ++i) { if (mContainer[i] != (~(Element)0)) { return false; } } Element mask = mFreeBitCount == 0 ? (Element)-1 : ((Element(1) << Element(kBitCount - mFreeBitCount)) - 1); return (mContainer.back() & mask) == mask; } /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// template inline bool operator==(const bitvector& a, const bitvector& b) { // To do: Replace this with a smart compare implementation. This is much slower than it needs to be. 
return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); } template inline bool operator!=(const bitvector& a, const bitvector& b) { return !operator==(a, b); } template inline bool operator<(const bitvector& a, const bitvector& b) { // To do: Replace this with a smart compare implementation. This is much slower than it needs to be. return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template inline bool operator>(const bitvector& a, const bitvector& b) { return b < a; } template inline bool operator<=(const bitvector& a, const bitvector& b) { return !(b < a); } template inline bool operator>=(const bitvector& a, const bitvector& b) { return !(a < b); } template inline void swap(bitvector& a, bitvector& b) { a.swap(b); } template > using fixed_bitvector = eastl::bitvector; } // namespace eastl EA_RESTORE_VC_WARNING(); #endif // Header include guard ================================================ FILE: include/EASTL/bonus/adaptors.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_ADAPTORS_H #define EASTL_ADAPTORS_H #include #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
#endif EA_DISABLE_VC_WARNING(4512 4626) #if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ EA_DISABLE_VC_WARNING(5027) // move assignment operator was implicitly defined as deleted #endif namespace eastl { /// reverse /// /// This adaptor allows reverse iteration of a container in ranged base for-loops. /// /// for (auto& i : reverse(c)) { ... } /// template struct reverse_wrapper { template reverse_wrapper(C&& c) : mContainer(eastl::forward(c)) { /** * NOTE: * * Due to reference collapsing rules of universal references Container type is either * * const C& if the input is a const lvalue * C& if the input is a non-const lvalue * C if the input is an rvalue * const C if the input is a const rvalue thus the object will have to be copied and the copy-ctor will be called * * * Thus we either move the whole container into this object or take a reference to the lvalue avoiding the copy. * The static_assert below ensures this. */ static_assert(eastl::is_same_v, "Reference collapsed deduced type must be the same as the deduced Container type!"); } Container mContainer; }; template auto begin(const reverse_wrapper& w) -> decltype(eastl::rbegin(w.mContainer)) { return eastl::rbegin(w.mContainer); } template auto end(const reverse_wrapper& w) -> decltype(eastl::rend(w.mContainer)) { return eastl::rend(w.mContainer); } template reverse_wrapper reverse(Container&& c) { return reverse_wrapper(eastl::forward(c)); } } // namespace eastl #if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ EA_RESTORE_VC_WARNING() #endif EA_RESTORE_VC_WARNING() #endif // Header include guard ================================================ FILE: include/EASTL/bonus/call_traits.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // The design for call_traits here is very similar to that found in template // metaprogramming libraries such as Boost, GCC, and Metrowerks, given that // these libraries have established this interface as a defacto standard for // solving this problem. Also, these are described in various books on the // topic of template metaprogramming, such as "Modern C++ Design". // // See http://www.boost.org/libs/utility/call_traits.htm or search for // call_traits in Google for a description of call_traits. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_CALL_TRAITS_H #define EASTL_CALL_TRAITS_H #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
#endif namespace eastl { template struct ct_imp2 { typedef const T& param_type; }; template struct ct_imp2 { typedef const T param_type; }; template struct ct_imp { typedef const T& param_type; }; template struct ct_imp { typedef typename ct_imp2::param_type param_type; }; template struct ct_imp { typedef T const param_type; }; template struct call_traits { public: typedef T value_type; typedef T& reference; typedef const T& const_reference; typedef typename ct_imp::value, is_arithmetic::value>::param_type param_type; }; template struct call_traits { typedef T& value_type; typedef T& reference; typedef const T& const_reference; typedef T& param_type; }; template struct call_traits { private: typedef T array_type[N]; public: typedef const T* value_type; typedef array_type& reference; typedef const array_type& const_reference; typedef const T* const param_type; }; template struct call_traits { private: typedef const T array_type[N]; public: typedef const T* value_type; typedef array_type& reference; typedef const array_type& const_reference; typedef const T* const param_type; }; } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/bonus/compressed_pair.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // The compressed pair class is very similar to std::pair, but if either of the // template arguments are empty classes, then the "empty base-class optimization" // is applied to compress the size of the pair. 
// // The design for compressed_pair here is very similar to that found in template // metaprogramming libraries such as Boost, GCC, and Metrowerks, given that // these libraries have established this interface as a defacto standard for // solving this problem. Also, these are described in various books on the // topic of template metaprogramming, such as "Modern C++ Design". // // template // class compressed_pair // { // public: // typedef T1 first_type; // typedef T2 second_type; // typedef typename call_traits::param_type first_param_type; // typedef typename call_traits::param_type second_param_type; // typedef typename call_traits::reference first_reference; // typedef typename call_traits::reference second_reference; // typedef typename call_traits::const_reference first_const_reference; // typedef typename call_traits::const_reference second_const_reference; // // compressed_pair() : base() {} // compressed_pair(first_param_type x, second_param_type y); // explicit compressed_pair(first_param_type x); // explicit compressed_pair(second_param_type y); // // compressed_pair& operator=(const compressed_pair&); // // first_reference first(); // first_const_reference first() const; // // second_reference second(); // second_const_reference second() const; // // void swap(compressed_pair& y); // }; // // The two members of the pair can be accessed using the member functions first() // and second(). Note that not all member functions can be instantiated for all // template parameter types. In particular compressed_pair can be instantiated for // reference and array types, however in these cases the range of constructors that // can be used are limited. If types T1 and T2 are the same type, then there is // only one version of the single-argument constructor, and this constructor // initialises both values in the pair to the passed value. 
// // Note that compressed_pair can not be instantiated if either of the template // arguments is a union type, unless there is compiler support for is_union, // or if is_union is specialised for the union type. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_COMPRESSED_PAIR_H #define EASTL_COMPRESSED_PAIR_H #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif #if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later EA_DISABLE_VC_WARNING(4626 5027) // warning C4626: 'eastl::compressed_pair_imp': assignment operator was implicitly defined as deleted because a base class assignment operator is inaccessible or deleted #endif namespace eastl { template class compressed_pair; template struct compressed_pair_switch; template struct compressed_pair_switch{ static const int value = 0; }; template struct compressed_pair_switch { static const int value = 1; }; template struct compressed_pair_switch { static const int value = 2; }; template struct compressed_pair_switch { static const int value = 3; }; template struct compressed_pair_switch { static const int value = 4; }; template struct compressed_pair_switch { static const int value = 5; }; template class compressed_pair_imp; template inline void cp_swap(T& t1, T& t2) { T tTemp = t1; t1 = t2; t2 = tTemp; } // Derive from neither template class compressed_pair_imp { public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair_imp() 
{} compressed_pair_imp(first_param_type x, second_param_type y) : mFirst(x), mSecond(y) {} compressed_pair_imp(first_param_type x) : mFirst(x) {} compressed_pair_imp(second_param_type y) : mSecond(y) {} first_reference first() { return mFirst; } first_const_reference first() const { return mFirst; } second_reference second() { return mSecond; } second_const_reference second() const { return mSecond; } void swap(compressed_pair& y) { cp_swap(mFirst, y.first()); cp_swap(mSecond, y.second()); } private: first_type mFirst; second_type mSecond; }; // Derive from T1 template class compressed_pair_imp : private T1 { public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair_imp() {} compressed_pair_imp(first_param_type x, second_param_type y) : first_type(x), mSecond(y) {} compressed_pair_imp(first_param_type x) : first_type(x) {} compressed_pair_imp(second_param_type y) : mSecond(y) {} first_reference first() { return *this; } first_const_reference first() const { return *this; } second_reference second() { return mSecond; } second_const_reference second() const { return mSecond; } void swap(compressed_pair& y) { // No need to swap empty base class cp_swap(mSecond, y.second()); } private: second_type mSecond; }; // Derive from T2 template class compressed_pair_imp : private T2 { public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename 
call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair_imp() {} compressed_pair_imp(first_param_type x, second_param_type y) : second_type(y), mFirst(x) {} compressed_pair_imp(first_param_type x) : mFirst(x) {} compressed_pair_imp(second_param_type y) : second_type(y) {} first_reference first() { return mFirst; } first_const_reference first() const { return mFirst; } second_reference second() { return *this; } second_const_reference second() const { return *this; } void swap(compressed_pair& y) { // No need to swap empty base class cp_swap(mFirst, y.first()); } private: first_type mFirst; }; // Derive from T1 and T2 template class compressed_pair_imp : private T1, private T2 { public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair_imp() {} compressed_pair_imp(first_param_type x, second_param_type y) : first_type(x), second_type(y) {} compressed_pair_imp(first_param_type x) : first_type(x) {} compressed_pair_imp(second_param_type y) : second_type(y) {} first_reference first() { return *this; } first_const_reference first() const { return *this; } second_reference second() { return *this; } second_const_reference second() const { return *this; } // No need to swap empty bases void swap(compressed_pair&) { } }; // T1 == T2, T1 and T2 are both empty // Note does not actually store an instance of T2 at all; // but reuses T1 base class for both first() and second(). 
template class compressed_pair_imp : private T1 { public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair_imp() {} compressed_pair_imp(first_param_type x, second_param_type) : first_type(x) {} compressed_pair_imp(first_param_type x) : first_type(x) {} first_reference first() { return *this; } first_const_reference first() const { return *this; } second_reference second() { return *this; } second_const_reference second() const { return *this; } void swap(compressed_pair&) { } }; // T1 == T2 and are not empty template class compressed_pair_imp { public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair_imp() {} compressed_pair_imp(first_param_type x, second_param_type y) : mFirst(x), mSecond(y) {} compressed_pair_imp(first_param_type x) : mFirst(x), mSecond(x) {} first_reference first() { return mFirst; } first_const_reference first() const { return mFirst; } second_reference second() { return mSecond; } second_const_reference second() const { return mSecond; } void swap(compressed_pair& y) { cp_swap(mFirst, y.first()); cp_swap(mSecond, y.second()); } private: first_type mFirst; second_type mSecond; }; template class compressed_pair : private compressed_pair_imp::type, typename remove_cv::type>::value, 
is_empty::value, is_empty::value>::value> { private: typedef compressed_pair_imp::type, typename remove_cv::type>::value, is_empty::value, is_empty::value>::value> base; public: typedef T1 first_type; typedef T2 second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair() : base() {} compressed_pair(first_param_type x, second_param_type y) : base(x, y) {} explicit compressed_pair(first_param_type x) : base(x) {} explicit compressed_pair(second_param_type y) : base(y) {} first_reference first() { return base::first(); } first_const_reference first() const { return base::first(); } second_reference second() { return base::second(); } second_const_reference second() const { return base::second(); } void swap(compressed_pair& y) { base::swap(y); } }; // Partial specialisation for case where T1 == T2: template class compressed_pair : private compressed_pair_imp::type, typename remove_cv::type>::value, is_empty::value, is_empty::value>::value> { private: typedef compressed_pair_imp::type, typename remove_cv::type>::value, is_empty::value, is_empty::value>::value> base; public: typedef T first_type; typedef T second_type; typedef typename call_traits::param_type first_param_type; typedef typename call_traits::param_type second_param_type; typedef typename call_traits::reference first_reference; typedef typename call_traits::reference second_reference; typedef typename call_traits::const_reference first_const_reference; typedef typename call_traits::const_reference second_const_reference; compressed_pair() : base() {} compressed_pair(first_param_type x, second_param_type y) : base(x, y) {} explicit compressed_pair(first_param_type x) : 
base(x) {} first_reference first() { return base::first(); } first_const_reference first() const { return base::first(); } second_reference second() { return base::second(); } second_const_reference second() const { return base::second(); } void swap(compressed_pair& y) { base::swap(y); } }; template inline void swap(compressed_pair& x, compressed_pair& y) { x.swap(y); } } // namespace eastl #if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later EA_RESTORE_VC_WARNING() #endif #endif // Header include guard ================================================ FILE: include/EASTL/bonus/fixed_ring_buffer.h ================================================ /////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_FIXED_RING_BUFFER_H #define EASTL_FIXED_RING_BUFFER_H #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /// fixed_ring_buffer /// /// This is a convenience template alias for creating a fixed-sized /// ring_buffer using eastl::fixed_vector as its storage container. This has /// been tricky for users to get correct due to the constructor requirements /// of eastl::ring_buffer leaking the implementation detail of the sentinel /// value being used internally. In addition, it was not obvious what the /// correct allocator_type template parameter should be used for containers /// providing both a default allocator type and an overflow allocator type. /// /// We are over-allocating the fixed_vector container to accommodate the /// ring_buffer sentinel to prevent that implementation detail leaking into /// user code. 
/// /// Example usage: /// /// fixed_ring_buffer<int, 8> rb = {0, 1, 2, 3, 4, 5, 6, 7}; /// or /// fixed_ring_buffer<int, 8> rb(8); // capacity doesn't need to respect sentinel /// rb.push_back(0); /// /// #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) template using fixed_ring_buffer = ring_buffer, typename fixed_vector::overflow_allocator_type>; #endif } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/bonus/fixed_tuple_vector.h ================================================ /////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_FIXEDTUPLEVECTOR_H #define EASTL_FIXEDTUPLEVECTOR_H #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /// EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME /// /// Defines a default container name in the absence of a user-provided name. /// In the case of fixed-size containers, the allocator name always refers /// to overflow allocations. /// #ifndef EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME #define EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_tuple_vector" // Unless the user overrides something, this is "EASTL fixed_tuple_vector". 
#endif /// EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR /// #ifndef EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR #define EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME) #endif // External interface of fixed_tuple_vector template class fixed_tuple_vector : public TupleVecInternal::TupleVecImpl::GetTotalAllocationSize(nodeCount, 0), 1, TupleVecInternal::TupleRecurser::GetTotalAlignment(), 0, bEnableOverflow, EASTLAllocatorType>, make_index_sequence, Ts...> { public: typedef fixed_vector_allocator< TupleVecInternal::TupleRecurser::GetTotalAllocationSize(nodeCount, 0), 1, TupleVecInternal::TupleRecurser::GetTotalAlignment(), 0, bEnableOverflow, EASTLAllocatorType> fixed_allocator_type; typedef aligned_buffer aligned_buffer_type; typedef fixed_tuple_vector this_type; typedef EASTLAllocatorType overflow_allocator_type; typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; typedef typename base_type::size_type size_type; private: aligned_buffer_type mBuffer; public: fixed_tuple_vector() : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { } fixed_tuple_vector(const overflow_allocator_type& allocator) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { } fixed_tuple_vector(this_type&& x) : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::get_allocator().copy_overflow_allocator(x.get_allocator()); base_type::DoInitFromIterator(eastl::make_move_iterator(x.begin()), eastl::make_move_iterator(x.end())); x.clear(); } fixed_tuple_vector(this_type&& x, const overflow_allocator_type& allocator) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFromIterator(eastl::make_move_iterator(x.begin()), eastl::make_move_iterator(x.end())); 
x.clear(); } fixed_tuple_vector(const this_type& x) : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::get_allocator().copy_overflow_allocator(x.get_allocator()); base_type::DoInitFromIterator(x.begin(), x.end()); } fixed_tuple_vector(const this_type& x, const overflow_allocator_type& allocator) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFromIterator(x.begin(), x.end()); } template fixed_tuple_vector(move_iterator begin, move_iterator end, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFromIterator(begin, end); } template fixed_tuple_vector(Iterator begin, Iterator end, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFromIterator(begin, end); } fixed_tuple_vector(size_type n, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitDefaultFill(n); } fixed_tuple_vector(size_type n, const Ts&... args) : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFillArgs(n, args...); } fixed_tuple_vector(size_type n, const Ts&... 
args, const overflow_allocator_type& allocator) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFillArgs(n, args...); } fixed_tuple_vector(size_type n, typename base_type::const_reference_tuple tup, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFillTuple(n, tup); } fixed_tuple_vector(const typename base_type::value_tuple* first, const typename base_type::value_tuple* last, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFromTupleArray(first, last); } fixed_tuple_vector(std::initializer_list iList, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) { base_type::DoInitFromTupleArray(iList.begin(), iList.end()); } this_type& operator=(const this_type& other) { base_type::operator=(other); return *this; } this_type& operator=(this_type&& other) { base_type::clear(); // OK to call DoInitFromIterator in a non-ctor scenario because clear() reset everything, more-or-less base_type::DoInitFromIterator(eastl::make_move_iterator(other.begin()), eastl::make_move_iterator(other.end())); other.clear(); return *this; } this_type& operator=(std::initializer_list iList) { base_type::operator=(iList); return *this; } void swap(this_type& x) { // If both containers are using the heap instead of local memory // then we can do a fast pointer swap instead of content swap. 
if ((has_overflowed() && x.has_overflowed()) && (get_overflow_allocator() == x.get_overflow_allocator())) { base_type::swap(x); } else { // Fixed containers use a special swap that can deal with excessively large buffers. eastl::fixed_swap(*this, x); } } // Returns the max fixed size, which is the user-supplied nodeCount parameter. size_type max_size() const { return nodeCount; } // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, // the container size can be greater than nodeCount but full() could return true because the // fixed space may have a recently freed slot. bool full() const { return (base_type::mNumElements >= nodeCount) || ((void*)base_type::mpData != (void*)mBuffer.buffer); } // Returns true if the allocations spilled over into the overflow allocator. Meaningful // only if overflow is enabled. bool has_overflowed() const { return ((void*)base_type::mpData != (void*)mBuffer.buffer); } // Returns the value of the bEnableOverflow template parameter. static constexpr bool can_overflow() { return bEnableOverflow; } const overflow_allocator_type& get_overflow_allocator() const { return base_type::get_allocator().get_overflow_allocator(); } }; template inline void swap(fixed_tuple_vector& a, fixed_tuple_vector& b) { a.swap(b); } } // namespace eastl #endif // EASTL_TUPLEVECTOR_H ================================================ FILE: include/EASTL/bonus/flags.h ================================================ // Copyright (c) Electronic Arts Inc. All rights reserved. #pragma once // // introduction // -------------- // // hello there. // // Flags are usually defined on a per-bit basis, and stored in integrals. This file // defines a templated type that encapsulates that logic as a thin-wrapper around an // integral. The benefit here is that [i.e.] enums that are not already shifted can // be seamlessly used if the corresponding flags type knows they're pre-shifted. 
// // // context // --------- // Here's a really common example where everything is _easy_: // (operators & conversions left out for brevity) // // enum class Dragons : uint32_t { Fire = 1, Ice = 2, Poison = 4, Golden = 8 }; // uint32_t flags = Dragons::Fire | Dragons::Poison; // // Here's a really common example where things are _harder_: // // enum class Knights : uint16_t { Armoured, Cowardly, Shining, Ni }; // uint32_t flags = (1 << Knights::Armoured) | (1 << Knights::Shining); // // Fun fact: the shift operator's resultant type is the type of the LHS, and // bitwise or-operator will promote its arguments of uint16_t to int on 64-bit // systems, meaning there are _many more_ omitted casts than may first appear. // // In the above examples, we would call `Dragons` _post-shifted_, and `Knights` // _pre-shifted_. Example code perhaps trivialises the issue; in large codebases // keeping track of which flags are pre- or post-shifted can be annoying, and // there is genuine need for both mixed within the same code. Awful! // // // basic_flags (prefer using bitflags/maskflags) // ----------------------------------------------- // This structure wraps an integral and provides all common bitwise operations for // that integral. It will automatically shift pre-shifted flags (kind of, you need // to tell it that a flag is pre-shifted (bitflags), or post-shifted (maskflags)). // // This example demonstrates pre-shifted flags being used without any shifting // required on the user's part. 
If the user had accidentally used eastl::maskflags<> // instead of bitflags<>, then no shifting would occur and there would be _bugs_: // // enum class Knights : uint16_t { Armoured, Cowardly, Shining, Ni }; // using KnightFlags = eastl::bitflags<Knights>; // // // default construct = all bits zero // KnightFlags knight_useless; // // // takes an initializer_list if so desired // KnightFlags knight_in_shining_armour{Knights::Armoured, Knights::Shining}; // // // supports all typical operators // if (knight_in_shining_armour & Knights::Shining) // { // // ... // } // // // being brave is part of the job description // knight_useless = Knights::Cowardly; // // // integrals would support implicit conversion to bool, so we do too // if (knight_useless) // { // } // // // // // macros for defining flags // --------------------------- // // The following macros are shorthands for defining a flag-type based off // an enum-type. This enum-type can either be known ahead of time, using // EASTL_DECLARE_BITFLAGS or EASTL_DECLARE_MASKFLAGS, or it can be defined // directly then and there, using the "_ENUM_CLASS" versions of the macros. // // These macros help by defining certain bitwise operators for enum-classes. // // // // // EASTL_DECLARE_BITFLAGS // EASTL_DECLARE_MASKFLAGS // ------------------------- // These macros simply take an existing enum-type and define an alias for a // flags-type (bitflags or maskflags) of that enum-type: // // enum class DragonType { Happy, Sad, Drunk }; // // EASTL_DECLARE_BITFLAGS(DragonFlags, DragonType); // // ^ the above macro has expanded to something akin to: // // using DragonFlags = ::eastl::bitflags<DragonType>; // inline constexpr DragonFlags operator | (DragonType lhs, DragonType rhs) { /* snip */ } // // This example shows a post-shifted enum using MASKFLAGS. 
Note that post-shifted // flags can be combined and thus more than the bitwise-or operator has been // provided (the bitwise and-operator and xor-operator are included free of charge): // // enum class WizardType // { // Brilliant = 1, Imbecile = 2, Evil = 4, Forgetful = 8, // MaybeHarmless = (Evil | Forgetful) // }; // // EASTL_DECLARE_MASKFLAGS(WizardFlags, WizardType); // // ^ macro expansion for the above macro looks like: // // using WizardFlags = ::eastl::maskflags<WizardType>; // inline constexpr WizardFlags operator | (WizardType lhs, WizardType rhs) { /* snip */ } // inline constexpr WizardFlags operator & (WizardType lhs, WizardType rhs) { /* snip */ } // inline constexpr WizardFlags operator ^ (WizardType lhs, WizardType rhs) { /* snip */ } // // // // // EASTL_DECLARE_BITFLAGS_ENUM_CLASS // EASTL_DECLARE_MASKFLAGS_ENUM_CLASS // ----------------------------------- // These macros are convenience macros to both declare the flag-type, and // in-place define an enum-class (the second parameter). Operators for the // enum-type are automatically generated too: // // Note that this first example is again using BITFLAGS. 
// // EASTL_DECLARE_BITFLAGS_ENUM_CLASS(DragonFlags, DragonType) // { // Jubilant, // Depressed, // Smashed // }; // // This generates code similar to the following: // // enum class DragonType; // using DragonFlags = ::eastl::bitflags<DragonType>; // inline constexpr DragonFlags operator | (DragonType lhs, DragonType rhs) { /* snip */ } // enum class DragonType // { // Jubilant, // Depressed, // Smashed // }; // // // Note: You will still need to shift your enum values yourself (i.e. declare // them post-shifted) when using the MASKFLAGS version of these macros, just as // if you were writing the enum without the macro: // // EASTL_DECLARE_MASKFLAGS_ENUM_CLASS(SquireFlags, Squires) // { // Absent = 0, // Inexperienced = (1<<0), // Keen = (1<<1), // Jaded = (1<<2), // Competent = (1<<3), // // FreshOutOfUni = Inexperienced | Keen // }; // // Macro expansion: // // enum class Squires; // using SquireFlags = ::eastl::maskflags<Squires>; // inline constexpr SquireFlags operator | (Squires lhs, Squires rhs) { /* snip */ } // inline constexpr SquireFlags operator & (Squires lhs, Squires rhs) { /* snip */ } // inline constexpr SquireFlags operator ^ (Squires lhs, Squires rhs) { /* snip */ } // enum class Squires // { // Absent = 0, // Inexperienced = (1<<0), // Keen = (1<<1), // Jaded = (1<<2), // Competent = (1<<3), // // FreshOutOfUni = Inexperienced | Keen // }; // // Usage: // // SquireFlags flags{Squires::FreshOutOfUni}; // if (flags & Squires::Keen) { /* snip */ } // if (flags != Squires::Absent) { /* snip */ } // #include #include // forward-declares namespace eastl { template struct basic_flags; template struct flag_marshaller; // tags to determine between flag-types struct bitflag_tag {}; struct maskflag_tag {}; // aliases template using bitflags = basic_flags>; template using maskflags = basic_flags>; } // the mask-type for an enum of a given size namespace eastl::detail { template struct flags_mask_type; template <> struct flags_mask_type<1> { using type = uint8_t; }; template <> struct flags_mask_type<2> { 
using type = uint16_t; }; template <> struct flags_mask_type<4> { using type = uint32_t; }; template <> struct flags_mask_type<8> { using type = uint64_t; }; template using flags_mask_type_t = typename flags_mask_type::type; } namespace eastl { template struct basic_flags { using flag_type = Flag; using marshaller_type = Marshaller; using mask_type = detail::flags_mask_type_t; static_assert(is_integral_v, "mask_type must be integral"); static_assert(is_unsigned_v, "mask_type must be unsigned"); // constructors static constexpr basic_flags from_mask(mask_type) noexcept; constexpr basic_flags() noexcept = default; constexpr basic_flags(flag_type) noexcept; constexpr basic_flags(const basic_flags&) = default; constexpr basic_flags(std::initializer_list) noexcept; // explicit conversions constexpr operator bool() const noexcept; constexpr explicit operator mask_type() const noexcept; // standard flags operators constexpr basic_flags& operator = (flag_type) noexcept; constexpr basic_flags& operator = (const basic_flags&) noexcept = default; constexpr basic_flags& operator |= (basic_flags) noexcept; constexpr basic_flags& operator &= (basic_flags) noexcept; constexpr basic_flags& operator ^= (basic_flags) noexcept; constexpr basic_flags& operator |= (flag_type) noexcept; constexpr basic_flags& operator &= (flag_type) noexcept; constexpr basic_flags& operator ^= (flag_type) noexcept; constexpr basic_flags operator ~ () const noexcept; // named versions of common operations constexpr basic_flags& set(flag_type, bool enabled) noexcept; constexpr basic_flags& set(flag_type) noexcept; constexpr basic_flags& unset(flag_type) noexcept; constexpr basic_flags& toggle(flag_type) noexcept; constexpr void clear() noexcept; constexpr void reset(mask_type = 0u) noexcept; private: constexpr basic_flags(mask_type mask) noexcept : m_mask{mask} {} private: mask_type m_mask{}; private: // friend non-member bitwise operations template friend constexpr basic_flags operator & (basic_flags, 
basic_flags) noexcept; template friend constexpr basic_flags operator | (basic_flags, basic_flags) noexcept; template friend constexpr basic_flags operator ^ (basic_flags, basic_flags) noexcept; template friend constexpr basic_flags operator & (basic_flags, typename basic_flags::flag_type) noexcept; template friend constexpr basic_flags operator | (basic_flags, typename basic_flags::flag_type) noexcept; template friend constexpr basic_flags operator ^ (basic_flags, typename basic_flags::flag_type) noexcept; template friend constexpr basic_flags operator & (typename basic_flags::flag_type, basic_flags) noexcept; template friend constexpr basic_flags operator | (typename basic_flags::flag_type, basic_flags) noexcept; template friend constexpr basic_flags operator ^ (typename basic_flags::flag_type, basic_flags) noexcept; // friend comparison operators template friend constexpr bool operator == (basic_flags, basic_flags) noexcept; template friend constexpr bool operator == (basic_flags, typename basic_flags::flag_type) noexcept; template friend constexpr bool operator == (typename basic_flags::flag_type, basic_flags) noexcept; template friend constexpr bool operator != (basic_flags, basic_flags) noexcept; template friend constexpr bool operator != (basic_flags, typename basic_flags::flag_type) noexcept; template friend constexpr bool operator != (typename basic_flags::flag_type, basic_flags) noexcept; }; } // // flag_marshaller // ----------------- // marshalls the flag-type into a mask-type we can perform bitwise operations // on with our mask member // // 'bitflags' expects the flags to be sequentially numbered, where the // value of each flag signifies the bit-position within the mask-type that // will be affected // // // the bottom four bits (position 0, 1, 2, 3) will be affected // enum class MaidenType { Distressed, Defiant, Conniving, Charasmatic }; // // // bits at positions 3, 4, and 8 // enum class KnightType { Armoured = 3, Lazy = 4, Jousting = 8 }; // // 
// 'maskflags' on the other hand, expects the values of the flags to be // _already_ shifted to the correct position. taking the above examples // and rewriting them as bitmasks: // // // the bottom four bits (position 0, 1, 2, 3) will be affected // enum class MaidenType { Distressed = 1, Defiant = 2, Conniving = 4, Charasmatic = 8 }; // // // bits at positions 3, 4, and 8 (note that "Absent" contributes nothing) // enum class KnightType { Absent = 0, Armoured = (1<<3), Lazy = (1<<4), Jousting = (1<<8) }; // // namespace eastl { template struct flag_marshaller { static_assert(!std::is_integral_v); using tag_type = TagType; using flag_type = FlagType; using mask_type = detail::flags_mask_type_t; // we really want C++ constexpr-if template static constexpr mask_type to_mask(flag_type f) { return to_mask_impl(tag_type{}, f); } private: template static constexpr mask_type to_mask_impl(bitflag_tag, flag_type f) { return mask_type{1} << static_cast(f) << AdditionalOffset; } template static constexpr mask_type to_mask_impl(maskflag_tag, flag_type f) { return static_cast(f) << AdditionalOffset; } }; } // // basic_flags implementation // namespace eastl { template inline constexpr basic_flags basic_flags::from_mask(mask_type mask) noexcept { return basic_flags{mask}; } template inline constexpr basic_flags::basic_flags(flag_type flag) noexcept : m_mask{marshaller_type::to_mask(flag)} {} template inline constexpr basic_flags::basic_flags(std::initializer_list flags) noexcept { for (auto f : flags) set(f); } template inline constexpr basic_flags::operator bool() const noexcept { return static_cast(m_mask); } template inline constexpr basic_flags::operator mask_type() const noexcept { return m_mask; } template inline constexpr basic_flags& basic_flags::operator = (flag_type flag) noexcept { m_mask = marshaller_type::to_mask(flag); return *this; } template inline constexpr basic_flags& basic_flags::operator |= (basic_flags flag) noexcept { m_mask |= flag.m_mask; return *this; 
} template inline constexpr basic_flags& basic_flags::operator &= (basic_flags flag) noexcept { m_mask &= flag.m_mask; return *this; } template inline constexpr basic_flags& basic_flags::operator ^= (basic_flags flag) noexcept { m_mask ^= flag.m_mask; return *this; } template inline constexpr basic_flags& basic_flags::operator |= (flag_type flag) noexcept { m_mask |= marshaller_type::to_mask(flag); return *this; } template inline constexpr basic_flags& basic_flags::operator &= (flag_type flag) noexcept { m_mask &= marshaller_type::to_mask(flag); return *this; } template inline constexpr basic_flags& basic_flags::operator ^= (flag_type flag) noexcept { m_mask ^= marshaller_type::to_mask(flag); return *this; } template inline constexpr basic_flags basic_flags::operator ~ () const noexcept { return basic_flags{~m_mask}; } template inline constexpr basic_flags& basic_flags::set(flag_type flag, bool enabled) noexcept { mask_type const rhs = marshaller_type::to_mask(flag); m_mask = (m_mask & ~rhs) ^ (rhs * static_cast(enabled)); return *this; } template inline constexpr basic_flags& basic_flags::set(flag_type flag) noexcept { m_mask |= marshaller_type::to_mask(flag); return *this; } template inline constexpr basic_flags& basic_flags::unset(flag_type flag) noexcept { m_mask &= ~marshaller_type::to_mask(flag); return *this; } template inline constexpr basic_flags& basic_flags::toggle(flag_type flag) noexcept { m_mask ^= marshaller_type::to_mask(flag); return *this; } template inline constexpr void basic_flags::clear() noexcept { m_mask = 0u; } template inline constexpr void basic_flags::reset(mask_type mask) noexcept { m_mask = mask; } } // // BF_MASK_CAST_ // --------------- // // bitwise operations will promote to int when available. // // that is undesireable, as we will then encounter narrowing conversion warnings // when assigning to our m_mask of those narrower types. so static-cast. 
// #define BF_MASK_CAST_(expr) static_cast::mask_type>(expr) // non-member operators namespace eastl { template inline constexpr basic_flags operator & (basic_flags lhs, basic_flags rhs) noexcept { return basic_flags{BF_MASK_CAST_(lhs.m_mask & rhs.m_mask)}; } template inline constexpr basic_flags operator | (basic_flags lhs, basic_flags rhs) noexcept { return basic_flags{BF_MASK_CAST_(lhs.m_mask | rhs.m_mask)}; } template inline constexpr basic_flags operator ^ (basic_flags lhs, basic_flags rhs) noexcept { return basic_flags{BF_MASK_CAST_(lhs.m_mask ^ rhs.m_mask)}; } template inline constexpr basic_flags operator & (basic_flags lhs, typename basic_flags::flag_type rhs) noexcept { return basic_flags{BF_MASK_CAST_(lhs.m_mask & M::to_mask(rhs))}; } template inline constexpr basic_flags operator | (basic_flags lhs, typename basic_flags::flag_type rhs) noexcept { return basic_flags{BF_MASK_CAST_(lhs.m_mask | M::to_mask(rhs))}; } template inline constexpr basic_flags operator ^ (basic_flags lhs, typename basic_flags::flag_type rhs) noexcept { return basic_flags{BF_MASK_CAST_(lhs.m_mask ^ M::to_mask(rhs))}; } template inline constexpr basic_flags operator & (typename basic_flags::flag_type lhs, basic_flags rhs) noexcept { return basic_flags{BF_MASK_CAST_(M::to_mask(lhs) & rhs.m_mask)}; } template inline constexpr basic_flags operator | (typename basic_flags::flag_type lhs, basic_flags rhs) noexcept { return basic_flags{BF_MASK_CAST_(M::to_mask(lhs) | rhs.m_mask)}; } template inline constexpr basic_flags operator ^ (typename basic_flags::flag_type lhs, basic_flags rhs) noexcept { return basic_flags{BF_MASK_CAST_(M::to_mask(lhs) ^ rhs.m_mask)}; } template inline constexpr bool operator == (basic_flags lhs, basic_flags rhs) noexcept { return lhs.m_mask == rhs.m_mask; } template inline constexpr bool operator == (basic_flags lhs, typename basic_flags::flag_type rhs) noexcept { return lhs.m_mask == M::to_mask(rhs); } template inline constexpr bool operator == (typename 
basic_flags::flag_type lhs, basic_flags rhs) noexcept { return M::to_mask(lhs) == rhs.m_mask; } template inline constexpr bool operator != (basic_flags lhs, basic_flags rhs) noexcept { return lhs.m_mask != rhs.m_mask; } template inline constexpr bool operator != (basic_flags lhs, typename basic_flags::flag_type rhs) noexcept { return lhs.m_mask != M::to_mask(rhs); } template inline constexpr bool operator != (typename basic_flags::flag_type lhs, basic_flags rhs) noexcept { return M::to_mask(lhs) != rhs.m_mask; } } #undef BF_MASK_CAST_ // // mask_of // --------- // returns the mask of a basic_flags. we have made the conversion operator // explicit, so users would have to first know (or get, as the case may be) // the mask_type, and perform an explicit cast. // // this is good because it makes people aware of when their flags are being // interpreted as an integral, but it is kind of wordy. so 'mask_of' has // been introduced to shorten this. // // 'mask' was not chosen due to the high potential for name clashes. 
// namespace eastl { template inline constexpr typename basic_flags::mask_type mask_of(basic_flags flags) { return static_cast::mask_type>(flags); } } // // macros // -------- // // see top-of-file for explanation // #define EASTL_DECLARE_BITFLAGS(flagstype, enumtype) \ using flagstype = ::eastl::bitflags; \ inline constexpr flagstype operator | (enumtype lhs, enumtype rhs) \ { \ using msh = typename flagstype::marshaller_type; \ return flagstype::from_mask(static_cast(msh::to_mask(lhs) | msh::to_mask(rhs))); \ } #define EASTL_DECLARE_BITFLAGS_ENUM_CLASS(flagstype, enumtype) \ enum class enumtype; \ EASTL_DECLARE_BITFLAGS(flagstype, enumtype) \ enum class enumtype #define EASTL_DECLARE_BITFLAGS_ENUM_CLASS_SIZED(sizetype, flagstype, enumtype) \ enum class enumtype : sizetype; \ EASTL_DECLARE_BITFLAGS(flagstype, enumtype) \ enum class enumtype : sizetype #define EASTL_DECLARE_MASKFLAGS(flagstype, enumtype) \ using flagstype = ::eastl::maskflags; \ inline constexpr flagstype operator | (enumtype lhs, enumtype rhs) \ { \ using msh = typename flagstype::marshaller_type; \ return flagstype::from_mask(static_cast(msh::to_mask(lhs) | msh::to_mask(rhs))); \ } \ inline constexpr flagstype operator & (enumtype lhs, enumtype rhs) \ { \ using msh = typename flagstype::marshaller_type; \ return flagstype::from_mask(static_cast(msh::to_mask(lhs) & msh::to_mask(rhs))); \ } \ inline constexpr flagstype operator ^ (enumtype lhs, enumtype rhs) \ { \ using msh = typename flagstype::marshaller_type; \ return flagstype::from_mask(static_cast(msh::to_mask(lhs) ^ msh::to_mask(rhs))); \ } #define EASTL_DECLARE_MASKFLAGS_ENUM_CLASS(flagstype, enumtype) \ enum class enumtype; \ EASTL_DECLARE_MASKFLAGS(flagstype, enumtype) \ enum class enumtype #define EASTL_DECLARE_MASKFLAGS_ENUM_CLASS_SIZED(sizetype, flagstype, enumtype) \ enum class enumtype : sizetype; \ EASTL_DECLARE_MASKFLAGS(flagstype, enumtype) \ enum class enumtype : sizetype ================================================ FILE: 
include/EASTL/bonus/intrusive_sdlist.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // intrusive_sdlist is a special kind of intrusive list which we say is // "singly-doubly" linked. Instead of having a typical intrusive list node // which looks like this: // // struct intrusive_sdlist_node { // intrusive_sdlist_node *mpNext; // intrusive_sdlist_node *mpPrev; // }; // // We instead have one that looks like this: // // struct intrusive_sdlist_node { // intrusive_sdlist_node* mpNext; // intrusive_sdlist_node** mppPrevNext; // }; // // This may seem to be suboptimal, but it has one specific advantage: it allows // the intrusive_sdlist class to be the size of only one pointer instead of two. // This may seem like a minor optimization, but some users have wanted to create // thousands of empty instances of these. // This is because while an intrusive_list class looks like this: // // class intrusive_list { // intrusive_list_node mBaseNode; // }; // // an intrusive_sdlist class looks like this: // // class intrusive_sdlist { // intrusive_sdlist_node* mpNext; // }; // // So here we make a list of plusses and minuses of intrusive sdlists // compared to intrusive_lists and intrusive_slists: // // | list | slist | sdlist // --------------------------------------------------------- // min size | 8 | 4 | 4 // node size | 8 | 4 | 8 // anonymous erase | yes | no | yes // reverse iteration | yes | no | no // /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_INTRUSIVE_SDLIST_H #define EASTL_INTRUSIVE_SDLIST_H #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. 
VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /// intrusive_sdlist_node /// struct intrusive_sdlist_node { intrusive_sdlist_node* mpNext; intrusive_sdlist_node** mppPrevNext; }; /// IntrusiveSDListIterator /// template struct IntrusiveSDListIterator { typedef IntrusiveSDListIterator this_type; typedef IntrusiveSDListIterator iterator; typedef IntrusiveSDListIterator const_iterator; typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. typedef ptrdiff_t difference_type; typedef T value_type; typedef T node_type; typedef Pointer pointer; typedef Reference reference; typedef eastl::forward_iterator_tag iterator_category; public: pointer mpNode; public: IntrusiveSDListIterator(); explicit IntrusiveSDListIterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. IntrusiveSDListIterator(const iterator& x); reference operator*() const; pointer operator->() const; this_type& operator++(); this_type operator++(int); }; // struct IntrusiveSDListIterator /// intrusive_sdlist_base /// /// Provides a template-less base class for intrusive_sdlist. /// class intrusive_sdlist_base { public: typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. typedef ptrdiff_t difference_type; protected: intrusive_sdlist_node* mpNext; public: intrusive_sdlist_base(); bool empty() const; ///< Returns true if the container is empty. size_type size() const; ///< Returns the number of elements in the list; O(n). void clear(); ///< Clears the list; O(1). No deallocation occurs. void pop_front(); ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated. void reverse(); ///< Reverses a list so that front and back are swapped; O(n). 
//bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. }; // class intrusive_sdlist_base /// intrusive_sdlist /// template class intrusive_sdlist : public intrusive_sdlist_base { public: typedef intrusive_sdlist this_type; typedef intrusive_sdlist_base base_type; typedef T node_type; typedef T value_type; typedef typename base_type::size_type size_type; typedef typename base_type::difference_type difference_type; typedef T& reference; typedef const T& const_reference; typedef T* pointer; typedef const T* const_pointer; typedef IntrusiveSDListIterator iterator; typedef IntrusiveSDListIterator const_iterator; typedef eastl::reverse_iterator reverse_iterator; typedef eastl::reverse_iterator const_reverse_iterator; public: intrusive_sdlist(); ///< Creates an empty list. intrusive_sdlist(const this_type& x); ///< Creates an empty list; ignores the argument. this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. iterator begin(); ///< Returns an iterator pointing to the first element in the list. const_iterator begin() const; ///< Returns a const_iterator pointing to the first element in the list. const_iterator cbegin() const; ///< Returns a const_iterator pointing to the first element in the list. iterator end(); ///< Returns an iterator pointing one-after the last element in the list. const_iterator end() const; ///< Returns a const_iterator pointing one-after the last element in the list. const_iterator cend() const; ///< Returns a const_iterator pointing one-after the last element in the list. reference front(); ///< Returns a reference to the first element. The list must not be empty. const_reference front() const; ///< Returns a const reference to the first element. The list must not be empty. void push_front(value_type& value); ///< Adds an element to the front of the list; O(1). The element is not copied. 
The element must not be in any other list. void push_back(value_type& value); ///< Adds an element to the back of the list; O(N). The element is not copied. The element must not be in any other list. void pop_back(); ///< Removes an element from the back of the list; O(N). The element must be present, but is not deallocated. bool contains(const value_type& value) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). iterator locate(value_type& value); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) const_iterator locate(const value_type& value) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) iterator insert(iterator position, value_type& value); ///< Inserts an element before the element pointed to by the iterator. O(1) iterator erase(iterator position); ///< Erases the element pointed to by the iterator. O(1) iterator erase(iterator first, iterator last); ///< Erases elements within the iterator range [first, last). O(1). void swap(intrusive_sdlist& x); ///< Swaps the contents of two intrusive lists; O(1). static void remove(value_type& value); ///< Erases an element from a list; O(1). Note that this is static so you don't need to know which list the element, although it must be in some list. void splice(iterator position, value_type& value); ///< Moves the given element into this list before the element pointed to by position; O(1). ///< Required: x must be in some list or have first/next pointers that point it itself. void splice(iterator position, this_type& x); ///< Moves the contents of a list into this list before the element pointed to by position; O(1). ///< Required: &x != this (same as std::list). 
void splice(iterator position, this_type& x, iterator xPosition); ///< Moves the given element pointed to i within the list x into the current list before ///< the element pointed to by position; O(1). void splice(iterator position, this_type& x, iterator first, iterator last); ///< Moves the range of elements [first, last) from list x into the current list before ///< the element pointed to by position; O(1). ///< Required: position must not be in [first, last). (same as std::list). bool validate() const; int validate_iterator(const_iterator i) const; }; // intrusive_sdlist /////////////////////////////////////////////////////////////////////// // IntrusiveSDListIterator functions /////////////////////////////////////////////////////////////////////// template inline IntrusiveSDListIterator::IntrusiveSDListIterator() { #if EASTL_DEBUG mpNode = NULL; #endif } template inline IntrusiveSDListIterator::IntrusiveSDListIterator(pointer pNode) : mpNode(pNode) { } template inline IntrusiveSDListIterator::IntrusiveSDListIterator(const iterator& x) : mpNode(x.mpNode) { } template inline typename IntrusiveSDListIterator::reference IntrusiveSDListIterator::operator*() const { return *mpNode; } template inline typename IntrusiveSDListIterator::pointer IntrusiveSDListIterator::operator->() const { return mpNode; } template inline typename IntrusiveSDListIterator::this_type& IntrusiveSDListIterator::operator++() { mpNode = static_cast(mpNode->mpNext); return *this; } template inline typename IntrusiveSDListIterator::this_type IntrusiveSDListIterator::operator++(int) { this_type temp = *this; mpNode = static_cast(mpNode->mpNext); return temp; } // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. // Thus we provide additional template paremeters here to support this. The defect report does not // require us to support comparisons between reverse_iterators and const_reverse_iterators. 
template inline bool operator==(const IntrusiveSDListIterator& a, const IntrusiveSDListIterator& b) { return a.mpNode == b.mpNode; } template inline bool operator!=(const IntrusiveSDListIterator& a, const IntrusiveSDListIterator& b) { return a.mpNode != b.mpNode; } // We provide a version of operator!= for the case where the iterators are of the // same type. This helps prevent ambiguity errors in the presence of rel_ops. template inline bool operator!=(const IntrusiveSDListIterator& a, const IntrusiveSDListIterator& b) { return a.mpNode != b.mpNode; } /////////////////////////////////////////////////////////////////////// // intrusive_sdlist_base /////////////////////////////////////////////////////////////////////// inline intrusive_sdlist_base::intrusive_sdlist_base() { mpNext = NULL; } inline bool intrusive_sdlist_base::empty() const { return mpNext == NULL; } inline intrusive_sdlist_base::size_type intrusive_sdlist_base::size() const { size_type n = 0; for(const intrusive_sdlist_node* pCurrent = mpNext; pCurrent; pCurrent = pCurrent->mpNext) n++; return n; } inline void intrusive_sdlist_base::clear() { mpNext = NULL; } // Note that we don't do anything with the list nodes. inline void intrusive_sdlist_base::pop_front() { // To consider: Set mpNext's pointers to NULL in debug builds. mpNext = mpNext->mpNext; mpNext->mppPrevNext = &mpNext; } /////////////////////////////////////////////////////////////////////// // intrusive_sdlist /////////////////////////////////////////////////////////////////////// template inline intrusive_sdlist::intrusive_sdlist() { } template inline intrusive_sdlist::intrusive_sdlist(const this_type& /*x*/) : intrusive_sdlist_base() { // We intentionally ignore argument x. } template inline typename intrusive_sdlist::this_type& intrusive_sdlist::operator=(const this_type& /*x*/) { return *this; // We intentionally ignore argument x. 
} template inline typename intrusive_sdlist::iterator intrusive_sdlist::begin() { return iterator(static_cast(mpNext)); } template inline typename intrusive_sdlist::const_iterator intrusive_sdlist::begin() const { return const_iterator(static_cast(const_cast(mpNext))); } template inline typename intrusive_sdlist::const_iterator intrusive_sdlist::cbegin() const { return const_iterator(static_cast(const_cast(mpNext))); } template inline typename intrusive_sdlist::iterator intrusive_sdlist::end() { return iterator(static_cast(NULL)); } template inline typename intrusive_sdlist::const_iterator intrusive_sdlist::end() const { return const_iterator(static_cast(NULL)); } template inline typename intrusive_sdlist::const_iterator intrusive_sdlist::cend() const { return const_iterator(static_cast(NULL)); } template inline typename intrusive_sdlist::reference intrusive_sdlist::front() { return *static_cast(mpNext); } template inline typename intrusive_sdlist::const_reference intrusive_sdlist::front() const { return *static_cast(mpNext); } template inline void intrusive_sdlist::push_front(value_type& value) { value.mpNext = mpNext; value.mppPrevNext = &mpNext; if(mpNext) mpNext->mppPrevNext = &value.mpNext; mpNext = &value; } template inline void intrusive_sdlist::push_back(value_type& value) { intrusive_sdlist_node* pNext = mpNext; intrusive_sdlist_node** ppPrevNext = &mpNext; while(pNext) { ppPrevNext = &pNext->mpNext; pNext = pNext->mpNext; } *ppPrevNext = &value; value.mppPrevNext = ppPrevNext; value.mpNext = NULL; } template inline void intrusive_sdlist::pop_back() { node_type* pCurrent = static_cast(mpNext); while(pCurrent->mpNext) pCurrent = static_cast(pCurrent->mpNext); *pCurrent->mppPrevNext = NULL; } template inline bool intrusive_sdlist::contains(const value_type& value) const { const intrusive_sdlist_node* pCurrent; for(pCurrent = mpNext; pCurrent; pCurrent = pCurrent->mpNext) { if(pCurrent == &value) break; } return (pCurrent != NULL); } template inline typename 
intrusive_sdlist::iterator intrusive_sdlist::locate(value_type& value) { intrusive_sdlist_node* pCurrent; for(pCurrent = static_cast(mpNext); pCurrent; pCurrent = pCurrent->mpNext) { if(pCurrent == &value) break; } return iterator(static_cast(pCurrent)); } template inline typename intrusive_sdlist::const_iterator intrusive_sdlist::locate(const T& value) const { const intrusive_sdlist_node* pCurrent; for(pCurrent = static_cast(mpNext); pCurrent; pCurrent = pCurrent->mpNext) { if(pCurrent == &value) break; } return const_iterator(static_cast(const_cast(pCurrent))); } template inline typename intrusive_sdlist::iterator intrusive_sdlist::insert(iterator position, value_type& value) { value.mppPrevNext = position.mpNode->mppPrevNext; value.mpNext = position.mpNode; *value.mppPrevNext = &value; position.mpNode->mppPrevNext = &value.mpNext; return iterator(&value); } template inline typename intrusive_sdlist::iterator intrusive_sdlist::erase(iterator position) { *position.mpNode->mppPrevNext = position.mpNode->mpNext; position.mpNode->mpNext->mppPrevNext = position.mpNode->mppPrevNext; return iterator(position.mpNode); } template inline typename intrusive_sdlist::iterator intrusive_sdlist::erase(iterator first, iterator last) { if(first.mpNode) // If not erasing the end... { *first.mpNode->mppPrevNext = last.mpNode; if(last.mpNode) // If not erasing to the end... last.mpNode->mppPrevNext = first.mpNode->mppPrevNext; } return last; } template inline void intrusive_sdlist::remove(value_type& value) { *value.mppPrevNext = value.mpNext; if(value.mpNext) value.mpNext->mppPrevNext = value.mppPrevNext; } template void intrusive_sdlist::swap(intrusive_sdlist& x) { // swap anchors intrusive_sdlist_node* const temp(mpNext); mpNext = x.mpNext; x.mpNext = temp; if(x.mpNext) x.mpNext->mppPrevNext = &mpNext; if(mpNext) mpNext->mppPrevNext = &x.mpNext; } // To do: Complete these splice functions. Might want to look at intrusive_sdlist for help. 
template void intrusive_sdlist::splice(iterator /*position*/, value_type& /*value*/) { EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. } template void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/) { EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. } template void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/, iterator /*xPosition*/) { EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. } template void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/, iterator /*first*/, iterator /*last*/) { EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. } template inline bool intrusive_sdlist::validate() const { return true; // To do. } template inline int intrusive_sdlist::validate_iterator(const_iterator i) const { // To do: Come up with a more efficient mechanism of doing this. for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) { if(temp == i) return (isf_valid | isf_current | isf_can_dereference); } if(i == end()) return (isf_valid | isf_current); return isf_none; } /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// template bool operator==(const intrusive_sdlist& a, const intrusive_sdlist& b) { // If we store an mSize member for intrusive_sdlist, we want to take advantage of it here. 
typename intrusive_sdlist::const_iterator ia = a.begin(); typename intrusive_sdlist::const_iterator ib = b.begin(); typename intrusive_sdlist::const_iterator enda = a.end(); typename intrusive_sdlist::const_iterator endb = b.end(); while((ia != enda) && (ib != endb) && (*ia == *ib)) { ++ia; ++ib; } return (ia == enda) && (ib == endb); } template bool operator<(const intrusive_sdlist& a, const intrusive_sdlist& b) { return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template bool operator!=(const intrusive_sdlist& a, const intrusive_sdlist& b) { return !(a == b); } template bool operator>(const intrusive_sdlist& a, const intrusive_sdlist& b) { return b < a; } template bool operator<=(const intrusive_sdlist& a, const intrusive_sdlist& b) { return !(b < a); } template bool operator>=(const intrusive_sdlist& a, const intrusive_sdlist& b) { return !(a < b); } template void swap(intrusive_sdlist& a, intrusive_sdlist& b) { a.swap(b); } } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/bonus/intrusive_slist.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // *** Note *** // This implementation is incomplete. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_INTRUSIVE_SLIST_H #define EASTL_INTRUSIVE_SLIST_H #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
#endif namespace eastl { /// intrusive_slist_node /// struct intrusive_slist_node { intrusive_slist_node* mpNext; }; /// IntrusiveSListIterator /// template struct IntrusiveSListIterator { typedef IntrusiveSListIterator this_type; typedef IntrusiveSListIterator iterator; typedef IntrusiveSListIterator const_iterator; typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. typedef ptrdiff_t difference_type; typedef T value_type; typedef T node_type; typedef Pointer pointer; typedef Reference reference; typedef eastl::forward_iterator_tag iterator_category; public: node_type* mpNode; public: IntrusiveSListIterator(); explicit IntrusiveSListIterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. IntrusiveSListIterator(const iterator& x); reference operator*() const; pointer operator->() const; this_type& operator++(); this_type operator++(int); }; // struct IntrusiveSListIterator /// intrusive_slist_base /// /// Provides a template-less base class for intrusive_slist. /// class intrusive_slist_base { public: typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. typedef ptrdiff_t difference_type; protected: intrusive_slist_node* mpNext; public: intrusive_slist_base(); bool empty() const; ///< Returns true if the container is empty. size_type size() const; ///< Returns the number of elements in the list; O(n). void clear(); ///< Clears the list; O(1). No deallocation occurs. void pop_front(); ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated. void reverse(); ///< Reverses a list so that front and back are swapped; O(n). //bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. 
}; // class intrusive_slist_base /// intrusive_slist /// template class intrusive_slist : public intrusive_slist_base { public: typedef intrusive_slist this_type; typedef intrusive_slist_base base_type; typedef T node_type; typedef T value_type; typedef typename base_type::size_type size_type; typedef typename base_type::difference_type difference_type; typedef T& reference; typedef const T& const_reference; typedef T* pointer; typedef const T* const_pointer; typedef IntrusiveSListIterator iterator; typedef IntrusiveSListIterator const_iterator; public: intrusive_slist(); ///< Creates an empty list. //intrusive_slist(const this_type& x); ///< Creates an empty list; ignores the argument. To consider: Is this a useful function? //this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. To consider: Is this a useful function? iterator begin(); ///< Returns an iterator pointing to the first element in the list. O(1). const_iterator begin() const; ///< Returns a const_iterator pointing to the first element in the list. O(1). const_iterator cbegin() const; ///< Returns a const_iterator pointing to the first element in the list. O(1). iterator end(); ///< Returns an iterator pointing one-after the last element in the list. O(1). const_iterator end() const; ///< Returns a const_iterator pointing one-after the last element in the list. O(1). const_iterator cend() const; ///< Returns a const_iterator pointing one-after the last element in the list. O(1). iterator before_begin(); ///< Returns iterator to position before begin. O(1). const_iterator before_begin() const; ///< Returns iterator to previous position. O(1). const_iterator cbefore_begin() const; ///< Returns iterator to previous position. O(1). iterator previous(const_iterator position); ///< Returns iterator to previous position. O(n). const_iterator previous(const_iterator position) const; ///< Returns iterator to previous position. O(n). 
reference       front();                                           ///< Returns a reference to the first element. The list must not be empty.
const_reference front() const;                                     ///< Returns a const reference to the first element. The list must not be empty.

void push_front(value_type& value);                                ///< Adds an element to the front of the list; O(1). The element is not copied. The element must not be in any other list.
void pop_front();                                                  ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated.

bool contains(const value_type& value) const;                      ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()).

iterator       locate(value_type& value);                          ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n)
const_iterator locate(const value_type& value) const;              ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n)

iterator insert(iterator position, value_type& value);             ///< Inserts an element before the element pointed to by the iterator. O(n)
iterator insert_after(iterator position, value_type& value);       ///< Inserts an element after the element pointed to by the iterator. O(1)

iterator erase(iterator position);                                 ///< Erases the element pointed to by the iterator. O(n)
iterator erase_after(iterator position);                           ///< Erases the element after the element pointed to by the iterator. O(1)

iterator erase(iterator first, iterator last);                     ///< Erases elements within the iterator range [first, last). O(n).
iterator erase_after(iterator before_first, iterator last);        ///< Erases elements within the iterator range [before_first, last). O(1).

void swap(this_type& x);                                           ///< Swaps the contents of two intrusive lists; O(1).


void splice(iterator position, value_type& value);                 ///< Moves the given element into this list before the element pointed to by position; O(n).
///< Required: x must be in some list or have first/next pointers that point it itself. void splice(iterator position, this_type& x); ///< Moves the contents of a list into this list before the element pointed to by position; O(n). ///< Required: &x != this (same as std::list). void splice(iterator position, this_type& x, iterator xPosition); ///< Moves the given element pointed to i within the list x into the current list before ///< the element pointed to by position; O(n). void splice(iterator position, this_type& x, iterator first, iterator last); ///< Moves the range of elements [first, last) from list x into the current list before ///< the element pointed to by position; O(n). ///< Required: position must not be in [first, last). (same as std::list). void splice_after(iterator position, value_type& value); ///< Moves the given element into this list after the element pointed to by position; O(1). ///< Required: x must be in some list or have first/next pointers that point it itself. void splice_after(iterator position, this_type& x); ///< Moves the contents of a list into this list after the element pointed to by position; O(n). ///< Required: &x != this (same as std::list). void splice_after(iterator position, this_type& x, iterator xPrevious); ///< Moves the element after xPrevious to be after position. O(1). ///< Required: &x != this (same as std::list). void splice_after(iterator position, this_type& x, iterator before_first, iterator before_last); ///< Moves the elements in the range of [before_first+1, before_last+1) to be after position. O(1). 
bool validate() const; int validate_iterator(const_iterator i) const; }; // intrusive_slist /////////////////////////////////////////////////////////////////////// // IntrusiveSListIterator /////////////////////////////////////////////////////////////////////// template inline IntrusiveSListIterator::IntrusiveSListIterator() { #if EASTL_DEBUG mpNode = NULL; #endif } template inline IntrusiveSListIterator::IntrusiveSListIterator(pointer pNode) : mpNode(pNode) { } template inline IntrusiveSListIterator::IntrusiveSListIterator(const iterator& x) : mpNode(x.mpNode) { } /////////////////////////////////////////////////////////////////////// // intrusive_slist_base /////////////////////////////////////////////////////////////////////// // To do. /////////////////////////////////////////////////////////////////////// // intrusive_slist /////////////////////////////////////////////////////////////////////// // To do. /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// template bool operator==(const intrusive_slist& a, const intrusive_slist& b) { // If we store an mSize member for intrusive_slist, we want to take advantage of it here. 
typename intrusive_slist::const_iterator ia = a.begin(); typename intrusive_slist::const_iterator ib = b.begin(); typename intrusive_slist::const_iterator enda = a.end(); typename intrusive_slist::const_iterator endb = b.end(); while((ia != enda) && (ib != endb) && (*ia == *ib)) { ++ia; ++ib; } return (ia == enda) && (ib == endb); } template bool operator<(const intrusive_slist& a, const intrusive_slist& b) { return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template bool operator!=(const intrusive_slist& a, const intrusive_slist& b) { return !(a == b); } template bool operator>(const intrusive_slist& a, const intrusive_slist& b) { return b < a; } template bool operator<=(const intrusive_slist& a, const intrusive_slist& b) { return !(b < a); } template bool operator>=(const intrusive_slist& a, const intrusive_slist& b) { return !(a < b); } template void swap(intrusive_slist& a, intrusive_slist& b) { a.swap(b); } } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/bonus/list_map.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #ifndef EASTL_LIST_MAP_H #define EASTL_LIST_MAP_H #include // 4512/4626 - 'class' : assignment operator could not be generated. // This disabling would best be put elsewhere. EA_DISABLE_VC_WARNING(4512 4626); namespace eastl { /// EASTL_MAP_DEFAULT_NAME /// /// Defines a default container name in the absence of a user-provided name. /// #ifndef EASTL_LIST_MAP_DEFAULT_NAME #define EASTL_LIST_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " list_map" // Unless the user overrides something, this is "EASTL list_map". 
#endif /// EASTL_MAP_DEFAULT_ALLOCATOR /// #ifndef EASTL_LIST_MAP_DEFAULT_ALLOCATOR #define EASTL_LIST_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_LIST_MAP_DEFAULT_NAME) #endif /// list_map_data_base /// /// We define a list_map_data_base separately from list_map_data (below), because it /// allows us to have non-templated operations, and it makes it so that the /// list_map anchor node doesn't carry a T with it, which would waste space and /// possibly lead to surprising the user due to extra Ts existing that the user /// didn't explicitly create. The downside to all of this is that it makes debug /// viewing of an list_map harder, given that the node pointers are of type /// list_map_data_base and not list_map_data. /// struct list_map_data_base { list_map_data_base* mpNext; list_map_data_base* mpPrev; }; /// list_map_data /// template struct list_map_data : public list_map_data_base { typedef Value value_type; list_map_data(const value_type& value); value_type mValue; // This is a pair of key/value. }; /// list_map_iterator /// template struct list_map_iterator { typedef list_map_iterator this_type; typedef list_map_iterator iterator; typedef list_map_iterator const_iterator; typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
typedef ptrdiff_t difference_type; typedef T value_type; typedef list_map_data_base base_node_type; typedef list_map_data node_type; typedef Pointer pointer; typedef Reference reference; typedef eastl::bidirectional_iterator_tag iterator_category; private: base_node_type* mpNode; public: list_map_iterator(); list_map_iterator(const base_node_type* pNode); // This is the converting constructor of a non-const iterator to a const iterator // This is never a copy constructor (due to enable_if) template , bool> = true> inline list_map_iterator(const iterator& x) : mpNode(x.mpNode) { // Empty } reference operator*() const; pointer operator->() const; this_type& operator++(); this_type operator++(int); this_type& operator--(); this_type operator--(int); private: template friend bool operator==(const list_map_iterator&, const list_map_iterator&); template friend bool operator!=(const list_map_iterator&, const list_map_iterator&); template friend bool operator!=(const list_map_iterator&, const list_map_iterator&); // list_map uses mpNode template friend class list_map; // for the "copy" constructor, which uses non-const iterator even in the // const_iterator case. friend iterator; friend const_iterator; }; // list_map_iterator /// use_value_first /// /// operator()(x) simply returns x.mValue.first. Used in list_map. /// This is similar to eastl::use_first, however it assumes that the input type is an object /// whose mValue is an eastl::pair, and the first value in the pair is the desired return. /// template struct use_value_first { typedef Object argument_type; typedef typename Object::value_type::first_type result_type; const result_type& operator()(const Object& x) const { return x.mValue.first; } }; /// list_map /// /// Implements a map like container, which also provides functionality similar to a list. /// /// Note: Like a map, keys must still be unique. 
/// As such, push_back() and push_front() operations
/// return a bool indicating success, or failure if the entry's key is already in use.
///
/// list_map is designed to improve performance for situations commonly implemented as:
///     A map, which must be iterated over to find the oldest entry, or purge expired entries.
///     A list, which must be iterated over to remove a player's record when they sign off.
///
/// list_map requires a little more memory per node than either a list or map alone,
/// and many of list_map's functions have a higher operational cost (CPU time) than their
/// counterparts in list and map. However, as the node count increases, list_map quickly outperforms
/// either a list or a map when find [by-index] and front/back type operations are required.
///
/// In essence, list_map avoids O(n) iterations at the expense of additional costs to quick (O(1) and O(log n)) operations:
///     push_front(), push_back(), pop_front() and pop_back() have O(log n) operation time, similar to map::insert(), rather than O(1) time like a list,
///     however, front() and back() maintain O(1) operation time.
///
/// As a canonical example, consider a large backlog of player group invites, which are removed when either:
///     The invitation times out - in main loop: while( !listMap.empty() && listMap.front().IsExpired() ) { listMap.pop_front(); }
///     The player rejects the outstanding invitation - on rejection: iter = listMap.find(playerId); if (iter != listMap.end()) { listMap.erase(iter); }
///
/// For a similar example, consider a high volume pending request container which must:
///     Time out old requests (similar to invites timing out above)
///     Remove requests once they've been handled (similar to rejecting invites above)
///
/// For such usage patterns, the performance benefits of list_map become dramatic with
/// common O(n) operations once the node count rises to hundreds or more.
///
/// When high performance is a priority, containers with thousands of nodes or more
/// can quickly result in unacceptable performance when executing even infrequent O(n) operations.
///
/// In order to maintain strong performance, avoid iterating over list_map whenever possible.
///
///////////////////////////////////////////////////////////////////////
/// find_as
/// In order to support the ability to have a tree of strings but
/// be able to do efficient lookups via char pointers (i.e. so they
/// aren't converted to string objects), we provide the find_as
/// function. This function allows you to do a find with a key of a
/// type other than the tree's key type. See the find_as function
/// for more documentation on this.
///
///////////////////////////////////////////////////////////////////////
/// Pool allocation
/// If you want to make a custom memory pool for a list_map container, your pool
/// needs to contain items of type list_map::node_type. So if you have a memory
/// pool that has a constructor that takes the size of pool items and the
/// count of pool items, you would do this (assuming that MemoryPool implements
/// the Allocator interface):
///     typedef list_map<Widget, int, less<Widget>, MemoryPool> WidgetMap;  // Declare your WidgetMap type.
///     MemoryPool myPool(sizeof(WidgetMap::node_type), 100);               // Make a pool of 100 Widget nodes.
///     WidgetMap myMap(&myPool);                                           // Create a map that uses the pool.
/// template , typename Allocator = EASTLAllocatorType> class list_map : protected rbtree >, Compare, Allocator, eastl::use_value_first > >, true, true> { public: typedef rbtree >, Compare, Allocator, eastl::use_value_first > >, true, true> base_type; typedef list_map this_type; typedef typename base_type::size_type size_type; typedef typename base_type::key_type key_type; typedef T mapped_type; typedef typename eastl::pair value_type; // This is intentionally different from base_type::value_type typedef value_type& reference; typedef const value_type& const_reference; typedef typename base_type::node_type node_type; // Despite the internal and external values being different, we're keeping the node type the same as the base // in order to allow for pool allocation. See EASTL/map.h for more information. typedef typename eastl::list_map_iterator iterator; // This is intentionally different from base_type::iterator typedef typename eastl::list_map_iterator const_iterator; // This is intentionally different from base_type::const_iterator typedef eastl::reverse_iterator reverse_iterator; typedef eastl::reverse_iterator const_reverse_iterator; typedef typename base_type::allocator_type allocator_type; typedef typename eastl::pair insert_return_type; // This is intentionally removed, as list_map doesn't support insert() functions, in favor of list like push_back and push_front typedef typename eastl::use_first extract_key; // This is intentionally different from base_type::extract_key using base_type::get_allocator; using base_type::set_allocator; using base_type::key_comp; using base_type::empty; using base_type::size; protected: typedef typename eastl::list_map_data > internal_value_type; protected: // internal base node, acting as the sentinel for list like behaviors list_map_data_base mNode; public: list_map(const allocator_type& allocator = EASTL_LIST_MAP_DEFAULT_ALLOCATOR); list_map(const Compare& compare, const allocator_type& allocator = 
EASTL_MAP_DEFAULT_ALLOCATOR); // To do: Implement the following: //list_map(const this_type& x); //list_map(this_type&& x); //list_map(this_type&& x, const allocator_type& allocator); //list_map(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_LIST_MAP_DEFAULT_ALLOCATOR); //template //list_map(Iterator itBegin, Iterator itEnd); //this_type& operator=(const this_type& x); //this_type& operator=(std::initializer_list ilist); //this_type& operator=(this_type&& x); //void swap(this_type& x); public: // iterators iterator begin() EA_NOEXCEPT; const_iterator begin() const EA_NOEXCEPT; const_iterator cbegin() const EA_NOEXCEPT; iterator end() EA_NOEXCEPT; const_iterator end() const EA_NOEXCEPT; const_iterator cend() const EA_NOEXCEPT; reverse_iterator rbegin() EA_NOEXCEPT; const_reverse_iterator rbegin() const EA_NOEXCEPT; const_reverse_iterator crbegin() const EA_NOEXCEPT; reverse_iterator rend() EA_NOEXCEPT; const_reverse_iterator rend() const EA_NOEXCEPT; const_reverse_iterator crend() const EA_NOEXCEPT; public: // List like methods reference front(); const_reference front() const; reference back(); const_reference back() const; // push_front and push_back which takes in a key/value pair bool push_front(const value_type& value); bool push_back(const value_type& value); // push_front and push_back which take key and value separately, for convenience bool push_front(const key_type& key, const mapped_type& value); bool push_back(const key_type& key, const mapped_type& value); void pop_front(); void pop_back(); public: // Map like methods iterator find(const key_type& key); const_iterator find(const key_type& key) const; template iterator find_as(const U& u, Compare2 compare2); template const_iterator find_as(const U& u, Compare2 compare2) const; size_type count(const key_type& key) const; size_type erase(const key_type& key); // todo: add heterogenous lookup support (using a heterogeneous comparator - a type 'Comp' 
where 'Comp::is_transparent' is valid and denotes a type): // template, bool> = true> // iterator find(const KX& key); // // ... also for count() ... // ... also for erase() ... public: // Shared methods which are common to list and map iterator erase(const_iterator position); reverse_iterator erase(const_reverse_iterator position); void clear(); void reset_lose_memory(); bool validate() const; int validate_iterator(const_iterator i) const; public: // list like functionality which is in consideration for implementation: // iterator insert(const_iterator position, const value_type& value); // void remove(const mapped_type& x); public: // list like functionality which may be implemented, but is discouraged from implementation: // due to the liklihood that they would require O(n) time to execute. // template // void remove_if(Predicate); // void reverse(); // void sort(); // template // void sort(Compare compare); public: // map like functionality which list_map does not support, due to abmiguity with list like functionality: #if !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) template list_map(InputIterator first, InputIterator last, const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR) = delete; insert_return_type insert(const value_type& value) = delete; iterator insert(const_iterator position, const value_type& value) = delete; template void insert(InputIterator first, InputIterator last) = delete; insert_return_type insert(const key_type& key) = delete; iterator erase(const_iterator first, const_iterator last) = delete; reverse_iterator erase(reverse_iterator first, reverse_iterator last) = delete; void erase(const key_type* first, const key_type* last) = delete; iterator lower_bound(const key_type& key) = delete; const_iterator lower_bound(const key_type& key) const = delete; iterator upper_bound(const key_type& key) = delete; const_iterator upper_bound(const key_type& key) const = delete; eastl::pair equal_range(const key_type& 
key) = delete; eastl::pair equal_range(const key_type& key) const = delete; mapped_type& operator[](const key_type& key) = delete; // Of map, multimap, set, and multimap, only map has operator[]. #endif public: // list like functionality which list_map does not support, due to ambiguity with map like functionality: #if 0 reference push_front() = delete; void* push_front_uninitialized() = delete; reference push_back() = delete; void* push_back_uninitialized() = delete; iterator insert(const_iterator position) = delete; void insert(const_iterator position, size_type n, const value_type& value) = delete; template void insert(const_iterator position, InputIterator first, InputIterator last) = delete; iterator erase(const_iterator first, const_iterator last) = delete; reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last) = delete; void splice(const_iterator position, this_type& x) = delete void splice(const_iterator position, this_type& x, const_iterator i) = delete; void splice(const_iterator position, this_type& x, const_iterator first, const_iterator last) = delete; void merge(this_type& x) = delete; template void merge(this_type& x, Compare compare) = delete; void unique() = delete; // Uniqueness is enforced by map functionality template void unique(BinaryPredicate) = delete; // Uniqueness is enforced by map functionality #endif }; // list_map /////////////////////////////////////////////////////////////////////// // list_map_data /////////////////////////////////////////////////////////////////////// template inline list_map_data::list_map_data(const Value& value) : mValue(value) { mpNext = NULL; // GCC 4.8 is generating warnings about referencing these values in list_map::push_front unless we mpPrev = NULL; // initialize them here. The compiler seems to be mistaken, as our code isn't actually using them unintialized. 
} /////////////////////////////////////////////////////////////////////// // list_map_iterator /////////////////////////////////////////////////////////////////////// template inline list_map_iterator::list_map_iterator() : mpNode(NULL) { // Empty } template inline list_map_iterator::list_map_iterator(const base_node_type* pNode) : mpNode(const_cast(pNode)) { // Empty } template inline typename list_map_iterator::reference list_map_iterator::operator*() const { return static_cast(mpNode)->mValue; } template inline typename list_map_iterator::pointer list_map_iterator::operator->() const { return &static_cast(mpNode)->mValue; } template inline typename list_map_iterator::this_type& list_map_iterator::operator++() { mpNode = mpNode->mpNext; return *this; } template inline typename list_map_iterator::this_type list_map_iterator::operator++(int) { this_type temp(*this); mpNode = mpNode->mpNext; return temp; } template inline typename list_map_iterator::this_type& list_map_iterator::operator--() { mpNode = mpNode->mpPrev; return *this; } template inline typename list_map_iterator::this_type list_map_iterator::operator--(int) { this_type temp(*this); mpNode = mpNode->mpPrev; return temp; } // We provide additional template paremeters here to support comparisons between const and non-const iterators. // See C++ defect report #179, or EASTL/list.h for more information. template inline bool operator==(const list_map_iterator& a, const list_map_iterator& b) { return a.mpNode == b.mpNode; } template inline bool operator!=(const list_map_iterator& a, const list_map_iterator& b) { return a.mpNode != b.mpNode; } // We provide a version of operator!= for the case where the iterators are of the // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
template inline bool operator!=(const list_map_iterator& a, const list_map_iterator& b) { return a.mpNode != b.mpNode; } /////////////////////////////////////////////////////////////////////// // list_map /////////////////////////////////////////////////////////////////////// template inline list_map::list_map(const allocator_type& allocator) : base_type(allocator) { mNode.mpNext = &mNode; mNode.mpPrev = &mNode; } template inline list_map::list_map(const Compare& compare, const allocator_type& allocator) : base_type(compare, allocator) { mNode.mpNext = &mNode; mNode.mpPrev = &mNode; } template inline typename list_map::iterator list_map::begin() EA_NOEXCEPT { return iterator(mNode.mpNext); } template inline typename list_map::const_iterator list_map::begin() const EA_NOEXCEPT { return const_iterator(mNode.mpNext); } template inline typename list_map::const_iterator list_map::cbegin() const EA_NOEXCEPT { return const_iterator(mNode.mpNext); } template inline typename list_map::iterator list_map::end() EA_NOEXCEPT { return iterator(&mNode); } template inline typename list_map::const_iterator list_map::end() const EA_NOEXCEPT { return const_iterator(&mNode); } template inline typename list_map::const_iterator list_map::cend() const EA_NOEXCEPT { return const_iterator(&mNode); } template inline typename list_map::reverse_iterator list_map::rbegin() EA_NOEXCEPT { return reverse_iterator(&mNode); } template inline typename list_map::const_reverse_iterator list_map::rbegin() const EA_NOEXCEPT { return const_reverse_iterator(&mNode); } template inline typename list_map::const_reverse_iterator list_map::crbegin() const EA_NOEXCEPT { return const_reverse_iterator(&mNode); } template inline typename list_map::reverse_iterator list_map::rend() EA_NOEXCEPT { return reverse_iterator(mNode.mpNext); } template inline typename list_map::const_reverse_iterator list_map::rend() const EA_NOEXCEPT { return const_reverse_iterator(mNode.mpNext); } template inline typename 
list_map::const_reverse_iterator list_map::crend() const EA_NOEXCEPT { return const_reverse_iterator(mNode.mpNext); } template inline typename list_map::reference list_map::front() { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) EASTL_FAIL_MSG("list_map::front -- empty container"); #else // We allow the user to reference an empty container. #endif return static_cast(mNode.mpNext)->mValue; } template inline typename list_map::const_reference list_map::front() const { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) EASTL_FAIL_MSG("list_map::front -- empty container"); #else // We allow the user to reference an empty container. #endif return static_cast(mNode.mpNext)->mValue; } template inline typename list_map::reference list_map::back() { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) EASTL_FAIL_MSG("list_map::back -- empty container"); #else // We allow the user to reference an empty container. #endif return static_cast(mNode.mpPrev)->mValue; } template inline typename list_map::const_reference list_map::back() const { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) EASTL_FAIL_MSG("list_map::back -- empty container"); #else // We allow the user to reference an empty container. #endif return static_cast(mNode.mpPrev)->mValue; } template bool list_map::push_front(const value_type& value) { internal_value_type tempValue(value); typename base_type::insert_return_type baseReturn = base_type::insert(tempValue); // Did the insert succeed? 
if (baseReturn.second) { internal_value_type* pNode = &(*baseReturn.first); pNode->mpNext = mNode.mpNext; pNode->mpPrev = &mNode; mNode.mpNext->mpPrev = pNode; mNode.mpNext = pNode; return true; } else { return false; } } template bool list_map::push_back(const value_type& value) { internal_value_type tempValue(value); typename base_type::insert_return_type baseReturn = base_type::insert(tempValue); // Did the insert succeed? if (baseReturn.second) { internal_value_type* pNode = &(*baseReturn.first); pNode->mpPrev = mNode.mpPrev; pNode->mpNext = &mNode; mNode.mpPrev->mpNext = pNode; mNode.mpPrev = pNode; return true; } else { return false; } } template bool list_map::push_front(const key_type& key, const mapped_type& value) { return push_front(eastl::make_pair(key, value)); } template bool list_map::push_back(const key_type& key, const mapped_type& value) { return push_back(eastl::make_pair(key, value)); } template void list_map::pop_front() { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(empty())) EASTL_FAIL_MSG("list_map::pop_front -- empty container"); #endif erase(static_cast(mNode.mpNext)->mValue.first); } template void list_map::pop_back() { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(empty())) EASTL_FAIL_MSG("list_map::pop_back -- empty container"); #endif erase(static_cast(mNode.mpPrev)->mValue.first); } template inline typename list_map::iterator list_map::find(const key_type& key) { typename base_type::iterator baseIter = base_type::find(key); if (baseIter != base_type::end()) { return iterator(&(*baseIter)); } else { return end(); } } template inline typename list_map::const_iterator list_map::find(const key_type& key) const { typename base_type::const_iterator baseIter = base_type::find(key); if (baseIter != base_type::end()) { return const_iterator(&(*baseIter)); } else { return end(); } } template template inline typename list_map::iterator list_map::find_as(const U& u, Compare2 compare2) { typename base_type::iterator baseIter = base_type::find_as(u, 
compare2); if (baseIter != base_type::end()) { return iterator(&(*baseIter)); } else { return end(); } } template template inline typename list_map::const_iterator list_map::find_as(const U& u, Compare2 compare2) const { typename base_type::const_iterator baseIter = base_type::find_as(u, compare2); if (baseIter != base_type::end()) { return const_iterator(&(*baseIter)); } else { return end(); } } template inline typename list_map::size_type list_map::count(const key_type& key) const { const typename base_type::const_iterator it = base_type::find(key); return (it != base_type::end()) ? 1 : 0; } template inline typename list_map::size_type list_map::erase(const key_type& key) { typename base_type::iterator baseIter = base_type::find(key); if (baseIter != base_type::end()) { internal_value_type* node = &(*baseIter); node->mpNext->mpPrev = node->mpPrev; node->mpPrev->mpNext = node->mpNext; base_type::erase(baseIter); return 1; } return 0; } template inline typename list_map::iterator list_map::erase(const_iterator position) { iterator posIter(position.mpNode); // Convert from const. 
iterator eraseIter(posIter++); erase(eraseIter->first); return posIter; } template inline typename list_map::reverse_iterator list_map::erase(const_reverse_iterator position) { return reverse_iterator(erase((++position).base())); } template void list_map::clear() { base_type::clear(); mNode.mpNext = &mNode; mNode.mpPrev = &mNode; } template void list_map::reset_lose_memory() { base_type::reset_lose_memory(); mNode.mpNext = &mNode; mNode.mpPrev = &mNode; } template bool list_map::validate() const { if (!base_type::validate()) { return false; } size_type nodeCount(0); list_map_data_base* node = mNode.mpNext; while (node != &mNode) { internal_value_type* data = static_cast(node); if (base_type::find(data->mValue.first) == base_type::end()) { return false; } node = node->mpNext; ++nodeCount; } if (nodeCount != size()) { return false; } nodeCount = 0; node = mNode.mpPrev; while (node != &mNode) { internal_value_type* data = static_cast(node); if (base_type::find(data->mValue.first) == base_type::end()) { return false; } node = node->mpPrev; ++nodeCount; } if (nodeCount != size()) { return false; } return true; } template int list_map::validate_iterator(const_iterator iter) const { for (const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) { if (temp == iter) { return (isf_valid | isf_current | isf_can_dereference); } } if (iter == end()) return (isf_valid | isf_current); return isf_none; } } // namespace eastl EA_RESTORE_VC_WARNING(); #endif // Header include guard ================================================ FILE: include/EASTL/bonus/lru_cache.h ================================================ /////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
/////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // lru_cache is a container that simplifies caching of objects in a map. // Basically, you give the container a key, like a string, and the data you want. // The container provides callback mechanisms to generate data if it's missing // as well as delete data when it's purged from the cache. This container // uses a least recently used method: whatever the oldest item is will be // replaced with a new entry. // // Algorithmically, the container is a combination of a map and a list. // The list stores the age of the entries by moving the entry to the head // of the list on each access, either by a call to get() or to touch(). // The map is just the map as one would expect. // // This is useful for caching off data that is expensive to generate, // for example text to speech wave files that are dynamically generated, // but that will need to be reused, as is the case in narration of menu // entries as a user scrolls through the entries. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_LRUCACHE_H #define EASTL_LRUCACHE_H #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once #endif #include #include #include #include // for pair #include // for function, hash, equal_to namespace eastl { /// EASTL_LRUCACHE_DEFAULT_NAME /// /// Defines a default container name in the absence of a user-provided name. /// #ifndef EASTL_LRUCACHE_DEFAULT_NAME #define EASTL_LRUCACHE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " lru_cache" // Unless the user overrides something, this is "EASTL lru_cache". #endif /// EASTL_LRUCACHE_DEFAULT_ALLOCATOR /// #ifndef EASTL_LRUCACHE_DEFAULT_ALLOCATOR #define EASTL_LRUCACHE_DEFAULT_ALLOCATOR allocator_type(EASTL_LRUCACHE_DEFAULT_NAME) #endif /// lru_cache /// /// Implements a caching map based off of a key and data. 
/// LRUList parameter is any container that guarantees the validity of its iterator even after a modification (e.g. list) /// LRUMap is any associative container that can map a key to some data. By default, we use unordered_map, but it might be better /// to use hash_map or some other structure depending on your key/data combination. For example, you may want to swap the /// map backing if using strings as keys or if the data objects are small. In any case, unordered_map is a good default and should /// work well enough since the purpose of this class is to cache results of expensive, order of milliseconds, operations /// /// Algorithmic Performance (default data structures): /// touch() -> O(1) /// insert() / update(), get() / operator[] -> equivalent to unordered_map (O(1) on average, O(n) worst) /// size() -> O(1) /// /// All accesses to a given key (insert, update, get) will push that key to most recently used. /// If the data objects are shared between threads, it would be best to use a smartptr to manage the lifetime of the data. /// as it could be removed from the cache while in use by another thread. template , typename map_type = eastl::unordered_map, eastl::hash, eastl::equal_to, Allocator>> class lru_cache { public: using key_type = Key; using value_type = Value; using allocator_type = Allocator; using size_type = eastl_size_t; using list_iterator = typename list_type::iterator; using map_iterator = typename map_type::iterator; using data_container_type = eastl::pair; using iterator = typename map_type::iterator; using const_iterator = typename map_type::const_iterator; using this_type = lru_cache; using create_callback_type = eastl::function; using delete_callback_type = eastl::function; /// lru_cache constructor /// /// Creates a Key / Value map that only stores size Value objects until it deletes them. /// For complex objects or operations, the creator and deletor callbacks can be used. 
/// This works just like a regular map object: on access, the Value will be created if it doesn't exist, returned otherwise. explicit lru_cache(size_type size, const allocator_type& allocator = EASTL_LRUCACHE_DEFAULT_ALLOCATOR, create_callback_type creator = nullptr, delete_callback_type deletor = nullptr) : m_list(allocator) , m_map(allocator) , m_capacity(size) , m_create_callback(creator) , m_delete_callback(deletor) { } /// lru_cache destructor /// /// Iterates across every entry in the map and calls the deletor before calling the standard destructors ~lru_cache() { // Destruct everything we have cached for (auto& iter : m_map) { if (m_delete_callback) m_delete_callback(iter.second.first); } } lru_cache(std::initializer_list> il) : lru_cache(static_cast(il.size())) { for(auto& p : il) insert_or_assign(p.first, p.second); } // TODO(rparolin): Why do we prevent copies? And what about moves? lru_cache(const this_type&) = delete; this_type &operator=(const this_type&) = delete; /// insert /// /// insert key k with value v. /// If key already exists, no change is made and the return value is false. /// If the key doesn't exist, the data is added to the map and the return value is true. bool insert(const key_type& k, const value_type& v) { if (m_map.find(k) == m_map.end()) { make_space(); m_list.push_front(k); m_map[k] = data_container_type(v, m_list.begin()); return true; } else { return false; } } /// emplace /// /// Places a new object in place k created with args /// If the key already exists, no change is made. /// return value is a pair of the iterator to the emplaced or already-existing element and a bool denoting whether insertion took place. template eastl::pair emplace(const key_type& k, Args&&... 
args) { auto it = m_map.find(k); if (it == m_map.end()) { make_space(); m_list.push_front(k); return m_map.emplace(k, data_container_type(piecewise_construct, eastl::forward_as_tuple(eastl::forward(args)...), make_tuple(m_list.begin()))); } else { return make_pair(it, false); } } /// insert_or_assign /// /// Same as add, but replaces the data at key k, if it exists, with the new entry v /// Note that the deletor for the old v will be called before it's replaced with the new value of v void insert_or_assign(const key_type& k, const value_type& v) { auto iter = m_map.find(k); if (m_map.find(k) != m_map.end()) { assign(iter, v); } else { insert(k, v); } } /// contains /// /// Returns true if key k exists in the cache bool contains(const key_type& k) const { return m_map.find(k) != m_map.end(); } /// at /// /// Retrives the data for key k, not valid if k does not exist eastl::optional at(const key_type& k) { auto iter = m_map.find(k); if (iter != m_map.end()) { return iter->second.first; } else { return eastl::nullopt; } } /// get /// /// Retrives the data for key k. If no data exists, it will be created by calling the /// creator. value_type& get(const key_type& k) { auto iter = m_map.find(k); // The entry exists in the cache if (iter != m_map.end()) { touch(k); return iter->second.first; } else // The entry doesn't exist in the cache, so create one { // Add the entry to the map insert(k, m_create_callback ? m_create_callback(k) : value_type()); // return the new data return m_map[k].first; } } /// Equivalent to get(k) value_type& operator[](const key_type& k) { return get(k); } /// erase /// /// erases key k from the cache. /// If k does not exist, returns false. If k exists, returns true. bool erase(const key_type& k) { auto iter = m_map.find(k); if (iter != m_map.end()) { m_list.erase(iter->second.second); // Delete the actual entry map_erase(iter); return true; } return false; } /// erase_oldest /// /// Removes the oldest entry from the cache. 
void erase_oldest() { auto key = m_list.back(); m_list.pop_back(); // Delete the actual entry auto iter = m_map.find(key); map_erase(iter); } /// touch /// /// Touches key k, marking it as most recently used. /// If k does not exist, returns false. If the touch was successful, returns true. bool touch(const key_type& k) { auto iter = m_map.find(k); if (iter != m_map.end()) { touch(iter); return true; } return false; } /// touch /// /// Touches key at iterator iter, moving it to most recently used position void touch(iterator& iter) { auto listRef = iter->second.second; m_list.erase(listRef); m_list.push_front(iter->first); iter->second.second = m_list.begin(); } /// assign /// /// Updates key k with data v. /// If key k does not exist, returns false and no changes are made. /// If key k exists, existing data has its deletor called and key k's data is replaced with new v data bool assign(const key_type& k, const value_type& v) { auto iter = m_map.find(k); if (iter != m_map.end()) { assign(iter, v); return true; } return false; } /// assign /// /// Updates data at spot iter with data v. 
void assign(iterator& iter, const value_type& v) { if (m_delete_callback) m_delete_callback(iter->second.first); touch(iter); iter->second.first = v; } // standard container functions iterator begin() EA_NOEXCEPT { return m_map.begin(); } iterator end() EA_NOEXCEPT { return m_map.end(); } iterator rbegin() EA_NOEXCEPT { return m_map.rbegin(); } iterator rend() EA_NOEXCEPT { return m_map.rend(); } const_iterator begin() const EA_NOEXCEPT { return m_map.begin(); } const_iterator cbegin() const EA_NOEXCEPT { return m_map.cbegin(); } const_iterator crbegin() const EA_NOEXCEPT { return m_map.crbegin(); } const_iterator end() const EA_NOEXCEPT { return m_map.end(); } const_iterator cend() const EA_NOEXCEPT { return m_map.cend(); } const_iterator crend() const EA_NOEXCEPT { return m_map.crend(); } bool empty() const EA_NOEXCEPT { return m_map.empty(); } size_type size() const EA_NOEXCEPT { return m_map.size(); } size_type capacity() const EA_NOEXCEPT { return m_capacity; } void clear() EA_NOEXCEPT { // Since we have a delete callback, we want to reuse the trim function by cheating the max // size to clear all the entries to avoid duplicating code. auto old_max = m_capacity; m_capacity = 0; trim(); m_capacity = old_max; } /// resize /// /// Resizes the cache. Can be used to either expand or contract the cache. /// In the case of a contraction, the oldest entries will be evicted with their respective /// deletors called before completing. 
void resize(size_type newSize) { m_capacity = newSize; trim(); } void setCreateCallback(create_callback_type callback) { m_create_callback = callback; } void setDeleteCallback(delete_callback_type callback) { m_delete_callback = callback; } // EASTL extensions const allocator_type& get_allocator() const EA_NOEXCEPT { return m_map.get_allocator(); } allocator_type& get_allocator() EA_NOEXCEPT { return m_map.get_allocator(); } void set_allocator(const allocator_type& allocator) { m_map.set_allocator(allocator); m_list.set_allocator(allocator); } /// Does not reset the callbacks void reset_lose_memory() EA_NOEXCEPT { m_map.reset_lose_memory(); m_list.reset_lose_memory(); } private: inline void map_erase(map_iterator pos) { if (m_delete_callback) m_delete_callback(pos->second.first); m_map.erase(pos); } bool trim() { if (size() <= m_capacity) { return false; // No trim necessary } // We need to trim do { erase_oldest(); } while (m_list.size() > m_capacity); return true; } void make_space() { if (size() == m_capacity) { erase_oldest(); } } private: list_type m_list; map_type m_map; size_type m_capacity; create_callback_type m_create_callback; delete_callback_type m_delete_callback; }; } #endif ================================================ FILE: include/EASTL/bonus/overloaded.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// #ifndef EASTL_OVERLOADED_H #define EASTL_OVERLOADED_H #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed // improvements in apps as a result. #endif // 4512/4626 - 'class' : assignment operator could not be generated. // This disabling would best be put elsewhere. 
EA_DISABLE_VC_WARNING(4512 4626); namespace eastl { /////////////////////////////////////////////////////////////////////////// /// overloaded /// /// A helper class that permits you to combine multiple function objects into one. /// Typically, this helper is really handy when visiting an eastl::variant with multiple lambdas. /// Example: /// /// eastl::variant v{42}; /// /// eastl::visit( /// eastl::overloaded{ /// [](const int& x) { std::cout << "Visited an integer: " << x << "\n"; }, // Will reach that lambda with x == 42. /// [](const string& s) { std::cout << "Visited an string: " << s << "\n"; } /// }, /// v /// ); /////////////////////////////////////////////////////////////////////////// template struct overloaded; template struct overloaded : T { template EA_CPP14_CONSTEXPR overloaded(U&& u) : T(eastl::forward(u)) { } using T::operator(); }; template struct overloaded : T, overloaded { template EA_CPP14_CONSTEXPR overloaded(U&& u, V&&... v) : T(eastl::forward(u)), overloaded(eastl::forward(v)...) { } using T::operator(); using overloaded::operator(); }; #ifdef __cpp_deduction_guides template overloaded(T...) -> overloaded; #endif /////////////////////////////////////////////////////////////////////////// /// make_overloaded /// /// Helper function to create an overloaded instance when lacking deduction guides. /// make_overloaded(f1, f2, f3) == overloaded{f1, f2, f3} /////////////////////////////////////////////////////////////////////////// template EA_CPP14_CONSTEXPR overloaded::type...> make_overloaded(T&&... t) { return overloaded::type...>{eastl::forward(t)...}; } } // namespace eastl EA_RESTORE_VC_WARNING(); #endif // EASTL_OVERLOADED_H ================================================ FILE: include/EASTL/bonus/ring_buffer.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. 
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // A ring buffer is a FIFO (first-in, first-out) container which acts // much like a queue. The difference is that a ring buffer is implemented // via chasing pointers around a given container, whereas a queue // adds writes to the end of the container and takes reads from the beginning. // The benefit of a ring buffer is that memory allocations don't occur // and new elements are neither added nor removed from the container. // Elements in the container are simply assigned values in circles around // the container. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_RING_BUFFER_H #define EASTL_RING_BUFFER_H #include #include #include #include #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /// EASTL_RING_BUFFER_DEFAULT_NAME /// /// Defines a default container name in the absence of a user-provided name. /// #ifndef EASTL_RING_BUFFER_DEFAULT_NAME #define EASTL_RING_BUFFER_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " ring_buffer" // Unless the user overrides something, this is "EASTL ring_buffer". #endif /// EASTL_RING_BUFFER_DEFAULT_ALLOCATOR /// #ifndef EASTL_RING_BUFFER_DEFAULT_ALLOCATOR #define EASTL_RING_BUFFER_DEFAULT_ALLOCATOR allocator_type(EASTL_RING_BUFFER_DEFAULT_NAME) #endif /// ring_buffer_iterator /// /// We force this iterator to act like a random access iterator even if /// the underlying container doesn't support random access iteration. /// Any BidirectionalIterator can be a RandomAccessIterator; it just /// might be inefficient in some cases. 
/// template struct ring_buffer_iterator { public: typedef ring_buffer_iterator this_type; typedef T value_type; typedef Pointer pointer; typedef Reference reference; typedef typename Container::size_type size_type; typedef typename Container::difference_type difference_type; typedef typename Container::iterator container_iterator; typedef typename Container::const_iterator container_const_iterator; typedef ring_buffer_iterator iterator; typedef ring_buffer_iterator const_iterator; typedef eastl::random_access_iterator_tag iterator_category; public: Container* mpContainer; container_iterator mContainerIterator; public: ring_buffer_iterator(); ring_buffer_iterator(Container* pContainer, const container_iterator& containerIterator); ring_buffer_iterator(const iterator& x); ring_buffer_iterator& operator=(const iterator& x); reference operator*() const; pointer operator->() const; this_type& operator++(); this_type operator++(int); this_type& operator--(); this_type operator--(int); this_type& operator+=(difference_type n); this_type& operator-=(difference_type n); this_type operator+(difference_type n) const; this_type operator-(difference_type n) const; protected: void increment(difference_type n, eastl::input_iterator_tag); void increment(difference_type n, eastl::random_access_iterator_tag); }; // struct ring_buffer_iterator /// ring_buffer /// /// Implements a ring buffer via a given container type, which would /// typically be a vector or array, though any container which supports /// bidirectional iteration would work. /// /// A ring buffer is a FIFO (first-in, first-out) container which acts /// much like a queue. The difference is that a ring buffer is implemented /// via chasing pointers around a container and moving the read and write /// positions forward (and possibly wrapping around) as the container is /// read and written via pop_front and push_back. 
///
/// The benefit of a ring buffer is that memory allocations don't occur
/// and new elements are neither added nor removed from the container.
/// Elements in the container are simply assigned values in circles around
/// the container.
///
/// ring_buffer is different from other containers -- including adapter
/// containers -- in how iteration is done. Iteration of a ring buffer
/// starts at the current begin position, proceeds to the end of the underlying
/// container, and continues at the begin of the underlying container until
/// the ring buffer's current end position. Thus a ring_buffer does
/// indeed have a begin and an end, though the values of begin and end
/// chase each other around the container. An empty ring_buffer is one
/// in which end == begin, and a full ring_buffer is one in which
/// end + 1 == begin.
///
/// Example of a ring buffer layout, where + indicates queued items:
///    ++++++++++--------------------------------+++++++++
///              ^                               ^
///              end                             begin
///
/// Empty ring buffer:
///    ---------------------------------------------------
///                              ^
///                          begin / end
///
/// Full ring buffer. Note that one item is necessarily unused; it is
/// analogous to a '\0' at the end of a C string:
///    +++++++++++++++++++++++++++++++++++++++++-+++++++++
///                                             ^^
///                                           end begin
///
/// A push_back operation on a ring buffer assigns the new value to end.
/// If there is no more space in the buffer, this will result in begin
/// being overwritten and the begin position being moved forward one position.
/// The user can use the full() function to detect this condition.
/// Note that elements in a ring buffer are not created or destroyed as
/// they are added and removed; they are merely assigned. Only on
/// container construction and destruction are any elements created and
/// destroyed.
///
/// The ring buffer can be used in either direction.
/// By this we mean that
/// you can use push_back to add items and pop_front to remove them; or you can
/// use push_front to add items and pop_back to remove them. You aren't
/// limited to these operations; you can push or pop from either side
/// arbitrarily and you can insert or erase anywhere in the container.
///
/// The ring buffer requires the user to specify a Container type, which
/// by default is vector. However, any container with bidirectional iterators
/// will work, such as list, deque, string or any of the fixed_* versions
/// of these containers, such as fixed_string. Since ring buffer works via copying
/// elements instead of allocating and freeing nodes, inserting in the middle
/// of a ring buffer based on list (instead of vector) is no more efficient.
///
/// To use the ring buffer, its container must be resized to the desired
/// ring buffer size. Changing the size of a ring buffer may cause ring
/// buffer iterators to invalidate.
///
/// An alternative to using a ring buffer is to use a list with a user-created
/// node pool and custom allocator. There are various tradeoffs that result from this.
///
/// Example usage:
///     ring_buffer< int, list<int> > rb(100);
///     rb.push_back(1);
///
/// Example usage:
///     // Example of creating an on-screen debug log that shows 16
///     // strings at a time and scrolls older strings away.
///
///     // Create ring buffer of 16 strings.
///     ring_buffer< string, vector<string> > debugLogText(16);
///
///     // Reserve 128 chars for each line. This can make it so that no
///     // runtime memory allocations occur.
///     for(vector<string>::iterator it = debugLogText.get_container().begin(),
///         itEnd = debugLogText.get_container().end(); it != itEnd; ++it)
///     {
///         (*it).reserve(128);
///     }
///
///     // Add a new string, using push_front() and front() instead of
///     // push_front(str) in order to avoid creating a temporary str.
/// debugLogText.push_front(); /// debugLogText.front() = "Player fired weapon"; /// template , typename Allocator = typename Container::allocator_type> class ring_buffer { public: typedef ring_buffer this_type; typedef Container container_type; typedef Allocator allocator_type; typedef typename Container::value_type value_type; typedef typename Container::reference reference; typedef typename Container::const_reference const_reference; typedef typename Container::size_type size_type; typedef typename Container::difference_type difference_type; typedef typename Container::iterator container_iterator; typedef typename Container::const_iterator container_const_iterator; typedef ring_buffer_iterator iterator; typedef ring_buffer_iterator const_iterator; typedef eastl::reverse_iterator reverse_iterator; typedef eastl::reverse_iterator const_reverse_iterator; public: // We declare public so that global comparison operators can be implemented without adding an inline level and without tripping up GCC 2.x friend declaration failures. GCC (through at least v4.0) is poor at inlining and performance wins over correctness. Container c; // We follow the naming convention established for stack, queue, priority_queue and name this 'c'. This variable must always have a size of at least 1, as even an empty ring_buffer has an unused terminating element. protected: container_iterator mBegin; // We keep track of where our begin and end are by using Container iterators. container_iterator mEnd; size_type mSize; public: // There currently isn't a ring_buffer constructor that specifies an initial size, unlike other containers. explicit ring_buffer(size_type cap = 0); // Construct with an initial capacity (but size of 0). 
explicit ring_buffer(size_type cap, const allocator_type& allocator); explicit ring_buffer(const Container& x); explicit ring_buffer(const allocator_type& allocator); ring_buffer(const this_type& x); ring_buffer(this_type&& x); ring_buffer(this_type&& x, const allocator_type& allocator); ring_buffer(std::initializer_list ilist, const allocator_type& allocator = EASTL_RING_BUFFER_DEFAULT_ALLOCATOR); // This function sets the capacity to be equal to the size of the initializer list. // No destructor necessary. Default will do. this_type& operator=(const this_type& x); this_type& operator=(std::initializer_list ilist); this_type& operator=(this_type&& x); template void assign(InputIterator first, InputIterator last); void swap(this_type& x); iterator begin() EA_NOEXCEPT; const_iterator begin() const EA_NOEXCEPT; const_iterator cbegin() const EA_NOEXCEPT; iterator end() EA_NOEXCEPT; const_iterator end() const EA_NOEXCEPT; const_iterator cend() const EA_NOEXCEPT; reverse_iterator rbegin() EA_NOEXCEPT; const_reverse_iterator rbegin() const EA_NOEXCEPT; const_reverse_iterator crbegin() const EA_NOEXCEPT; reverse_iterator rend() EA_NOEXCEPT; const_reverse_iterator rend() const EA_NOEXCEPT; const_reverse_iterator crend() const EA_NOEXCEPT; bool empty() const EA_NOEXCEPT; bool full() const EA_NOEXCEPT; size_type size() const EA_NOEXCEPT; size_type capacity() const EA_NOEXCEPT; void resize(size_type n); void set_capacity(size_type n); // Sets the capacity to the given value, including values less than the current capacity. Adjusts the size downward if n < size, by throwing out the oldest elements in the buffer. void reserve(size_type n); // Reserve a given capacity. Doesn't decrease the capacity; it only increases it (for compatibility with other containers' behavior). 
reference front(); const_reference front() const; reference back(); const_reference back() const; void push_back(const value_type& value); reference push_back(); void push_front(const value_type& value); reference push_front(); void pop_back(); void pop_front(); reference operator[](size_type n); const_reference operator[](size_type n) const; // To consider: // size_type read(value_type* pDestination, size_type nCount); // size_type read(iterator** pPosition1, iterator** pPosition2, size_type& nCount1, size_type& nCount2); /* To do: template reference emplace_front(Args&&... args); template reference emplace_back(Args&&... args); template iterator emplace(const_iterator position, Args&&... args); */ iterator insert(const_iterator position, const value_type& value); void insert(const_iterator position, size_type n, const value_type& value); void insert(const_iterator position, std::initializer_list ilist); template void insert(const_iterator position, InputIterator first, InputIterator last); iterator erase(const_iterator position); iterator erase(const_iterator first, const_iterator last); reverse_iterator erase(const_reverse_iterator position); reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); void clear(); container_type& get_container(); const container_type& get_container() const; bool validate() const; int validate_iterator(const_iterator i) const; protected: //size_type DoGetSize(eastl::input_iterator_tag) const; //size_type DoGetSize(eastl::random_access_iterator_tag) const; }; // class ring_buffer /////////////////////////////////////////////////////////////////////// // ring_buffer_iterator /////////////////////////////////////////////////////////////////////// template ring_buffer_iterator::ring_buffer_iterator() : mpContainer(NULL), mContainerIterator() { } template ring_buffer_iterator::ring_buffer_iterator(Container* pContainer, const container_iterator& containerIterator) : mpContainer(pContainer), 
mContainerIterator(containerIterator) { } template ring_buffer_iterator::ring_buffer_iterator(const iterator& x) : mpContainer(x.mpContainer), mContainerIterator(x.mContainerIterator) { } template ring_buffer_iterator& ring_buffer_iterator::operator=(const iterator& x) { mpContainer = x.mpContainer; mContainerIterator = x.mContainerIterator; return *this; } template typename ring_buffer_iterator::reference ring_buffer_iterator::operator*() const { return *mContainerIterator; } template typename ring_buffer_iterator::pointer ring_buffer_iterator::operator->() const { return &*mContainerIterator; } template typename ring_buffer_iterator::this_type& ring_buffer_iterator::operator++() { if(EASTL_UNLIKELY(++mContainerIterator == mpContainer->end())) mContainerIterator = mpContainer->begin(); return *this; } template typename ring_buffer_iterator::this_type ring_buffer_iterator::operator++(int) { const this_type temp(*this); if(EASTL_UNLIKELY(++mContainerIterator == mpContainer->end())) mContainerIterator = mpContainer->begin(); return temp; } template typename ring_buffer_iterator::this_type& ring_buffer_iterator::operator--() { if(EASTL_UNLIKELY(mContainerIterator == mpContainer->begin())) mContainerIterator = mpContainer->end(); --mContainerIterator; return *this; } template typename ring_buffer_iterator::this_type ring_buffer_iterator::operator--(int) { const this_type temp(*this); if(EASTL_UNLIKELY(mContainerIterator == mpContainer->begin())) mContainerIterator = mpContainer->end(); --mContainerIterator; return temp; } template typename ring_buffer_iterator::this_type& ring_buffer_iterator::operator+=(difference_type n) { typedef typename eastl::iterator_traits::iterator_category IC; increment(n, IC()); return *this; } template typename ring_buffer_iterator::this_type& ring_buffer_iterator::operator-=(difference_type n) { typedef typename eastl::iterator_traits::iterator_category IC; increment(-n, IC()); return *this; } template typename 
ring_buffer_iterator::this_type ring_buffer_iterator::operator+(difference_type n) const { return this_type(*this).operator+=(n); } template typename ring_buffer_iterator::this_type ring_buffer_iterator::operator-(difference_type n) const { return this_type(*this).operator+=(-n); } template void ring_buffer_iterator::increment(difference_type n, eastl::input_iterator_tag) { // n cannot be negative, as input iterators don't support reverse iteration. while(n-- > 0) operator++(); } template void ring_buffer_iterator::increment(difference_type n, eastl::random_access_iterator_tag) { // We make the assumption here that the user is incrementing from a valid // starting position to a valid ending position. Thus *this + n yields a // valid iterator, including if n happens to be a negative value. if(n >= 0) { const difference_type d = mpContainer->end() - mContainerIterator; if(n < d) mContainerIterator += n; else mContainerIterator = mpContainer->begin() + (n - d); } else { // Recall that n and d here will be negative and so the logic here works as intended. const difference_type d = mpContainer->begin() - mContainerIterator; if(n >= d) mContainerIterator += n; else mContainerIterator = mpContainer->end() + (n - d); } } // Random access iterators must support operator + and operator -. // You can only add an integer to an iterator, and you cannot add two iterators. template inline ring_buffer_iterator operator+(ptrdiff_t n, const ring_buffer_iterator& x) { return x + n; // Implement (n + x) in terms of (x + n). } // You can only add an integer to an iterator, but you can subtract two iterators. template inline typename ring_buffer_iterator::difference_type operator-(const ring_buffer_iterator& a, const ring_buffer_iterator& b) { typedef typename ring_buffer_iterator::difference_type difference_type; // To do: If container_iterator is a random access iterator, then do a simple calculation. // Otherwise, we have little choice but to iterate from a to b and count as we go. 
// See the ring_buffer::size function for an implementation of this. // Iteration implementation: difference_type d = 0; for(ring_buffer_iterator temp(b); temp != a; ++temp) ++d; return d; } // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. // Thus we provide additional template paremeters here to support this. The defect report does not // require us to support comparisons between reverse_iterators and const_reverse_iterators. template inline bool operator==(const ring_buffer_iterator& a, const ring_buffer_iterator& b) { // Perhaps we should compare the container pointer as well. // However, for valid iterators this shouldn't be necessary. return a.mContainerIterator == b.mContainerIterator; } template inline bool operator!=(const ring_buffer_iterator& a, const ring_buffer_iterator& b) { // Perhaps we should compare the container pointer as well. // However, for valid iterators this shouldn't be necessary. return !(a.mContainerIterator == b.mContainerIterator); } // We provide a version of operator!= for the case where the iterators are of the // same type. This helps prevent ambiguity errors in the presence of rel_ops. template inline bool operator!=(const ring_buffer_iterator& a, const ring_buffer_iterator& b) { return !(a.mContainerIterator == b.mContainerIterator); } /////////////////////////////////////////////////////////////////////// // ring_buffer /////////////////////////////////////////////////////////////////////// template ring_buffer::ring_buffer(size_type cap) : c() // Default construction with default allocator for the container. { // To do: This code needs to be amended to deal with possible exceptions // that could occur during the resize call below. // We add one because the element at mEnd is necessarily unused. c.resize(cap + 1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. 
mBegin = c.begin(); mEnd = mBegin; mSize = 0; } template ring_buffer::ring_buffer(size_type cap, const allocator_type& allocator) : c(allocator) { // To do: This code needs to be amended to deal with possible exceptions // that could occur during the resize call below. // We add one because the element at mEnd is necessarily unused. c.resize(cap + 1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. mBegin = c.begin(); mEnd = mBegin; mSize = 0; } template ring_buffer::ring_buffer(const Container& x) : c(x) // This copies elements from x, but unless the user is doing some tricks, the only thing that matters is that c.size() == x.size(). { // To do: This code needs to be amended to deal with possible exceptions // that could occur during the resize call below. if(c.empty()) c.resize(1); mBegin = c.begin(); mEnd = mBegin; mSize = 0; } template ring_buffer::ring_buffer(const allocator_type& allocator) : c(allocator) { // To do: This code needs to be amended to deal with possible exceptions // that could occur during the resize call below. // We add one because the element at mEnd is necessarily unused. c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. mBegin = c.begin(); mEnd = mBegin; mSize = 0; } template ring_buffer::ring_buffer(const this_type& x) : c(x.c) { mBegin = c.begin(); mEnd = mBegin; mSize = x.mSize; eastl::advance(mBegin, eastl::distance(const_cast(x).c.begin(), x.mBegin)); // We can do a simple distance algorithm here, as there will be no wraparound. eastl::advance(mEnd, eastl::distance(const_cast(x).c.begin(), x.mEnd)); } template ring_buffer::ring_buffer(this_type&& x) : c() // Default construction with default allocator for the container. { c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. 
mBegin = c.begin(); mEnd = mBegin; mSize = 0; swap(x); // We are leaving x in an unusual state by swapping default-initialized members with it, as it won't be usable and can be only destructible. } template ring_buffer::ring_buffer(this_type&& x, const allocator_type& allocator) : c(allocator) { c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. mBegin = c.begin(); mEnd = mBegin; mSize = 0; if(c.get_allocator() == x.c.get_allocator()) swap(x); // We are leaving x in an unusual state by swapping default-initialized members with it, as it won't be usable and can be only destructible. else operator=(x); } template ring_buffer::ring_buffer(std::initializer_list ilist, const allocator_type& allocator) : c(allocator) { c.resize((eastl_size_t)ilist.size() + 1); mBegin = c.begin(); mEnd = mBegin; mSize = 0; assign(ilist.begin(), ilist.end()); } template typename ring_buffer::this_type& ring_buffer::operator=(const this_type& x) { if(&x != this) { c = x.c; mBegin = c.begin(); mEnd = mBegin; mSize = x.mSize; eastl::advance(mBegin, eastl::distance(const_cast(x).c.begin(), x.mBegin)); // We can do a simple distance algorithm here, as there will be no wraparound. eastl::advance(mEnd, eastl::distance(const_cast(x).c.begin(), x.mEnd)); } return *this; } template typename ring_buffer::this_type& ring_buffer::operator=(this_type&& x) { swap(x); return *this; } template typename ring_buffer::this_type& ring_buffer::operator=(std::initializer_list ilist) { assign(ilist.begin(), ilist.end()); return *this; } template template void ring_buffer::assign(InputIterator first, InputIterator last) { // To consider: We can make specializations of this for pointer-based // iterators to PODs and turn the action into a memcpy. 
clear(); for(; first != last; ++first) push_back(*first); } template void ring_buffer::swap(this_type& x) { if(&x != this) { const difference_type dBegin = eastl::distance(c.begin(), mBegin); // We can do a simple distance algorithm here, as there will be no wraparound. const difference_type dEnd = eastl::distance(c.begin(), mEnd); const difference_type dxBegin = eastl::distance(x.c.begin(), x.mBegin); const difference_type dxEnd = eastl::distance(x.c.begin(), x.mEnd); eastl::swap(c, x.c); eastl::swap(mSize, x.mSize); mBegin = c.begin(); eastl::advance(mBegin, dxBegin); // We can do a simple advance algorithm here, as there will be no wraparound. mEnd = c.begin(); eastl::advance(mEnd, dxEnd); x.mBegin = x.c.begin(); eastl::advance(x.mBegin, dBegin); x.mEnd = x.c.begin(); eastl::advance(x.mEnd, dEnd); } } template typename ring_buffer::iterator ring_buffer::begin() EA_NOEXCEPT { return iterator(&c, mBegin); } template typename ring_buffer::const_iterator ring_buffer::begin() const EA_NOEXCEPT { return const_iterator(const_cast(&c), mBegin); // We trust that the const_iterator will respect const-ness. } template typename ring_buffer::const_iterator ring_buffer::cbegin() const EA_NOEXCEPT { return const_iterator(const_cast(&c), mBegin); // We trust that the const_iterator will respect const-ness. } template typename ring_buffer::iterator ring_buffer::end() EA_NOEXCEPT { return iterator(&c, mEnd); } template typename ring_buffer::const_iterator ring_buffer::end() const EA_NOEXCEPT { return const_iterator(const_cast(&c), mEnd); // We trust that the const_iterator will respect const-ness. } template typename ring_buffer::const_iterator ring_buffer::cend() const EA_NOEXCEPT { return const_iterator(const_cast(&c), mEnd); // We trust that the const_iterator will respect const-ness. 
} template typename ring_buffer::reverse_iterator ring_buffer::rbegin() EA_NOEXCEPT { return reverse_iterator(iterator(&c, mEnd)); } template typename ring_buffer::const_reverse_iterator ring_buffer::rbegin() const EA_NOEXCEPT { return const_reverse_iterator(const_iterator(const_cast(&c), mEnd)); } template typename ring_buffer::const_reverse_iterator ring_buffer::crbegin() const EA_NOEXCEPT { return const_reverse_iterator(const_iterator(const_cast(&c), mEnd)); } template typename ring_buffer::reverse_iterator ring_buffer::rend() EA_NOEXCEPT { return reverse_iterator(iterator(&c, mBegin)); } template typename ring_buffer::const_reverse_iterator ring_buffer::rend() const EA_NOEXCEPT { return const_reverse_iterator(const_iterator(const_cast(&c), mBegin)); } template typename ring_buffer::const_reverse_iterator ring_buffer::crend() const EA_NOEXCEPT { return const_reverse_iterator(const_iterator(const_cast(&c), mBegin)); } template bool ring_buffer::empty() const EA_NOEXCEPT { return mBegin == mEnd; } template bool ring_buffer::full() const EA_NOEXCEPT { // Implementation that relies on c.size() being a fast operation: // return mSize == (c.size() - 1); // (c.size() - 1) == capacity(); we are attempting to reduce function calls. // Version that has constant speed guarantees, but is still pretty fast. const_iterator afterEnd(end()); ++afterEnd; return afterEnd.mContainerIterator == mBegin; } template typename ring_buffer::size_type ring_buffer::size() const EA_NOEXCEPT { return mSize; // Alternatives: // return eastl::distance(begin(), end()); // return end() - begin(); // This is more direct than using distance(). //typedef typename eastl::iterator_traits::iterator_category IC; //return DoGetSize(IC()); // This is more direct than using iterator math. 
} /* template typename ring_buffer::size_type ring_buffer::DoGetSize(eastl::input_iterator_tag) const { // We could alternatively just use eastl::distance() here, but we happen to // know that such code would boil down to what we have here, and we might // as well remove function calls where possible. difference_type d = 0; for(const_iterator temp(begin()), tempEnd(end()); temp != tempEnd; ++temp) ++d; return (size_type)d; } */ /* template typename ring_buffer::size_type ring_buffer::DoGetSize(eastl::random_access_iterator_tag) const { // A simpler but less efficient implementation fo this function would be: // return eastl::distance(mBegin, mEnd); // // The calculation of distance here takes advantage of the fact that random // access iterators' distances can be calculated by simple pointer calculation. // Thus the code below boils down to a few subtractions when using a vector, // string, or array as the Container type. // const difference_type dBegin = eastl::distance(const_cast(c).begin(), mBegin); // const_cast here solves a little compiler const difference_type dEnd = eastl::distance(const_cast(c).begin(), mEnd); // argument matching problem. if(dEnd >= dBegin) return dEnd - dBegin; return c.size() - (dBegin - dEnd); } */ namespace Internal { /////////////////////////////////////////////////////////////// // has_overflow_allocator // // returns true_type when the specified container type is an // eastl::fixed_* container and therefore has an overflow // allocator type. // template struct has_overflow_allocator : false_type {}; template struct has_overflow_allocator().get_overflow_allocator())>> : true_type {}; /////////////////////////////////////////////////////////////// // GetFixedContainerCtorAllocator // // eastl::fixed_* containers are only constructible via their // overflow allocator type. This helper select the appropriate // allocator from the specified container. 
// template ()()> struct GetFixedContainerCtorAllocator { auto& operator()(Container& c) { return c.get_overflow_allocator(); } }; template struct GetFixedContainerCtorAllocator { auto& operator()(Container& c) { return c.get_allocator(); } }; } // namespace Internal /////////////////////////////////////////////////////////////// // ContainerTemporary // // Helper type which prevents utilizing excessive stack space // when creating temporaries when swapping/copying the underlying // ring_buffer container type. // template = EASTL_MAX_STACK_USAGE)> struct ContainerTemporary { Container mContainer; ContainerTemporary(Container& parentContainer) : mContainer(Internal::GetFixedContainerCtorAllocator{}(parentContainer)) { } Container& get() { return mContainer; } }; template struct ContainerTemporary { typename Container::allocator_type* mAllocator; Container* mContainer; ContainerTemporary(Container& parentContainer) : mAllocator(&parentContainer.get_allocator()) , mContainer(new (mAllocator->allocate(sizeof(Container))) Container) { } ~ContainerTemporary() { mContainer->~Container(); mAllocator->deallocate(mContainer, sizeof(Container)); } Container& get() { return *mContainer; } }; template void ring_buffer::resize(size_type n) { // Note that if n > size(), we just move the end position out to // the begin + n, with the data being the old end and the new end // being stale values from the past. This is by design, as the concept // of arbitrarily resizing a ring buffer like this is currently deemed // to be vague in what it intends to do. We can only assume that the // user knows what he is doing and will deal with the stale values. EASTL_ASSERT(c.size() >= 1); const size_type cap = (c.size() - 1); mSize = n; if(n > cap) // If we need to grow in capacity... { // Given that a growing operation will always result in memory allocation, // we currently implement this function via the usage of a temp container. 
// This makes for a simple implementation, but in some cases it is less // efficient. In particular, if the container is a node-based container like // a (linked) list, this function would be faster if we simply added nodes // to ourself. We would do this by inserting the nodes to be after end() // and adjusting the begin() position if it was after end(). // To do: This code needs to be amended to deal with possible exceptions // that could occur during the resize call below. ContainerTemporary cTemp(c); cTemp.get().resize(n + 1); eastl::copy(begin(), end(), cTemp.get().begin()); eastl::swap(c, cTemp.get()); mBegin = c.begin(); mEnd = mBegin; eastl::advance(mEnd, n); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. } else // We could do a check here for n != size(), but that would be costly and people don't usually resize things to their same size. { mEnd = mBegin; // eastl::advance(mEnd, n); // We *cannot* use this because there may be wraparound involved. // To consider: Possibly we should implement some more detailed logic to optimize the code here. // We'd need to do different behaviour dending on whether the container iterator type is a // random access iterator or otherwise. while(n--) { if(EASTL_UNLIKELY(++mEnd == c.end())) mEnd = c.begin(); } } } template typename ring_buffer::size_type ring_buffer::capacity() const EA_NOEXCEPT { EASTL_ASSERT(c.size() >= 1); // This is required because even an empty ring_buffer has one unused termination element, somewhat like a \0 at the end of a C string. return (c.size() - 1); // Need to subtract one because the position at mEnd is unused. } template void ring_buffer::set_capacity(size_type n) { const size_type capacity = (c.size() - 1); if(n != capacity) // If we need to change capacity... { ContainerTemporary cTemp(c); cTemp.get().resize(n + 1); iterator itCopyBegin = begin(); if(n < mSize) // If we are shrinking the capacity, to less than our size... 
{ eastl::advance(itCopyBegin, mSize - n); mSize = n; } eastl::copy(itCopyBegin, end(), cTemp.get().begin()); // The begin-end range may in fact be larger than n, in which case values will be overwritten. eastl::swap(c, cTemp.get()); mBegin = c.begin(); mEnd = mBegin; eastl::advance(mEnd, mSize); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. } } template void ring_buffer::reserve(size_type n) { // We follow the pattern of vector and only do something if n > capacity. EASTL_ASSERT(c.size() >= 1); if(n > (c.size() - 1)) // If we need to grow in capacity... // (c.size() - 1) == capacity(); we are attempting to reduce function calls. { ContainerTemporary cTemp(c); cTemp.get().resize(n + 1); eastl::copy(begin(), end(), cTemp.get().begin()); eastl::swap(c, cTemp.get()); mBegin = c.begin(); mEnd = mBegin; eastl::advance(mEnd, mSize); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. } } template typename ring_buffer::reference ring_buffer::front() { return *mBegin; } template typename ring_buffer::const_reference ring_buffer::front() const { return *mBegin; } template typename ring_buffer::reference ring_buffer::back() { // return *(end() - 1); // Can't use this because not all iterators support operator-. iterator temp(end()); // To do: Find a way to construct this temporary in the return statement. return *(--temp); // We can do it by making all our containers' iterators support operator-. } template typename ring_buffer::const_reference ring_buffer::back() const { // return *(end() - 1); // Can't use this because not all iterators support operator-. const_iterator temp(end()); // To do: Find a way to construct this temporary in the return statement. return *(--temp); // We can do it by making all our containers' iterators support operator-. } /// A push_back operation on a ring buffer assigns the new value to end. 
/// If there is no more space in the buffer, this will result in begin /// being overwritten and the begin position being moved foward one position. template void ring_buffer::push_back(const value_type& value) { *mEnd = value; if(++mEnd == c.end()) mEnd = c.begin(); if(mEnd == mBegin) { if(++mBegin == c.end()) mBegin = c.begin(); } else ++mSize; } /// A push_back operation on a ring buffer assigns the new value to end. /// If there is no more space in the buffer, this will result in begin /// being overwritten and the begin position being moved foward one position. template typename ring_buffer::reference ring_buffer::push_back() { // We don't do the following assignment, as the value at mEnd is already constructed; // it is merely possibly not default-constructed. However, the spirit of push_back // is that the user intends to do an assignment or data modification after the // push_back call. The user can always execute *back() = value_type() if he wants. //*mEnd = value_type(); if(++mEnd == c.end()) mEnd = c.begin(); if(mEnd == mBegin) { if(++mBegin == c.end()) mBegin = c.begin(); } else ++mSize; return back(); } template void ring_buffer::pop_back() { EASTL_ASSERT(mEnd != mBegin); // We assume that size() > 0 and thus that there is something to pop. 
if(EASTL_UNLIKELY(mEnd == c.begin())) mEnd = c.end(); --mEnd; --mSize; } template void ring_buffer::push_front(const value_type& value) { if(EASTL_UNLIKELY(mBegin == c.begin())) mBegin = c.end(); if(--mBegin == mEnd) { if(EASTL_UNLIKELY(mEnd == c.begin())) mEnd = c.end(); --mEnd; } else ++mSize; *mBegin = value; } template typename ring_buffer::reference ring_buffer::push_front() { if(EASTL_UNLIKELY(mBegin == c.begin())) mBegin = c.end(); if(--mBegin == mEnd) { if(EASTL_UNLIKELY(mEnd == c.begin())) mEnd = c.end(); --mEnd; } else ++mSize; // See comments above in push_back for why we don't execute this: // *mBegin = value_type(); return *mBegin; // Same as return front(); } template void ring_buffer::pop_front() { EASTL_ASSERT(mBegin != mEnd); // We assume that mEnd > mBegin and thus that there is something to pop. if(++mBegin == c.end()) mBegin = c.begin(); --mSize; } template typename ring_buffer::reference ring_buffer::operator[](size_type n) { // return *(begin() + n); // Can't use this because not all iterators support operator+. // This should compile to code that is nearly as efficient as that above. // The primary difference is the possible generation of a temporary in this case. iterator temp(begin()); eastl::advance(temp, n); return *(temp.mContainerIterator); } template typename ring_buffer::const_reference ring_buffer::operator[](size_type n) const { // return *(begin() + n); // Can't use this because not all iterators support operator+. // This should compile to code that is nearly as efficient as that above. // The primary difference is the possible generation of a temporary in this case. 
const_iterator temp(begin()); eastl::advance(temp, n); return *(temp.mContainerIterator); } template typename ring_buffer::iterator ring_buffer::insert(const_iterator position, const value_type& value) { // To consider: It would be faster if we could tell that position was in the first // half of the container and instead of moving things after the position back, // we could move things before the position forward. iterator afterEnd(end()); iterator beforeEnd(afterEnd); ++afterEnd; if(afterEnd.mContainerIterator == mBegin) // If we are at full capacity... --beforeEnd; else push_back(); iterator itPosition(position.mpContainer, position.mContainerIterator); // We merely copy from const_iterator to iterator. eastl::copy_backward(itPosition, beforeEnd, end()); *itPosition = value; return itPosition; } template void ring_buffer::insert(const_iterator position, size_type n, const value_type& value) { // To do: This can be improved with a smarter version. However, // this is a little tricky because we need to deal with the case // whereby n is greater than the size of the container itself. while(n--) insert(position, value); } template void ring_buffer::insert(const_iterator position, std::initializer_list ilist) { insert(position, ilist.begin(), ilist.end()); } template template void ring_buffer::insert(const_iterator position, InputIterator first, InputIterator last) { // To do: This can possibly be improved with a smarter version. // However, this can be tricky if distance(first, last) is greater // than the size of the container itself. for(; first != last; ++first, ++position) insert(position, *first); } template typename ring_buffer::iterator ring_buffer::erase(const_iterator position) { iterator itPosition(position.mpContainer, position.mContainerIterator); // We merely copy from const_iterator to iterator. 
iterator iNext(itPosition); eastl::copy(++iNext, end(), itPosition); pop_back(); return itPosition; } template typename ring_buffer::iterator ring_buffer::erase(const_iterator first, const_iterator last) { iterator itFirst(first.mpContainer, first.mContainerIterator); // We merely copy from const_iterator to iterator. iterator itLast(last.mpContainer, last.mContainerIterator); typename iterator::difference_type d = eastl::distance(itFirst, itLast); eastl::copy(itLast, end(), itFirst); while(d--) // To do: improve this implementation. pop_back(); return itFirst; } template typename ring_buffer::reverse_iterator ring_buffer::erase(const_reverse_iterator position) { return reverse_iterator(erase((++position).base())); } template typename ring_buffer::reverse_iterator ring_buffer::erase(const_reverse_iterator first, const_reverse_iterator last) { // Version which erases in order from first to last. // difference_type i(first.base() - last.base()); // while(i--) // first = erase(first); // return first; // Version which erases in order from last to first, but is slightly more efficient: return reverse_iterator(erase((++last).base(), (++first).base())); } template void ring_buffer::clear() { // Don't clear the container; we use its valid data for our elements. mBegin = c.begin(); mEnd = c.begin(); mSize = 0; } template typename ring_buffer::container_type& ring_buffer::get_container() { return c; } template const typename ring_buffer::container_type& ring_buffer::get_container() const { return c; } template inline bool ring_buffer::validate() const { if(!c.validate()) // This requires that the container implement the validate function. That pretty much return false; // means that the container is an EASTL container and not a std STL container. if(c.empty()) // c must always have a size of at least 1, as even an empty ring_buffer has an unused terminating element. 
return false; if(size() > capacity()) return false; if((validate_iterator(begin()) & (isf_valid | isf_current)) != (isf_valid | isf_current)) return false; if((validate_iterator(end()) & (isf_valid | isf_current)) != (isf_valid | isf_current)) return false; // Verify that the size calculation is consistent. size_type n = 0; for(const_iterator i(begin()), iEnd(end()); i != iEnd; ++i) ++n; if(n != mSize) return false; return true; } template inline int ring_buffer::validate_iterator(const_iterator i) const { // To do: Replace this with a more efficient implementation if possible. for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) { if(temp == i) return (isf_valid | isf_current | isf_can_dereference); } if(i == end()) return (isf_valid | isf_current); return isf_none; } /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// template inline bool operator==(const ring_buffer& a, const ring_buffer& b) { return (a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin()); } template inline bool operator<(const ring_buffer& a, const ring_buffer& b) { return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template inline bool operator!=(const ring_buffer& a, const ring_buffer& b) { return !(a == b); } template inline bool operator>(const ring_buffer& a, const ring_buffer& b) { return (b < a); } template inline bool operator<=(const ring_buffer& a, const ring_buffer& b) { return !(b < a); } template inline bool operator>=(const ring_buffer& a, const ring_buffer& b) { return !(a < b); } template inline void swap(ring_buffer& a, ring_buffer& b) { a.swap(b); } } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/bonus/sort_extra.h ================================================ 
///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // This file implements additional sort algorithms beyond the basic set. // Included here are: // selection_sort -- Unstable. // shaker_sort -- Stable. // bucket_sort -- Stable. // ////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_SORT_EXTRA_H #define EASTL_SORT_EXTRA_H #include #include #include #include #include #include // For backwards compatibility due to sorts moved from here to sort.h. #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /// selection_sort /// /// Implements the SelectionSort algorithm. /// template void selection_sort(ForwardIterator first, ForwardIterator last, StrictWeakOrdering compare) { ForwardIterator iCurrent, iMin; for(; first != last; ++first) { iCurrent = first; iMin = iCurrent; for(++iCurrent; iCurrent != last; ++iCurrent) { if(compare(*iCurrent, *iMin)) { EASTL_VALIDATE_COMPARE(!compare(*iMin, *iCurrent)); // Validate that the compare function is sane. iMin = iCurrent; } } if(first != iMin) eastl::iter_swap(first, iMin); } } // selection_sort template inline void selection_sort(ForwardIterator first, ForwardIterator last) { typedef eastl::less::value_type> Less; eastl::selection_sort(first, last, Less()); } /// shaker_sort /// /// Implements the ShakerSort algorithm, which is a sorting algorithm which /// improves on bubble_sort by sweeping both from left to right and right /// to left, resulting in less iteration. 
/// template void shaker_sort(BidirectionalIterator first, BidirectionalIterator last, StrictWeakOrdering compare) { if(first != last) { BidirectionalIterator iCurrent, iNext, iLastModified; --last; while(first != last) { iLastModified = first; for(iCurrent = first; iCurrent != last; iCurrent = iNext) { iNext = iCurrent; ++iNext; if(compare(*iNext, *iCurrent)) { EASTL_VALIDATE_COMPARE(!compare(*iCurrent, *iNext)); // Validate that the compare function is sane. iLastModified = iCurrent; eastl::iter_swap(iCurrent, iNext); } } last = iLastModified; if(first != last) { for(iCurrent = last; iCurrent != first; iCurrent = iNext) { iNext = iCurrent; --iNext; if(compare(*iCurrent, *iNext)) { EASTL_VALIDATE_COMPARE(!compare(*iNext, *iCurrent)); // Validate that the compare function is sane. iLastModified = iCurrent; eastl::iter_swap(iNext, iCurrent); } } first = iLastModified; } } } } // shaker_sort template inline void shaker_sort(BidirectionalIterator first, BidirectionalIterator last) { typedef eastl::less::value_type> Less; eastl::shaker_sort(first, last, Less()); } /// bucket_sort /// /// Implements the BucketSort algorithm. /// /// Example usage: /// const size_t kElementRange = 32; /// vector intArray(1000); /// /// for(int i = 0; i < 1000; i++) /// intArray[i] = rand() % kElementRange; /// /// vector< vector > bucketArray(kElementRange); /// bucket_sort(intArray.begin(), intArray.end(), bucketArray, eastl::hash_use_self()); /// template struct hash_use_self { T operator()(const T& x) const { return x; } }; // Requires buckeyArray to be an array of arrays with a size equal to the range of values // returned by the hash function. The hash function is required to return a unique value // for each uniquely sorted element. Usually the way this is done is the elements are // integers of a limited range (e.g. 0-64) and the hash function returns the element value // itself. If you had a case where all elements were always even numbers (e.g. 
0-128), // you could use a custom hash function that returns (element value / 2). // // The user is required to provide an empty bucketArray to this function. This function returns // with the bucketArray non-empty. This function doesn't clear the bucketArray because that takes // time and the user might not need it to be cleared, at least at that time. // template void bucket_sort(ForwardIterator first, ForwardIterator last, ContainerArray& bucketArray, HashFunction hash /*= hash_use_self*/) { for(ForwardIterator iInput = first; iInput != last; ++iInput) bucketArray[hash(*iInput)].push_back(*iInput); for(typename ContainerArray::const_iterator iBucket = bucketArray.begin(); iBucket != bucketArray.end(); ++iBucket) first = eastl::copy((*iBucket).begin(), (*iBucket).end(), first); } } // namespace eastl #endif // Header include guard ================================================ FILE: include/EASTL/bonus/tuple_vector.h ================================================ /////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // tuple_vector is a data container that is designed to abstract and simplify // the handling of a "structure of arrays" layout of data in memory. In // particular, it mimics the interface of vector, including functionality to do // inserts, erases, push_backs, and random-access. It also provides a // RandomAccessIterator and corresponding functionality, making it compatible // with most STL (and STL-esque) algorithms such as ranged-for loops, find_if, // remove_if, or sort. 
// When used or applied properly, this container can improve performance of // some algorithms through cache-coherent data accesses or allowing for // sensible SIMD programming, while keeping the structure of a single // container, to permit a developer to continue to use existing algorithms in // STL and the like. // // Consult doc/Bonus/tuple_vector_readme.md for more information. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_TUPLEVECTOR_H #define EASTL_TUPLEVECTOR_H #include #include #include #include #include #include #if EASTL_EXCEPTIONS_ENABLED #include #endif #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif EA_DISABLE_VC_WARNING(4244) // warning C4244: 'conversion from '___' to '___', possible loss of data EA_DISABLE_VC_WARNING(4623) // warning C4623: default constructor was implicitly defined as deleted EA_DISABLE_VC_WARNING(4625) // warning C4625: copy constructor was implicitly defined as deleted EA_DISABLE_VC_WARNING(4510) // warning C4510: default constructor could not be generated namespace eastl { /// EASTL_TUPLE_VECTOR_DEFAULT_NAME /// /// Defines a default container name in the absence of a user-provided name. /// #ifndef EASTL_TUPLE_VECTOR_DEFAULT_NAME #define EASTL_TUPLE_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " tuple-vector" // Unless the user overrides something, this is "EASTL tuple-vector". 
#endif /// EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR /// #ifndef EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR #define EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR allocator_type(EASTL_TUPLE_VECTOR_DEFAULT_NAME) #endif namespace TupleVecInternal { // forward declarations template struct tuplevec_element; template using tuplevec_element_t = typename tuplevec_element::type; template struct TupleTypes {}; template class TupleVecImpl; template struct TupleRecurser; template struct TupleIndexRecurser; template struct TupleVecLeaf; template struct TupleVecIter; // tuplevec_element helper to be able to isolate a type given an index template struct tuplevec_element { static_assert(I != I, "tuplevec_element index out of range"); }; template struct tuplevec_element<0, T, Ts...> { tuplevec_element() = delete; // tuplevec_element should only be used for compile-time assistance, and never be instantiated typedef T type; }; template struct tuplevec_element { typedef tuplevec_element_t type; }; // attempt to isolate index given a type template struct tuplevec_index { }; template struct tuplevec_index> { typedef void DuplicateTypeCheck; tuplevec_index() = delete; // tuplevec_index should only be used for compile-time assistance, and never be instantiated static const eastl_size_t index = 0; }; template struct tuplevec_index> { typedef int DuplicateTypeCheck; static_assert(is_void>::DuplicateTypeCheck>::value, "duplicate type T in tuple_vector::get(); unique types must be provided in declaration, or only use get()"); static const eastl_size_t index = 0; }; template struct tuplevec_index> { typedef typename tuplevec_index>::DuplicateTypeCheck DuplicateTypeCheck; static const eastl_size_t index = tuplevec_index>::index + 1; }; template struct tuplevec_index> : public tuplevec_index> { }; // helper to calculate the layout of the allocations for the tuple of types (esp. 
to take alignment into account) template <> struct TupleRecurser<> { typedef eastl_size_t size_type; // This class should never be instantiated. This is just a helper for working with static functions when anonymous functions don't work // and provide some other utilities TupleRecurser() = delete; static EA_CONSTEXPR size_type GetTotalAlignment() { return 0; } static EA_CONSTEXPR size_type GetTotalAllocationSize(size_type capacity, size_type offset) { EA_UNUSED(capacity); return offset; } template static pair DoAllocate(TupleVecImpl &vec, void** ppNewLeaf, size_type capacity, size_type offset) { EA_UNUSED(ppNewLeaf); // If n is zero, then we allocate no memory and just return NULL. // This is fine, as our default ctor initializes with NULL pointers. size_type alignment = TupleRecurser::GetTotalAlignment(); void* ptr = capacity ? allocate_memory(vec.get_allocator(), offset, alignment, 0) : nullptr; #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY((size_t)ptr & (alignment - 1)) != 0) { EASTL_FAIL_MSG("tuple_vector::DoAllocate -- memory not alignment at requested alignment"); } #endif return make_pair(ptr, offset); } template static void SetNewData(TupleVecImplType &vec, void* pData, size_type capacity, size_type offset) { EA_UNUSED(vec); EA_UNUSED(pData); EA_UNUSED(capacity); EA_UNUSED(offset); } }; template struct TupleRecurser : TupleRecurser { typedef eastl_size_t size_type; static EA_CONSTEXPR size_type GetTotalAlignment() { return max(static_cast(alignof(T)), TupleRecurser::GetTotalAlignment()); } static EA_CONSTEXPR size_type GetTotalAllocationSize(size_type capacity, size_type offset) { return TupleRecurser::GetTotalAllocationSize(capacity, CalculateAllocationSize(offset, capacity)); } template static pair DoAllocate(TupleVecImpl &vec, void** ppNewLeaf, size_type capacity, size_type offset) { size_type allocationOffset = CalculatAllocationOffset(offset); size_type allocationSize = CalculateAllocationSize(offset, capacity); pair allocation = TupleRecurser::template 
DoAllocate( vec, ppNewLeaf, capacity, allocationSize); ppNewLeaf[I] = (void*)((uintptr_t)(allocation.first) + allocationOffset); return allocation; } template static void SetNewData(TupleVecImplType &vec, void* pData, size_type capacity, size_type offset) { size_type allocationOffset = CalculatAllocationOffset(offset); size_type allocationSize = CalculateAllocationSize(offset, capacity); vec.TupleVecLeaf::mpData = (T*)((uintptr_t)pData + allocationOffset); TupleRecurser::template SetNewData(vec, pData, capacity, allocationSize); } private: static EA_CONSTEXPR size_type CalculateAllocationSize(size_type offset, size_type capacity) { return CalculatAllocationOffset(offset) + sizeof(T) * capacity; } static EA_CONSTEXPR size_type CalculatAllocationOffset(size_type offset) { return (offset + alignof(T) - 1) & (~alignof(T) + 1); } }; template struct TupleVecLeaf { typedef eastl_size_t size_type; void DoUninitializedMoveAndDestruct(const size_type begin, const size_type end, T* pDest) { T* pBegin = mpData + begin; T* pEnd = mpData + end; eastl::uninitialized_move_if_noexcept(pBegin, pEnd, pDest); eastl::destruct(pBegin, pEnd); } void DoInsertAndFill(size_type pos, size_type n, size_type numElements, const T& arg) { T* pDest = mpData + pos; T* pDataEnd = mpData + numElements; const T temp = arg; const size_type nExtra = (numElements - pos); if (n < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... { eastl::uninitialized_move(pDataEnd - n, pDataEnd, pDataEnd); eastl::move_backward(pDest, pDataEnd - n, pDataEnd); // We need move_backward because of potential overlap issues. 
eastl::fill(pDest, pDest + n, temp); } else { eastl::uninitialized_fill_n(pDataEnd, n - nExtra, temp); eastl::uninitialized_move(pDest, pDataEnd, pDataEnd + n - nExtra); eastl::fill(pDest, pDataEnd, temp); } } void DoInsertRange(T* pSrcBegin, T* pSrcEnd, T* pDestBegin, size_type numDataElements) { size_type pos = static_cast(pDestBegin - mpData); size_type n = static_cast(pSrcEnd - pSrcBegin); T* pDataEnd = mpData + numDataElements; const size_type nExtra = numDataElements - pos; if (n < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... { eastl::uninitialized_move(pDataEnd - n, pDataEnd, pDataEnd); eastl::move_backward(pDestBegin, pDataEnd - n, pDataEnd); // We need move_backward because of potential overlap issues. eastl::copy(pSrcBegin, pSrcEnd, pDestBegin); } else { eastl::uninitialized_copy(pSrcEnd - (n - nExtra), pSrcEnd, pDataEnd); eastl::uninitialized_move(pDestBegin, pDataEnd, pDataEnd + n - nExtra); eastl::copy(pSrcBegin, pSrcEnd - (n - nExtra), pDestBegin); } } void DoInsertValue(size_type pos, size_type numElements, T&& arg) { T* pDest = mpData + pos; T* pDataEnd = mpData + numElements; eastl::uninitialized_move(pDataEnd - 1, pDataEnd, pDataEnd); eastl::move_backward(pDest, pDataEnd - 1, pDataEnd); // We need move_backward because of potential overlap issues. eastl::destruct(pDest); ::new (pDest) T(eastl::forward(arg)); } T* mpData = nullptr; }; // swallow allows for parameter pack expansion of arguments as means of expanding operations performed // if a void function is used for operation expansion, it should be wrapped in (..., 0) so that the compiler // thinks it has a parameter to pass into the function template void swallow(Ts&&...) { } inline bool variadicAnd(bool cond) { return cond; } inline bool variadicAnd(bool cond, bool conds...) 
{ return cond && variadicAnd(conds); } // Helper struct to check for strict compatibility between two iterators, whilst still allowing for // conversion between TupleVecImpl::iterator and TupleVecImpl::const_iterator. template struct TupleVecIterCompatibleImpl : public false_type { }; template<> struct TupleVecIterCompatibleImpl, TupleTypes<>> : public true_type { }; template struct TupleVecIterCompatibleImpl, TupleTypes> : public integral_constant, TupleTypes>::value && is_same::type, typename remove_const::type>::value > { }; template struct TupleVecIterCompatible; template struct TupleVecIterCompatible, TupleTypes> : public TupleVecIterCompatibleImpl, TupleTypes> { }; // The Iterator operates by storing a persistent index internally, // and resolving the tuple of pointers to the various parts of the original tupleVec when dereferenced. // While resolving the tuple is a non-zero operation, it consistently generated better code than the alternative of // storing - and harmoniously updating on each modification - a full tuple of pointers to the tupleVec's data template struct TupleVecIter, Ts...> { private: typedef TupleVecIter, Ts...> this_type; typedef eastl_size_t size_type; template friend struct TupleVecIter; template friend class TupleVecImpl; template friend class move_iterator; public: typedef eastl::random_access_iterator_tag iterator_category; typedef tuple value_type; typedef eastl_size_t difference_type; typedef tuple pointer; typedef tuple reference; TupleVecIter() = default; template TupleVecIter(VecImplType* tupleVec, size_type index) : mIndex(index) , mpData{(void*)tupleVec->TupleVecLeaf::mpData...} { } template , TupleTypes>::value, bool>::type> TupleVecIter(const TupleVecIter& other) : mIndex(other.mIndex) , mpData{other.mpData[Indices]...} { } bool operator==(const TupleVecIter& other) const { return mIndex == other.mIndex && mpData[0] == other.mpData[0]; } bool operator!=(const TupleVecIter& other) const { return mIndex != other.mIndex || 
mpData[0] != other.mpData[0]; } reference operator*() const { return MakeReference(); } this_type& operator++() { ++mIndex; return *this; } this_type operator++(int) { this_type temp = *this; ++mIndex; return temp; } this_type& operator--() { --mIndex; return *this; } this_type operator--(int) { this_type temp = *this; --mIndex; return temp; } this_type& operator+=(difference_type n) { mIndex += n; return *this; } this_type operator+(difference_type n) const { this_type temp = *this; return temp += n; } friend this_type operator+(difference_type n, const this_type& rhs) { this_type temp = rhs; return temp += n; } this_type& operator-=(difference_type n) { mIndex -= n; return *this; } this_type operator-(difference_type n) const { this_type temp = *this; return temp -= n; } friend this_type operator-(difference_type n, const this_type& rhs) { this_type temp = rhs; return temp -= n; } difference_type operator-(const this_type& rhs) const { return mIndex - rhs.mIndex; } bool operator<(const this_type& rhs) const { return mIndex < rhs.mIndex; } bool operator>(const this_type& rhs) const { return mIndex > rhs.mIndex; } bool operator>=(const this_type& rhs) const { return mIndex >= rhs.mIndex; } bool operator<=(const this_type& rhs) const { return mIndex <= rhs.mIndex; } reference operator[](const size_type n) const { return *(*this + n); } private: value_type MakeValue() const { return value_type(((Ts*)mpData[Indices])[mIndex]...); } reference MakeReference() const { return reference(((Ts*)mpData[Indices])[mIndex]...); } pointer MakePointer() const { return pointer(&((Ts*)mpData[Indices])[mIndex]...); } size_type mIndex = 0; const void* mpData[sizeof...(Ts)]; }; // TupleVecImpl template class TupleVecImpl, Ts...> : public TupleVecLeaf... 
{ typedef Allocator allocator_type; typedef index_sequence index_sequence_type; typedef TupleVecImpl this_type; typedef TupleVecImpl const_this_type; public: typedef TupleVecInternal::TupleVecIter iterator; typedef TupleVecInternal::TupleVecIter const_iterator; typedef eastl::reverse_iterator reverse_iterator; typedef eastl::reverse_iterator const_reverse_iterator; typedef eastl_size_t size_type; typedef eastl::tuple value_tuple; typedef eastl::tuple reference_tuple; typedef eastl::tuple const_reference_tuple; typedef eastl::tuple ptr_tuple; typedef eastl::tuple const_ptr_tuple; typedef eastl::tuple rvalue_tuple; TupleVecImpl() : mDataSizeAndAllocator(0, EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) {} TupleVecImpl(const allocator_type& allocator) : mDataSizeAndAllocator(0, allocator) {} TupleVecImpl(this_type&& x) : mDataSizeAndAllocator(0, eastl::move(x.get_allocator())) { swap(x); } TupleVecImpl(this_type&& x, const Allocator& allocator) : mDataSizeAndAllocator(0, allocator) { if (get_allocator() == x.get_allocator()) // If allocators are equivalent, then we can safely swap member-by-member { swap(x); } else { this_type temp(eastl::move(*this)); temp.swap(x); } } TupleVecImpl(const this_type& x) : mDataSizeAndAllocator(0, x.get_allocator()) { DoInitFromIterator(x.begin(), x.end()); } template TupleVecImpl(const TupleVecImpl& x, const Allocator& allocator) : mDataSizeAndAllocator(0, allocator) { DoInitFromIterator(x.begin(), x.end()); } template TupleVecImpl(move_iterator begin, move_iterator end, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : mDataSizeAndAllocator(0, allocator) { DoInitFromIterator(begin, end); } TupleVecImpl(const_iterator begin, const_iterator end, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : mDataSizeAndAllocator(0, allocator ) { DoInitFromIterator(begin, end); } TupleVecImpl(size_type n, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : mDataSizeAndAllocator(0, 
allocator) { DoInitDefaultFill(n); } TupleVecImpl(size_type n, const Ts&... args) : mDataSizeAndAllocator(0, EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) { DoInitFillArgs(n, args...); } TupleVecImpl(size_type n, const Ts&... args, const allocator_type& allocator) : mDataSizeAndAllocator(0, allocator) { DoInitFillArgs(n, args...); } TupleVecImpl(size_type n, const_reference_tuple tup, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : mDataSizeAndAllocator(0, allocator) { DoInitFillTuple(n, tup); } TupleVecImpl(const value_tuple* first, const value_tuple* last, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : mDataSizeAndAllocator(0, allocator) { DoInitFromTupleArray(first, last); } TupleVecImpl(std::initializer_list iList, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) : mDataSizeAndAllocator(0, allocator) { DoInitFromTupleArray(iList.begin(), iList.end()); } protected: // ctor to provide a pre-allocated field of data that the container will own, specifically for fixed_tuple_vector TupleVecImpl(const allocator_type& allocator, void* pData, size_type capacity, size_type dataSize) : mpData(pData), mNumCapacity(capacity), mDataSizeAndAllocator(dataSize, allocator) { TupleRecurser::template SetNewData(*this, mpData, mNumCapacity, 0); } public: ~TupleVecImpl() { swallow((eastl::destruct(TupleVecLeaf::mpData, TupleVecLeaf::mpData + mNumElements), 0)...); if (mpData) EASTLFree(get_allocator(), mpData, internalDataSize()); } void assign(size_type n, const Ts&... args) { if (n > mNumCapacity) { this_type temp(n, args..., get_allocator()); // We have little choice but to reallocate with new memory. swap(temp); } else if (n > mNumElements) // If n > mNumElements ... 
{ size_type oldNumElements = mNumElements; swallow((eastl::fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + oldNumElements, args), 0)...); swallow((eastl::uninitialized_fill(TupleVecLeaf::mpData + oldNumElements, TupleVecLeaf::mpData + n, args), 0)...); mNumElements = n; } else // else 0 <= n <= mNumElements { swallow((eastl::fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + n, args), 0)...); erase(begin() + n, end()); } } void assign(const_iterator first, const_iterator last) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) EASTL_FAIL_MSG("tuple_vector::assign -- invalid iterator pair"); #endif size_type newNumElements = last - first; if (newNumElements > mNumCapacity) { this_type temp(first, last, get_allocator()); swap(temp); } else { const void* ppOtherData[sizeof...(Ts)] = {first.mpData[Indices]...}; size_type firstIdx = first.mIndex; size_type lastIdx = last.mIndex; if (newNumElements > mNumElements) // If n > mNumElements ... { size_type oldNumElements = mNumElements; swallow((eastl::copy((Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + firstIdx + oldNumElements, TupleVecLeaf::mpData), 0)...); swallow((eastl::uninitialized_copy((Ts*)(ppOtherData[Indices]) + firstIdx + oldNumElements, (Ts*)(ppOtherData[Indices]) + lastIdx, TupleVecLeaf::mpData + oldNumElements), 0)...); mNumElements = newNumElements; } else // else 0 <= n <= mNumElements { swallow((eastl::copy((Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, TupleVecLeaf::mpData), 0)...); erase(begin() + newNumElements, end()); } } } void assign(const value_tuple* first, const value_tuple* last) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) EASTL_FAIL_MSG("tuple_vector::assign from tuple array -- invalid ptrs"); #endif size_type newNumElements = static_cast(last - first); if (newNumElements > mNumCapacity) { this_type temp(first, last, get_allocator()); swap(temp); } 
else { if (newNumElements > mNumElements) // If n > mNumElements ... { size_type oldNumElements = mNumElements; DoCopyFromTupleArray(begin(), begin() + oldNumElements, first); DoUninitializedCopyFromTupleArray(begin() + oldNumElements, begin() + newNumElements, first + oldNumElements); mNumElements = newNumElements; } else // else 0 <= n <= mNumElements { DoCopyFromTupleArray(begin(), begin() + newNumElements, first); erase(begin() + newNumElements, end()); } } } reference_tuple push_back() { size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements + 1; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; DoGrow(oldNumElements, oldNumCapacity, newNumElements); swallow((detail::allocator_construct(get_allocator(), TupleVecLeaf::mpData + oldNumElements), 0)...); return back(); } void push_back(const Ts&... args) { size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements + 1; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; DoGrow(oldNumElements, oldNumCapacity, newNumElements); swallow((detail::allocator_construct(get_allocator(), TupleVecLeaf::mpData + oldNumElements, args), 0)...); } void push_back_uninitialized() { size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements + 1; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; DoGrow(oldNumElements, oldNumCapacity, newNumElements); } reference_tuple emplace_back(Ts&&... args) { size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements + 1; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; DoGrow(oldNumElements, oldNumCapacity, newNumElements); swallow((detail::allocator_construct(get_allocator(), TupleVecLeaf::mpData + oldNumElements, eastl::forward(args)), 0)...); return back(); } iterator emplace(const_iterator pos, Ts&&... 
args) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) EASTL_FAIL_MSG("tuple_vector::emplace -- invalid iterator"); #endif size_type firstIdx = pos - cbegin(); size_type oldNumElements = mNumElements; size_type newNumElements = mNumElements + 1; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; if (newNumElements > oldNumCapacity || firstIdx != oldNumElements) { if (newNumElements > oldNumCapacity) { const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); void* ppNewLeaf[sizeof...(Ts)]; pair allocation = TupleRecurser::template DoAllocate( *this, ppNewLeaf, newCapacity, 0); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( 0, firstIdx, (Ts*)ppNewLeaf[Indices]), 0)...); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( firstIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + firstIdx + 1), 0)...); swallow(::new ((Ts*)ppNewLeaf[Indices] + firstIdx) Ts(eastl::forward(args))...); swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); EASTLFree(get_allocator(), mpData, internalDataSize()); mpData = allocation.first; mNumCapacity = newCapacity; internalDataSize() = allocation.second; } else { swallow((TupleVecLeaf::DoInsertValue(firstIdx, oldNumElements, eastl::forward(args)), 0)...); } } else { swallow(::new (TupleVecLeaf::mpData + oldNumElements) Ts(eastl::forward(args))...); } return begin() + firstIdx; } iterator insert(const_iterator pos, size_type n, const Ts&... 
args) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); #endif size_type firstIdx = pos - cbegin(); size_type lastIdx = firstIdx + n; size_type oldNumElements = mNumElements; size_type newNumElements = mNumElements + n; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; if (newNumElements > oldNumCapacity || firstIdx != oldNumElements) { if (newNumElements > oldNumCapacity) { const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); void* ppNewLeaf[sizeof...(Ts)]; pair allocation = TupleRecurser::template DoAllocate( *this, ppNewLeaf, newCapacity, 0); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( 0, firstIdx, (Ts*)ppNewLeaf[Indices]), 0)...); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( firstIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + lastIdx), 0)...); swallow((eastl::uninitialized_fill((Ts*)ppNewLeaf[Indices] + firstIdx, (Ts*)ppNewLeaf[Indices] + lastIdx, args), 0)...); swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); EASTLFree(get_allocator(), mpData, internalDataSize()); mpData = allocation.first; mNumCapacity = newCapacity; internalDataSize() = allocation.second; } else { swallow((TupleVecLeaf::DoInsertAndFill(firstIdx, n, oldNumElements, args), 0)...); } } else { swallow((eastl::uninitialized_fill(TupleVecLeaf::mpData + oldNumElements, TupleVecLeaf::mpData + newNumElements, args), 0)...); } return begin() + firstIdx; } iterator insert(const_iterator pos, const_iterator first, const_iterator last) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator pair"); #endif size_type posIdx = pos - cbegin(); size_type firstIdx = first.mIndex; size_type lastIdx = last.mIndex; size_type 
numToInsert = last - first; size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements + numToInsert; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; const void* ppOtherData[sizeof...(Ts)] = {first.mpData[Indices]...}; if (newNumElements > oldNumCapacity || posIdx != oldNumElements) { if (newNumElements > oldNumCapacity) { const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); void* ppNewLeaf[sizeof...(Ts)]; pair allocation = TupleRecurser::template DoAllocate( *this, ppNewLeaf, newCapacity, 0); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( 0, posIdx, (Ts*)ppNewLeaf[Indices]), 0)...); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( posIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + posIdx + numToInsert), 0)...); swallow((eastl::uninitialized_copy((Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, (Ts*)ppNewLeaf[Indices] + posIdx), 0)...); swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); EASTLFree(get_allocator(), mpData, internalDataSize()); mpData = allocation.first; mNumCapacity = newCapacity; internalDataSize() = allocation.second; } else { swallow((TupleVecLeaf::DoInsertRange( (Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, TupleVecLeaf::mpData + posIdx, oldNumElements), 0)...); } } else { swallow((eastl::uninitialized_copy((Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, TupleVecLeaf::mpData + posIdx), 0)...); } return begin() + posIdx; } iterator insert(const_iterator pos, const value_tuple* first, const value_tuple* last) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) EASTL_FAIL_MSG("tuple_vector::insert -- invalid source pointers"); #endif size_type posIdx = pos - cbegin(); size_type numToInsert = 
static_cast(last - first); size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements + numToInsert; size_type oldNumCapacity = mNumCapacity; mNumElements = newNumElements; if (newNumElements > oldNumCapacity || posIdx != oldNumElements) { if (newNumElements > oldNumCapacity) { const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); void* ppNewLeaf[sizeof...(Ts)]; pair allocation = TupleRecurser::template DoAllocate( *this, ppNewLeaf, newCapacity, 0); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( 0, posIdx, (Ts*)ppNewLeaf[Indices]), 0)...); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( posIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + posIdx + numToInsert), 0)...); swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); // Do this after mpData is updated so that we can use new iterators DoUninitializedCopyFromTupleArray(begin() + posIdx, begin() + posIdx + numToInsert, first); EASTLFree(get_allocator(), mpData, internalDataSize()); mpData = allocation.first; mNumCapacity = newCapacity; internalDataSize() = allocation.second; } else { const size_type nExtra = oldNumElements - posIdx; void* ppDataEnd[sizeof...(Ts)] = { (void*)(TupleVecLeaf::mpData + oldNumElements)... }; void* ppDataBegin[sizeof...(Ts)] = { (void*)(TupleVecLeaf::mpData + posIdx)... }; if (numToInsert < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... { swallow((eastl::uninitialized_move((Ts*)ppDataEnd[Indices] - numToInsert, (Ts*)ppDataEnd[Indices], (Ts*)ppDataEnd[Indices]), 0)...); // We need move_backward because of potential overlap issues. 
swallow((eastl::move_backward((Ts*)ppDataBegin[Indices], (Ts*)ppDataEnd[Indices] - numToInsert, (Ts*)ppDataEnd[Indices]), 0)...); DoCopyFromTupleArray(pos, pos + numToInsert, first); } else { size_type numToInitialize = numToInsert - nExtra; swallow((eastl::uninitialized_move((Ts*)ppDataBegin[Indices], (Ts*)ppDataEnd[Indices], (Ts*)ppDataEnd[Indices] + numToInitialize), 0)...); DoCopyFromTupleArray(pos, begin() + oldNumElements, first); DoUninitializedCopyFromTupleArray(begin() + oldNumElements, pos + numToInsert, first + nExtra); } } } else { DoUninitializedCopyFromTupleArray(pos, pos + numToInsert, first); } return begin() + posIdx; } iterator erase(const_iterator first, const_iterator last) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(validate_iterator(first) == isf_none || validate_iterator(last) == isf_none)) EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator"); if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); #endif if (first != last) { size_type firstIdx = first - cbegin(); size_type lastIdx = last - cbegin(); size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements - (lastIdx - firstIdx); mNumElements = newNumElements; swallow((eastl::move(TupleVecLeaf::mpData + lastIdx, TupleVecLeaf::mpData + oldNumElements, TupleVecLeaf::mpData + firstIdx), 0)...); swallow((eastl::destruct(TupleVecLeaf::mpData + newNumElements, TupleVecLeaf::mpData + oldNumElements), 0)...); } return begin() + first.mIndex; } iterator erase_unsorted(const_iterator pos) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) EASTL_FAIL_MSG("tuple_vector::erase_unsorted -- invalid iterator"); #endif size_type oldNumElements = mNumElements; size_type newNumElements = oldNumElements - 1; mNumElements = newNumElements; swallow((eastl::move(TupleVecLeaf::mpData + newNumElements, TupleVecLeaf::mpData + oldNumElements, TupleVecLeaf::mpData + (pos - begin())), 0)...); 
swallow((eastl::destruct(TupleVecLeaf::mpData + newNumElements, TupleVecLeaf::mpData + oldNumElements), 0)...); return begin() + pos.mIndex; } void resize(size_type n) { size_type oldNumElements = mNumElements; size_type oldNumCapacity = mNumCapacity; mNumElements = n; if (n > oldNumElements) { if (n > oldNumCapacity) { DoReallocate(oldNumElements, eastl::max(GetNewCapacity(oldNumCapacity), n)); } swallow((eastl::uninitialized_value_construct_n(TupleVecLeaf::mpData + oldNumElements, n - oldNumElements), 0)...); } else { swallow((eastl::destruct(TupleVecLeaf::mpData + n, TupleVecLeaf::mpData + oldNumElements), 0)...); } } void resize(size_type n, const Ts&... args) { size_type oldNumElements = mNumElements; size_type oldNumCapacity = mNumCapacity; mNumElements = n; if (n > oldNumElements) { if (n > oldNumCapacity) { DoReallocate(oldNumElements, eastl::max(GetNewCapacity(oldNumCapacity), n)); } swallow((eastl::uninitialized_fill(TupleVecLeaf::mpData + oldNumElements, TupleVecLeaf::mpData + n, args), 0)...); } else { swallow((eastl::destruct(TupleVecLeaf::mpData + n, TupleVecLeaf::mpData + oldNumElements), 0)...); } } void reserve(size_type n) { DoConditionalReallocate(mNumElements, mNumCapacity, n); } void shrink_to_fit() { this_type temp(move_iterator(begin()), move_iterator(end()), get_allocator()); swap(temp); } void clear() EA_NOEXCEPT { size_type oldNumElements = mNumElements; mNumElements = 0; swallow((eastl::destruct(TupleVecLeaf::mpData, TupleVecLeaf::mpData + oldNumElements), 0)...); } void pop_back() { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(mNumElements <= 0)) EASTL_FAIL_MSG("tuple_vector::pop_back -- container is empty"); #endif size_type oldNumElements = mNumElements--; swallow((eastl::destruct(TupleVecLeaf::mpData + oldNumElements - 1, TupleVecLeaf::mpData + oldNumElements), 0)...); } void swap(this_type& x) { swallow((eastl::swap(TupleVecLeaf::mpData, x.TupleVecLeaf::mpData), 0)...); eastl::swap(mpData, x.mpData); eastl::swap(mNumElements, 
x.mNumElements); eastl::swap(mNumCapacity, x.mNumCapacity); eastl::swap(get_allocator(), x.get_allocator()); eastl::swap(internalDataSize(), x.internalDataSize()); } void assign(size_type n, const_reference_tuple tup) { assign(n, eastl::get(tup)...); } void assign(std::initializer_list iList) { assign(iList.begin(), iList.end()); } void push_back(Ts&&... args) { emplace_back(eastl::forward(args)...); } void push_back(const_reference_tuple tup) { push_back(eastl::get(tup)...); } void push_back(rvalue_tuple tup) { emplace_back(eastl::forward(eastl::get(tup))...); } void emplace_back(rvalue_tuple tup) { emplace_back(eastl::forward(eastl::get(tup))...); } void emplace(const_iterator pos, rvalue_tuple tup) { emplace(pos, eastl::forward(eastl::get(tup))...); } iterator insert(const_iterator pos, const Ts&... args) { return insert(pos, 1, args...); } iterator insert(const_iterator pos, Ts&&... args) { return emplace(pos, eastl::forward(args)...); } iterator insert(const_iterator pos, rvalue_tuple tup) { return emplace(pos, eastl::forward(eastl::get(tup))...); } iterator insert(const_iterator pos, const_reference_tuple tup) { return insert(pos, eastl::get(tup)...); } iterator insert(const_iterator pos, size_type n, const_reference_tuple tup) { return insert(pos, n, eastl::get(tup)...); } iterator insert(const_iterator pos, std::initializer_list iList) { return insert(pos, iList.begin(), iList.end()); } iterator erase(const_iterator pos) { return erase(pos, pos + 1); } reverse_iterator erase(const_reverse_iterator pos) { return reverse_iterator(erase((pos + 1).base(), (pos).base())); } reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last) { return reverse_iterator(erase((last).base(), (first).base())); } reverse_iterator erase_unsorted(const_reverse_iterator pos) { return reverse_iterator(erase_unsorted((pos + 1).base())); } void resize(size_type n, const_reference_tuple tup) { resize(n, eastl::get(tup)...); } bool empty() const EA_NOEXCEPT { 
return mNumElements == 0; } size_type size() const EA_NOEXCEPT { return mNumElements; } size_type capacity() const EA_NOEXCEPT { return mNumCapacity; } iterator begin() EA_NOEXCEPT { return iterator(this, 0); } const_iterator begin() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), 0); } const_iterator cbegin() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), 0); } iterator end() EA_NOEXCEPT { return iterator(this, size()); } const_iterator end() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), size()); } const_iterator cend() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), size()); } reverse_iterator rbegin() EA_NOEXCEPT { return reverse_iterator(end()); } const_reverse_iterator rbegin() const EA_NOEXCEPT { return const_reverse_iterator(end()); } const_reverse_iterator crbegin() const EA_NOEXCEPT { return const_reverse_iterator(end()); } reverse_iterator rend() EA_NOEXCEPT { return reverse_iterator(begin()); } const_reverse_iterator rend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } const_reverse_iterator crend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } ptr_tuple data() EA_NOEXCEPT { return ptr_tuple(TupleVecLeaf::mpData...); } const_ptr_tuple data() const EA_NOEXCEPT { return const_ptr_tuple(TupleVecLeaf::mpData...); } reference_tuple at(size_type n) { #if EASTL_EXCEPTIONS_ENABLED if (EASTL_UNLIKELY(n >= mNumElements)) throw std::out_of_range("tuple_vector::at -- out of range"); #elif EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(n >= mNumElements)) EASTL_FAIL_MSG("tuple_vector::at -- out of range"); #endif return reference_tuple(*(TupleVecLeaf::mpData + n)...); } const_reference_tuple at(size_type n) const { #if EASTL_EXCEPTIONS_ENABLED if (EASTL_UNLIKELY(n >= mNumElements)) throw std::out_of_range("tuple_vector::at -- out of range"); #elif EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(n >= mNumElements)) EASTL_FAIL_MSG("tuple_vector::at -- out of range"); 
#endif return const_reference_tuple(*(TupleVecLeaf::mpData + n)...); } reference_tuple operator[](size_type n) { return at(n); } const_reference_tuple operator[](size_type n) const { return at(n); } reference_tuple front() { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. EASTL_FAIL_MSG("tuple_vector::front -- empty vector"); #else // We allow the user to reference an empty container. #endif return at(0); } const_reference_tuple front() const { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. EASTL_FAIL_MSG("tuple_vector::front -- empty vector"); #else // We allow the user to reference an empty container. #endif return at(0); } reference_tuple back() { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. EASTL_FAIL_MSG("tuple_vector::back -- empty vector"); #endif return at(size() - 1); } const_reference_tuple back() const { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. 
EASTL_FAIL_MSG("tuple_vector::back -- empty vector"); #endif return at(size() - 1); } template tuplevec_element_t* get() { typedef tuplevec_element_t Element; return TupleVecLeaf::mpData; } template const tuplevec_element_t* get() const { typedef tuplevec_element_t Element; return TupleVecLeaf::mpData; } template T* get() { typedef tuplevec_index> Index; return TupleVecLeaf::mpData; } template const T* get() const { typedef tuplevec_index> Index; return TupleVecLeaf::mpData; } this_type& operator=(const this_type& other) { if (this != &other) { clear(); assign(other.begin(), other.end()); } return *this; } this_type& operator=(this_type&& other) { if (this != &other) { swap(other); } return *this; } this_type& operator=(std::initializer_list iList) { assign(iList.begin(), iList.end()); return *this; } bool validate() const EA_NOEXCEPT { if (mNumElements > mNumCapacity) return false; if (!(variadicAnd(mpData <= TupleVecLeaf::mpData...))) return false; void* pDataEnd = (void*)((uintptr_t)mpData + internalDataSize()); if (!(variadicAnd(pDataEnd >= TupleVecLeaf::mpData...))) return false; return true; } int validate_iterator(const_iterator iter) const EA_NOEXCEPT { if (!(variadicAnd(iter.mpData[Indices] == TupleVecLeaf::mpData...))) return isf_none; if (iter.mIndex < mNumElements) return (isf_valid | isf_current | isf_can_dereference); if (iter.mIndex <= mNumElements) return (isf_valid | isf_current); return isf_none; } static bool validate_iterator_pair(const_iterator first, const_iterator last) EA_NOEXCEPT { return (first.mIndex <= last.mIndex) && variadicAnd(first.mpData[Indices] == last.mpData[Indices]...); } EASTL_INTERNAL_DISABLE_DEPRECATED() // 'unwrap_iterator': was declared deprecated template ::value, bool>::type> EASTL_REMOVE_AT_2024_SEPT int validate_iterator(Iterator iter) const EA_NOEXCEPT { return validate_iterator(unwrap_iterator(iter)); } template ::value, bool>::type> EASTL_REMOVE_AT_2024_SEPT static bool validate_iterator_pair(Iterator first, 
Iterator last) EA_NOEXCEPT { return validate_iterator_pair(unwrap_iterator(first), unwrap_iterator(last)); } EASTL_INTERNAL_RESTORE_DEPRECATED() allocator_type& get_allocator() EA_NOEXCEPT { return mDataSizeAndAllocator.second(); } const allocator_type& get_allocator() const EA_NOEXCEPT { return mDataSizeAndAllocator.second(); } void set_allocator(const allocator_type& alloc) { if(mNumCapacity > 0 && mDataSizeAndAllocator.second() != alloc) EASTL_THROW_MSG_OR_ASSERT(std::logic_error, "tuple_vector::set_allocator -- cannot change allocator after allocations have been made."); mDataSizeAndAllocator.second() = alloc; } protected: void* mpData = nullptr; size_type mNumElements = 0; size_type mNumCapacity = 0; compressed_pair mDataSizeAndAllocator; size_type& internalDataSize() EA_NOEXCEPT { return mDataSizeAndAllocator.first(); } size_type const& internalDataSize() const EA_NOEXCEPT { return mDataSizeAndAllocator.first(); } friend struct TupleRecurser<>; template friend struct TupleRecurser; template void DoInitFromIterator(move_iterator begin, move_iterator end) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(!validate_iterator_pair(begin.base(), end.base()))) EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); #endif size_type newNumElements = (size_type)(end - begin); const void* ppOtherData[sizeof...(Ts)] = { begin.base().mpData[Indices]... 
}; size_type beginIdx = begin.base().mIndex; size_type endIdx = end.base().mIndex; DoConditionalReallocate(0, mNumCapacity, newNumElements); mNumElements = newNumElements; swallow((eastl::uninitialized_move(eastl::move_iterator((Ts*)(ppOtherData[Indices]) + beginIdx), eastl::move_iterator((Ts*)(ppOtherData[Indices]) + endIdx), TupleVecLeaf::mpData), 0)...); } void DoInitFromIterator(const_iterator begin, const_iterator end) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(!validate_iterator_pair(begin, end))) EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); #endif size_type newNumElements = (size_type)(end - begin); const void* ppOtherData[sizeof...(Ts)] = { begin.mpData[Indices]... }; size_type beginIdx = begin.mIndex; size_type endIdx = end.mIndex; DoConditionalReallocate(0, mNumCapacity, newNumElements); mNumElements = newNumElements; swallow((eastl::uninitialized_copy((Ts*)(ppOtherData[Indices]) + beginIdx, (Ts*)(ppOtherData[Indices]) + endIdx, TupleVecLeaf::mpData), 0)...); } void DoInitFillTuple(size_type n, const_reference_tuple tup) { DoInitFillArgs(n, eastl::get(tup)...); } void DoInitFillArgs(size_type n, const Ts&... 
args) { DoConditionalReallocate(0, mNumCapacity, n); mNumElements = n; swallow((eastl::uninitialized_fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + n, args), 0)...); } void DoInitDefaultFill(size_type n) { DoConditionalReallocate(0, mNumCapacity, n); mNumElements = n; swallow((eastl::uninitialized_value_construct_n(TupleVecLeaf::mpData, n), 0)...); } void DoInitFromTupleArray(const value_tuple* first, const value_tuple* last) { #if EASTL_ASSERT_ENABLED if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) EASTL_FAIL_MSG("tuple_vector::ctor from tuple array -- invalid ptrs"); #endif size_type newNumElements = static_cast(last - first); DoConditionalReallocate(0, mNumCapacity, newNumElements); mNumElements = newNumElements; DoUninitializedCopyFromTupleArray(begin(), end(), first); } void DoCopyFromTupleArray(iterator destPos, iterator destEnd, const value_tuple* srcTuple) { // assign to constructed region while (destPos < destEnd) { *destPos = *srcTuple; ++destPos; ++srcTuple; } } void DoUninitializedCopyFromTupleArray(iterator destPos, iterator destEnd, const value_tuple* srcTuple) { // placement-new/copy-ctor to unconstructed regions while (destPos < destEnd) { swallow((detail::allocator_construct(get_allocator(), eastl::get(destPos.MakePointer()), eastl::get(*srcTuple)), 0)...); ++destPos; ++srcTuple; } } // Try to grow the size of the container "naturally" given the number of elements being used void DoGrow(size_type oldNumElements, size_type oldNumCapacity, size_type requiredCapacity) { if (requiredCapacity > oldNumCapacity) DoReallocate(oldNumElements, GetNewCapacity(requiredCapacity)); } // Reallocate to the newCapacity (IFF it's actually larger, though) void DoConditionalReallocate(size_type oldNumElements, size_type oldNumCapacity, size_type requiredCapacity) { if (requiredCapacity > oldNumCapacity) DoReallocate(oldNumElements, requiredCapacity); } void DoReallocate(size_type oldNumElements, size_type requiredCapacity) { void* 
ppNewLeaf[sizeof...(Ts)]; pair allocation = TupleRecurser::template DoAllocate( *this, ppNewLeaf, requiredCapacity, 0); swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct(0, oldNumElements, (Ts*)ppNewLeaf[Indices]), 0)...); swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); EASTLFree(get_allocator(), mpData, internalDataSize()); mpData = allocation.first; mNumCapacity = requiredCapacity; internalDataSize() = allocation.second; } size_type GetNewCapacity(size_type oldNumCapacity) { return (oldNumCapacity > 0) ? (2 * oldNumCapacity) : 1; } }; } // namespace TupleVecInternal // Move_iterator specialization for TupleVecIter. // An rvalue reference of a move_iterator would normaly be "tuple &&" whereas // what we actually want is "tuple". This specialization gives us that. template class move_iterator, Ts...>> { public: typedef TupleVecInternal::TupleVecIter, Ts...> iterator_type; typedef iterator_traits traits_type; typedef typename traits_type::iterator_category iterator_category; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type; typedef typename traits_type::pointer pointer; typedef tuple reference; typedef move_iterator this_type; protected: iterator_type mIterator; public: move_iterator() : mIterator() {} explicit move_iterator(iterator_type mi) : mIterator(mi) {} template move_iterator(const move_iterator& mi) : mIterator(mi.base()) {} iterator_type base() const { return mIterator; } reference operator*() const { return eastl::move(MakeReference()); } pointer operator->() const { return mIterator; } this_type& operator++() { ++mIterator; return *this; } this_type operator++(int) { this_type tempMoveIterator = *this; ++mIterator; return tempMoveIterator; } this_type& operator--() { --mIterator; return *this; } this_type operator--(int) { this_type tempMoveIterator = *this; --mIterator; return tempMoveIterator; } this_type operator+(difference_type n) const { return move_iterator(mIterator 
+ n); } this_type& operator+=(difference_type n) { mIterator += n; return *this; } this_type operator-(difference_type n) const { return move_iterator(mIterator - n); } this_type& operator-=(difference_type n) { mIterator -= n; return *this; } difference_type operator-(const this_type& rhs) const { return mIterator - rhs.mIterator; } bool operator<(const this_type& rhs) const { return mIterator < rhs.mIterator; } bool operator>(const this_type& rhs) const { return mIterator > rhs.mIterator; } bool operator>=(const this_type& rhs) const { return mIterator >= rhs.mIterator; } bool operator<=(const this_type& rhs) const { return mIterator <= rhs.mIterator; } reference operator[](difference_type n) const { return *(*this + n); } private: reference MakeReference() const { return reference(eastl::move(((Ts*)mIterator.mpData[Indices])[mIterator.mIndex])...); } EASTL_INTERNAL_DISABLE_DEPRECATED() // 'is_iterator_wrapper': was declared deprecated // Unwrapping interface, not part of the public API. EASTL_REMOVE_AT_2024_SEPT iterator_type unwrap() const { return mIterator; } // The unwrapper helpers need access to unwrap(). 
friend is_iterator_wrapper_helper; friend is_iterator_wrapper; EASTL_INTERNAL_RESTORE_DEPRECATED() }; template inline bool operator==(const TupleVecInternal::TupleVecImpl& a, const TupleVecInternal::TupleVecImpl& b) { return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); } template inline bool operator!=(const TupleVecInternal::TupleVecImpl& a, const TupleVecInternal::TupleVecImpl& b) { return ((a.size() != b.size()) || !eastl::equal(a.begin(), a.end(), b.begin())); } template inline bool operator<(const TupleVecInternal::TupleVecImpl& a, const TupleVecInternal::TupleVecImpl& b) { return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template inline bool operator>(const TupleVecInternal::TupleVecImpl& a, const TupleVecInternal::TupleVecImpl& b) { return b < a; } template inline bool operator<=(const TupleVecInternal::TupleVecImpl& a, const TupleVecInternal::TupleVecImpl& b) { return !(b < a); } template inline bool operator>=(const TupleVecInternal::TupleVecImpl& a, const TupleVecInternal::TupleVecImpl& b) { return !(a < b); } template inline void swap(TupleVecInternal::TupleVecImpl& a, TupleVecInternal::TupleVecImpl& b) { a.swap(b); } // A customization of swap is made for r-values of tuples-of-references - // normally, swapping rvalues doesn't make sense, but in this case, we do want to // swap the contents of what the tuple-of-references are referring to // // This is required due to TupleVecIter returning a value-type for its dereferencing, // as opposed to an actual real reference of some sort template inline typename enable_if...>::value>::type swap(tuple&& a, tuple&& b) { a.swap(b); } template inline typename enable_if...>::value>::type swap(tuple&& a, tuple&& b) = delete; // External interface of tuple_vector template class tuple_vector : public TupleVecInternal::TupleVecImpl, Ts...> { typedef tuple_vector this_type; typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; using base_type::base_type; 
public: this_type& operator=(std::initializer_list iList) { base_type::operator=(iList); return *this; } }; // Variant of tuple_vector that allows a user-defined allocator type (can't mix default template params with variadics) template class tuple_vector_alloc : public TupleVecInternal::TupleVecImpl, Ts...> { typedef tuple_vector_alloc this_type; typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; using base_type::base_type; public: this_type& operator=(std::initializer_list iList) { base_type::operator=(iList); return *this; } }; } // namespace eastl EA_RESTORE_VC_WARNING() EA_RESTORE_VC_WARNING() EA_RESTORE_VC_WARNING() EA_RESTORE_VC_WARNING() #endif // EASTL_TUPLEVECTOR_H ================================================ FILE: include/EASTL/chrono.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // This file implements the eastl::chrono specification which is part of the // standard STL date and time library. eastl::chrono implements all the // mechanisms required to capture and manipulate times retrieved from the // provided clocks. It implements the all of the features to allow type safe // durations to be used in code. 
/////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_CHRONO_H #define EASTL_CHRONO_H #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once #endif #include #include #include #include // TODO: move to platform specific cpp or header file #if defined EA_PLATFORM_MICROSOFT EA_DISABLE_ALL_VC_WARNINGS() #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #undef NOMINMAX #define NOMINMAX #include #ifdef min #undef min #endif #ifdef max #undef max #endif EA_RESTORE_ALL_VC_WARNINGS() #endif #if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) // Nothing to do #elif defined(EA_PLATFORM_SONY) #include #include #elif defined(EA_PLATFORM_APPLE) #include #elif defined(EA_PLATFORM_POSIX) || defined(EA_PLATFORM_MINGW) || defined(EA_PLATFORM_ANDROID) // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). #if defined(EA_PLATFORM_MINGW) #include #endif #include #if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) #include #else #include #include #endif #endif namespace eastl { namespace chrono { /////////////////////////////////////////////////////////////////////////////// // treat_as_floating_point /////////////////////////////////////////////////////////////////////////////// template struct treat_as_floating_point : is_floating_point {}; /////////////////////////////////////////////////////////////////////////////// // 20.12.4, duration_values /////////////////////////////////////////////////////////////////////////////// template struct duration_values { public: EASTL_FORCE_INLINE static EA_CONSTEXPR Rep zero() { return Rep(0); } EASTL_FORCE_INLINE static EA_CONSTEXPR Rep max() { return eastl::numeric_limits::max(); } EASTL_FORCE_INLINE static EA_CONSTEXPR Rep min() { return eastl::numeric_limits::lowest(); } }; /////////////////////////////////////////////////////////////////////////////// // duration fwd_decl 
/////////////////////////////////////////////////////////////////////////////// template > class duration; namespace Internal { /////////////////////////////////////////////////////////////////////////////// // IsRatio /////////////////////////////////////////////////////////////////////////////// template struct IsRatio : eastl::false_type {}; template struct IsRatio> : eastl::true_type {}; template struct IsRatio> : eastl::true_type {}; template struct IsRatio> : eastl::true_type {}; template struct IsRatio> : eastl::true_type {}; /////////////////////////////////////////////////////////////////////////////// // IsDuration /////////////////////////////////////////////////////////////////////////////// template struct IsDuration : eastl::false_type{}; template struct IsDuration> : eastl::true_type{}; template struct IsDuration> : eastl::true_type{}; template struct IsDuration> : eastl::true_type{}; template struct IsDuration> : eastl::true_type{}; /////////////////////////////////////////////////////////////////////////////// // RatioGCD /////////////////////////////////////////////////////////////////////////////// template struct RatioGCD { static_assert(IsRatio::value, "Period1 is not a eastl::ratio type"); static_assert(IsRatio::value, "Period2 is not a eastl::ratio type"); typedef ratio::value, eastl::Internal::lcm::value> type; }; }; /////////////////////////////////////////////////////////////////////////////// // 20.12.5.7, duration_cast /////////////////////////////////////////////////////////////////////////////// namespace Internal { template ::type, typename CommonRep = typename eastl::decay::type>::type, bool = CommonPeriod::num == 1, bool = CommonPeriod::den == 1> struct DurationCastImpl; template struct DurationCastImpl { inline static ToDuration DoCast(const FromDuration& fd) { return ToDuration(static_cast(fd.count())); } }; template struct DurationCastImpl { inline static ToDuration DoCast(const FromDuration& d) { return 
ToDuration(static_cast(static_cast(d.count()) * static_cast(CommonPeriod::num))); } }; template struct DurationCastImpl { inline static ToDuration DoCast(const FromDuration& d) { return ToDuration(static_cast(static_cast(d.count()) / static_cast(CommonPeriod::den))); } }; template struct DurationCastImpl { inline static ToDuration DoCast(const FromDuration& d) { return ToDuration(static_cast(static_cast(d.count()) * static_cast(CommonPeriod::num) / static_cast(CommonPeriod::den))); } }; }; // namespace Internal /////////////////////////////////////////////////////////////////////////////// // duration_cast /////////////////////////////////////////////////////////////////////////////// template inline typename eastl::enable_if::value, ToDuration>::type duration_cast(const duration& d) { typedef typename duration::this_type FromDuration; return Internal::DurationCastImpl::DoCast(d); } /////////////////////////////////////////////////////////////////////////////// // duration /////////////////////////////////////////////////////////////////////////////// template class duration { Rep mRep; public: typedef Rep rep; typedef Period period; typedef duration this_type; #if defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) EA_CONSTEXPR duration() : mRep() {} duration(const duration& other) : mRep(Rep(other.mRep)) {} duration& operator=(const duration& other) { mRep = other.mRep; return *this; } #else EA_CONSTEXPR duration() = default; duration(const duration&) = default; duration& operator=(const duration&) = default; #endif /////////////////////////////////////////////////////////////////////////////// // conversion constructors /////////////////////////////////////////////////////////////////////////////// template inline EA_CONSTEXPR explicit duration( const Rep2& rep2, typename eastl::enable_if::value && (treat_as_floating_point::value || !treat_as_floating_point::value)>::type** = 0) : mRep(static_cast(rep2)) {} template EA_CONSTEXPR duration(const duration& d2, typename 
eastl::enable_if::value || (eastl::ratio_divide::type::den == 1 && !treat_as_floating_point::value), void>::type** = 0) : mRep(duration_cast(d2).count()) {} /////////////////////////////////////////////////////////////////////////////// // returns the count of ticks /////////////////////////////////////////////////////////////////////////////// EA_CONSTEXPR Rep count() const { return mRep; } /////////////////////////////////////////////////////////////////////////////// // static accessors of special duration values /////////////////////////////////////////////////////////////////////////////// EA_CONSTEXPR inline static duration zero() { return duration(duration_values::zero()); } EA_CONSTEXPR inline static duration min() { return duration(duration_values::min()); } EA_CONSTEXPR inline static duration max() { return duration(duration_values::max()); } /////////////////////////////////////////////////////////////////////////////// // const arithmetic operations /////////////////////////////////////////////////////////////////////////////// EA_CONSTEXPR inline duration operator+() const { return *this; } EA_CONSTEXPR inline duration operator-() const { return duration(0-mRep); } /////////////////////////////////////////////////////////////////////////////// // arithmetic operations /////////////////////////////////////////////////////////////////////////////// inline duration operator++(int) { return duration(mRep++); } inline duration operator--(int) { return duration(mRep--); } inline duration& operator++() { ++mRep; return *this; } inline duration& operator--() { --mRep; return *this; } inline duration& operator+=(const duration& d) { mRep += d.count(); return *this; } inline duration& operator-=(const duration& d) { mRep -= d.count(); return *this; } inline duration& operator*=(const Rep& rhs) { mRep *= rhs; return *this; } inline duration& operator/=(const Rep& rhs) { mRep /= rhs; return *this; } inline duration& operator%=(const Rep& rhs) { mRep %= rhs; return 
*this; } inline duration& operator%=(const duration& d) { mRep %= d.count(); return *this; } }; /////////////////////////////////////////////////////////////////////////////// // 20.12.5.5, arithmetic operations with durations as arguments /////////////////////////////////////////////////////////////////////////////// template typename eastl::common_type, duration>::type EASTL_FORCE_INLINE operator+(const duration& lhs, const duration& rhs) { typedef typename eastl::common_type, duration>::type common_duration_t; return common_duration_t(common_duration_t(lhs).count() + common_duration_t(rhs).count()); } template typename eastl::common_type, duration>::type EASTL_FORCE_INLINE operator-(const duration& lhs, const duration& rhs) { typedef typename eastl::common_type, duration>::type common_duration_t; return common_duration_t(common_duration_t(lhs).count() - common_duration_t(rhs).count()); } template duration::type, Period1> EASTL_FORCE_INLINE operator*(const duration& lhs, const Rep2& rhs) { typedef duration::type, Period1> common_duration_t; return common_duration_t(common_duration_t(lhs).count() * rhs); } template duration::type, Period2> EASTL_FORCE_INLINE operator*(const Rep1& lhs, const duration& rhs) { typedef duration::type, Period2> common_duration_t; return common_duration_t(lhs * common_duration_t(rhs).count()); } template duration::type, Period1> EASTL_FORCE_INLINE operator/(const duration& lhs, const Rep2& rhs) { typedef duration::type, Period1> common_duration_t; return common_duration_t(common_duration_t(lhs).count() / rhs); } template typename eastl::common_type, duration>::type EASTL_FORCE_INLINE operator/(const duration& lhs, const duration& rhs) { typedef typename eastl::common_type, duration>::type common_duration_t; return common_duration_t(common_duration_t(lhs).count() / common_duration_t(rhs).count()); } template duration::type, Period1> EASTL_FORCE_INLINE operator%(const duration& lhs, const Rep2& rhs) { typedef duration::type, Period1> 
common_duration_t; return common_duration_t(common_duration_t(lhs).count() % rhs); } template typename eastl::common_type, duration>::type EASTL_FORCE_INLINE operator%(const duration& lhs, const duration& rhs) { typedef typename eastl::common_type, duration>::type common_duration_t; return common_duration_t(common_duration_t(lhs).count() % common_duration_t(rhs).count()); } /////////////////////////////////////////////////////////////////////////////// // 20.12.5.6, compares two durations /////////////////////////////////////////////////////////////////////////////// template EASTL_FORCE_INLINE bool operator==(const duration& lhs, const duration& rhs) { typedef typename eastl::common_type, duration>::type common_duration_t; return common_duration_t(lhs).count() == common_duration_t(rhs).count(); } template EASTL_FORCE_INLINE bool operator<(const duration& lhs, const duration& rhs) { typedef typename eastl::common_type, duration>::type common_duration_t; return common_duration_t(lhs).count() < common_duration_t(rhs).count(); } template EASTL_FORCE_INLINE bool operator!=(const duration& lhs, const duration& rhs) { return !(lhs == rhs); } template EASTL_FORCE_INLINE bool operator<=(const duration& lhs, const duration& rhs) { return !(rhs < lhs); } template EASTL_FORCE_INLINE bool operator>(const duration& lhs, const duration& rhs) { return rhs < lhs; } template EASTL_FORCE_INLINE bool operator>=(const duration& lhs, const duration& rhs) { return !(lhs < rhs); } /////////////////////////////////////////////////////////////////////////////// // standard duration units /////////////////////////////////////////////////////////////////////////////// typedef duration nanoseconds; typedef duration microseconds; typedef duration milliseconds; typedef duration seconds; typedef duration> minutes; typedef duration> hours; /////////////////////////////////////////////////////////////////////////////// // 20.12.6, time_point 
/////////////////////////////////////////////////////////////////////////////// template class time_point { Duration mDuration; public: typedef Clock clock; typedef Duration duration; typedef typename Duration::rep rep; typedef typename Duration::period period; inline EA_CONSTEXPR time_point() : mDuration(Duration::zero()) {} EA_CONSTEXPR explicit time_point(const Duration& other) : mDuration(other) {} template inline EA_CONSTEXPR time_point( const time_point& t, typename eastl::enable_if::value>::type** = 0) : mDuration(t.time_since_epoch()) {} EA_CONSTEXPR Duration time_since_epoch() const { return mDuration; } time_point& operator+=(const Duration& d) { mDuration += d; return *this; } time_point& operator-=(const Duration& d) { mDuration -= d; return *this; } static EA_CONSTEXPR time_point min() { return time_point(Duration::min()); } static EA_CONSTEXPR time_point max() { return time_point(Duration::max()); } }; /////////////////////////////////////////////////////////////////////////////// // 20.12.6.5, time_point arithmetic /////////////////////////////////////////////////////////////////////////////// template inline EA_CONSTEXPR time_point>::type> operator+(const time_point& lhs, const duration& rhs) { typedef time_point>::type> common_timepoint_t; return common_timepoint_t(lhs.time_since_epoch() + rhs); } template inline EA_CONSTEXPR time_point>::type> operator+(const duration& lhs, const time_point& rhs) { typedef time_point>::type> common_timepoint_t; return common_timepoint_t(lhs + rhs.time_since_epoch()); } template inline EA_CONSTEXPR time_point>::type> operator-(const time_point& lhs, const duration& rhs) { typedef time_point>::type> common_timepoint_t; return common_timepoint_t(lhs.time_since_epoch() - rhs); } template inline EA_CONSTEXPR typename eastl::common_type::type operator-( const time_point& lhs, const time_point& rhs) { return lhs.time_since_epoch() - rhs.time_since_epoch(); } template inline EA_CONSTEXPR bool operator==(const time_point& 
lhs, const time_point& rhs) { return lhs.time_since_epoch() == rhs.time_since_epoch(); } template inline EA_CONSTEXPR bool operator!=(const time_point& lhs, const time_point& rhs) { return !(lhs == rhs); } template inline EA_CONSTEXPR bool operator<(const time_point& lhs, const time_point& rhs) { return lhs.time_since_epoch() < rhs.time_since_epoch(); } template inline EA_CONSTEXPR bool operator<=(const time_point& lhs, const time_point& rhs) { return !(rhs < lhs); } template inline EA_CONSTEXPR bool operator>(const time_point& lhs, const time_point& rhs) { return rhs < lhs; } template inline EA_CONSTEXPR bool operator>=(const time_point& lhs, const time_point& rhs) { return !(lhs < rhs); } /////////////////////////////////////////////////////////////////////////////// // 20.12.6.7, time_point_cast /////////////////////////////////////////////////////////////////////////////// template EA_CONSTEXPR time_point time_point_cast( const time_point& t, typename eastl::enable_if::value>::type** = 0) { return time_point(duration_cast(t.time_since_epoch())); } /////////////////////////////////////////////////////////////////////////////// // 20.12.7, clocks /////////////////////////////////////////////////////////////////////////////// namespace Internal { #if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) #define EASTL_NS_PER_TICK 1 #elif defined EA_PLATFORM_SONY #define EASTL_NS_PER_TICK 1 #elif defined EA_PLATFORM_POSIX #define EASTL_NS_PER_TICK _XTIME_NSECS_PER_TICK #else #define EASTL_NS_PER_TICK 100 #endif #if defined(EA_PLATFORM_POSIX) typedef chrono::nanoseconds::period SystemClock_Period; typedef chrono::nanoseconds::period SteadyClock_Period; #else typedef eastl::ratio_multiply, nano>::type SystemClock_Period; typedef eastl::ratio_multiply, nano>::type SteadyClock_Period; #endif /////////////////////////////////////////////////////////////////////////////// // Internal::GetTicks 
/////////////////////////////////////////////////////////////////////////////// inline uint64_t GetTicks() { #if defined EA_PLATFORM_MICROSOFT auto queryFrequency = [] { LARGE_INTEGER frequency; QueryPerformanceFrequency(&frequency); return double(1000000000.0L / (long double)frequency.QuadPart); // nanoseconds per tick }; auto queryCounter = [] { LARGE_INTEGER counter; QueryPerformanceCounter(&counter); return counter.QuadPart; }; EA_DISABLE_VC_WARNING(4640) // warning C4640: construction of local static object is not thread-safe (VS2013) static auto frequency = queryFrequency(); // cache cpu frequency on first call EA_RESTORE_VC_WARNING() return uint64_t(frequency * (double)queryCounter()); #elif defined EA_PLATFORM_SONY auto queryFrequency = [] { // nanoseconds/seconds / ticks/seconds return double(1000000000.0L / (long double)sceKernelGetProcessTimeCounterFrequency()); // nanoseconds per tick }; auto queryCounter = [] { return sceKernelGetProcessTimeCounter(); }; EA_DISABLE_VC_WARNING(4640) // warning C4640: construction of local static object is not thread-safe (VS2013) static auto frequency = queryFrequency(); // cache cpu frequency on first call EA_RESTORE_VC_WARNING() return uint64_t(frequency * (double)queryCounter()); #elif defined(EA_PLATFORM_APPLE) auto queryTimeInfo = [] { mach_timebase_info_data_t info; mach_timebase_info(&info); return info; }; static auto timeInfo = queryTimeInfo(); uint64_t t = mach_absolute_time(); t *= timeInfo.numer; t /= timeInfo.denom; return t; #elif defined(EA_PLATFORM_POSIX) // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). 
#if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) timespec ts; int result = clock_gettime(CLOCK_MONOTONIC, &ts); if (result == -1 && errno == EINVAL) result = clock_gettime(CLOCK_REALTIME, &ts); const uint64_t nNanoseconds = (uint64_t)ts.tv_nsec + ((uint64_t)ts.tv_sec * UINT64_C(1000000000)); return nNanoseconds; #else struct timeval tv; gettimeofday(&tv, NULL); const uint64_t nMicroseconds = (uint64_t)tv.tv_usec + ((uint64_t)tv.tv_sec * 1000000); return nMicroseconds; #endif #else #error "chrono not implemented for platform" #endif } } // namespace Internal /////////////////////////////////////////////////////////////////////////////// // system_clock /////////////////////////////////////////////////////////////////////////////// class system_clock { public: typedef long long rep; // signed arithmetic type representing the number of ticks in the clock's duration typedef Internal::SystemClock_Period period; typedef chrono::duration duration; // duration, capable of representing negative durations typedef chrono::time_point time_point; // true if the time between ticks is always increases monotonically EA_CONSTEXPR_OR_CONST static bool is_steady = false; // returns a time point representing the current point in time. 
static time_point now() EA_NOEXCEPT { return time_point(duration(Internal::GetTicks())); } }; /////////////////////////////////////////////////////////////////////////////// // steady_clock /////////////////////////////////////////////////////////////////////////////// class steady_clock { public: typedef long long rep; // signed arithmetic type representing the number of ticks in the clock's duration typedef Internal::SteadyClock_Period period; typedef chrono::duration duration; // duration, capable of representing negative durations typedef chrono::time_point time_point; // true if the time between ticks is always increases monotonically EA_CONSTEXPR_OR_CONST static bool is_steady = true; // returns a time point representing the current point in time. static time_point now() EA_NOEXCEPT { return time_point(duration(Internal::GetTicks())); } }; /////////////////////////////////////////////////////////////////////////////// // high_resolution_clock /////////////////////////////////////////////////////////////////////////////// typedef system_clock high_resolution_clock; } // namespace chrono /////////////////////////////////////////////////////////////////////////////// // duration common_type specialization /////////////////////////////////////////////////////////////////////////////// template struct common_type, chrono::duration> { typedef chrono::duration::type>::type, typename chrono::Internal::RatioGCD::type> type; }; /////////////////////////////////////////////////////////////////////////////// // time_point common_type specialization /////////////////////////////////////////////////////////////////////////////// template struct common_type, chrono::time_point> { typedef chrono::time_point::type> type; }; /////////////////////////////////////////////////////////////////////////////// // chrono_literals /////////////////////////////////////////////////////////////////////////////// #if EASTL_USER_LITERALS_ENABLED && EASTL_INLINE_NAMESPACES_ENABLED // 
	// Disabling the Clang/GCC/MSVC warning about using user
	// defined literals without a leading '_' as they are reserved
	// for standard library usage.
	EA_DISABLE_VC_WARNING(4455)
	EA_DISABLE_CLANG_WARNING(-Wuser-defined-literals)
	EA_DISABLE_GCC_WARNING(-Wliteral-suffix)
	inline namespace literals
	{
		inline namespace chrono_literals
		{
			///////////////////////////////////////////////////////////////////////////////
			// integer chrono literals
			//
			// Each operator wraps the integer literal value in the named
			// chrono duration type (hours, minutes, seconds, milliseconds,
			// microseconds, nanoseconds).
			///////////////////////////////////////////////////////////////////////////////
			EA_CONSTEXPR chrono::hours operator"" h(unsigned long long h) { return chrono::hours(h); }
			EA_CONSTEXPR chrono::minutes operator"" min(unsigned long long m) { return chrono::minutes(m); }
			EA_CONSTEXPR chrono::seconds operator"" s(unsigned long long s) { return chrono::seconds(s); }
			EA_CONSTEXPR chrono::milliseconds operator"" ms(unsigned long long ms) { return chrono::milliseconds(ms); }
			EA_CONSTEXPR chrono::microseconds operator"" us(unsigned long long us) { return chrono::microseconds(us); }
			EA_CONSTEXPR chrono::nanoseconds operator"" ns(unsigned long long ns) { return chrono::nanoseconds(ns); }

			///////////////////////////////////////////////////////////////////////////////
			// float chrono literals
			//
			// Same suffixes as above, taking a long double literal.
			// NOTE(review): the template argument lists of the duration types
			// below are missing in this copy of the file (text between angle
			// brackets appears to have been stripped), so the representation
			// and period types cannot be read from here - confirm against the
			// upstream header.
			///////////////////////////////////////////////////////////////////////////////
			EA_CONSTEXPR chrono::duration> operator"" h(long double h)
				{ return chrono::duration>(h); }
			EA_CONSTEXPR chrono::duration> operator"" min(long double m)
				{ return chrono::duration>(m); }
			EA_CONSTEXPR chrono::duration operator"" s(long double s)
				{ return chrono::duration(s); }
			EA_CONSTEXPR chrono::duration operator"" ms(long double ms)
				{ return chrono::duration(ms); }
			EA_CONSTEXPR chrono::duration operator"" us(long double us)
				{ return chrono::duration(us); }
			EA_CONSTEXPR chrono::duration operator"" ns(long double ns)
				{ return chrono::duration(ns); }

		} // namespace chrono_literals
	}// namespace literals
	EA_RESTORE_GCC_WARNING()	// -Wliteral-suffix
	EA_RESTORE_CLANG_WARNING()	//
	// -Wuser-defined-literals
	EA_RESTORE_VC_WARNING()		// warning: 4455
#endif

} // namespace eastl


#if EASTL_USER_LITERALS_ENABLED && EASTL_INLINE_NAMESPACES_ENABLED
// Declares a namespace named 'chrono' outside of namespace eastl and pulls the
// EASTL chrono literal operators into it.
namespace chrono
{
	using namespace eastl::literals::chrono_literals;
} // namespace chrono
#endif


#endif

================================================
FILE: include/EASTL/compare.h
================================================
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
///////////////////////////////////////////////////////////////////////////////

#ifndef EASTL_COMPARE_H
#define EASTL_COMPARE_H

// NOTE(review): the include target below is missing in this copy of the file
// (text between angle brackets appears to have been stripped). The same applies
// to the template parameter/argument lists further down.
#include

namespace eastl
{

#if defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON)
	/// synth_three_way
	///
	/// Function object producing a three-way comparison result for (t, u).
	/// The call operator is constrained on 't < u' and 'u < t' being valid
	/// expressions. When std::three_way_comparable_with is satisfied it
	/// returns 't <=> u'; otherwise it builds a std::weak_ordering from
	/// operator<: less if t < u, greater if u < t, equivalent otherwise.
	struct synth_three_way
	{
		template
		constexpr auto operator()(const T& t, const U& u) const requires requires
		{
			{t < u}->std::convertible_to;
			{u < t}->std::convertible_to;
		}
		{
			if constexpr (std::three_way_comparable_with)
			{
				return t <=> u;
			}
			else
			{
				return (t < u) ? std::weak_ordering::less :
					   (u < t) ? std::weak_ordering::greater :
								 std::weak_ordering::equivalent;
			}
		}
	};

	/// synth_three_way_result
	///
	/// The type returned by invoking synth_three_way on the given operand types.
	template
	using synth_three_way_result = decltype(synth_three_way{}(declval(), declval()));
#endif

} // namespace eastl


#endif // Header include guard

================================================
FILE: include/EASTL/core_allocator.h
================================================
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) Electronic Arts Inc. All rights reserved.
/////////////////////////////////////////////////////////////////////////////

#ifndef EASTL_CORE_ALLOCATOR_H
#define EASTL_CORE_ALLOCATOR_H

#if EASTL_CORE_ALLOCATOR_ENABLED

// NOTE(review): include target missing in this copy of the file - see above.
#include

namespace EA
{
	namespace Allocator
	{
		/// EASTLCoreAllocatorImpl
		///
		/// EASTL provides an out of the box implementation of the
		/// ICoreAllocator interface.  This is provided as a convenience for
		/// users who wish to provide ICoreAllocator implementations for EASTL to use.
/// /// EASTL has a dependency on coreallocator so to provide an out of /// the box implementation for EASTLCoreAlloctor and EASTLCoreDeleter /// that can be used and tested. Historically we could not test /// ICoreAllocator interface because we relied on the code being linked /// in user code. /// class EASTLCoreAllocatorImpl : public ICoreAllocator { public: virtual void* Alloc(size_t size, const char* name, unsigned int flags) { return ::operator new[](size, name, flags, 0, __FILE__, __LINE__); } virtual void* Alloc(size_t size, const char* name, unsigned int flags, unsigned int alignment, unsigned int alignOffset = 0) { return ::operator new[](size, alignment, alignOffset, name, flags, 0, __FILE__, __LINE__); } virtual void Free(void* ptr, size_t size = 0) { ::operator delete(static_cast(ptr)); } virtual void* AllocDebug(size_t size, const DebugParams debugParams, unsigned int flags) { return Alloc(size, debugParams.mName, flags); } virtual void* AllocDebug(size_t size, const DebugParams debugParams, unsigned int flags, unsigned int align, unsigned int alignOffset = 0) { return Alloc(size, debugParams.mName, flags, align, alignOffset); } static EASTLCoreAllocatorImpl* GetDefaultAllocator(); }; inline EASTLCoreAllocatorImpl* EASTLCoreAllocatorImpl::GetDefaultAllocator() { static EASTLCoreAllocatorImpl allocator; return &allocator; } } } #endif // EASTL_CORE_ALLOCATOR_ENABLED #endif // EASTL_CORE_ALLOCATOR_H ================================================ FILE: include/EASTL/core_allocator_adapter.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Implements an EASTL allocator that uses an ICoreAllocator. 
// However, this header file is not dependent on ICoreAllocator or its package. /////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_CORE_ALLOCATOR_ADAPTER_H #define EASTL_CORE_ALLOCATOR_ADAPTER_H #if EASTL_CORE_ALLOCATOR_ENABLED #include #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif /// EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR /// /// This allows the application to override the default name for the default global core allocator. /// However, you must be careful in your usage of this, as if this file is shared between uses then /// you will need to be careful that your override of this doesn't conflict with others. /// #ifndef EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR #define EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR AllocatorType::GetDefaultAllocator #endif namespace EA { namespace Allocator { /// CoreAllocatorAdapter /// /// Implements the EASTL allocator interface. /// Allocates memory from an instance of ICoreAllocator or another class with an equivalent interface. /// ICoreAllocator is a pure-virtual memory allocation interface used by a number of EA games and /// shared libraries. It's completely unrelated to EASTL, but it's prevalent enough that it's useful /// for EASTL to have a built-in adapter for this interface. ICoreAllocator is declared in the /// CoreAllocator package icoreallocator_interface.h header, but CoreAllocatorAdapter can work with /// any equivalent interface, as defined below. 
		///
		/// Expected interface:
		///     enum AllocFlags {
		///         kFlagTempMemory = 0,
		///         kFlagPermMemory = 1
		///     };
		///
		///     struct CoreAllocator {
		///         void* Alloc(size_t size, const char* name, unsigned int allocFlags);
		///         void* Alloc(size_t size, const char* name, unsigned int allocFlags,     // Not required unless you are working with types that require custom alignment.
		///                      unsigned int align, unsigned int alignOffset = 0);
		///         void Free(void* block, size_t size = 0);
		///         static CoreAllocator* GetDefaultAllocator();
		///     };
		///
		/// Example usage:
		///     #include
		///     typedef EA::Allocator::CoreAllocatorAdapter Adapter;
		///     eastl::list widgetList(Adapter("UI/WidgetList", pSomeCoreAllocator));
		///     widgetList.push_back(Widget());
		///
		/// Example usage:
		///     #include
		///     eastl::list > widgetList;
		///     widgetList.push_back(Widget());
		///
		/// Example usage:
		///     #include
		///     typedef EA::Allocator::CoreAllocatorAdapter Adapter;
		///     typedef eastl::list WidgetList;
		///     CoreAllocatorFixed widgetCoreAllocator(pFixedAllocatorForWidgetListValueType);  // CoreAllocatorFixed is a hypothetical implementation of the ICoreAllocator interface.
		///     WidgetList widgetList(Adapter("UI/WidgetList", &widgetCoreAllocator));          // Note that the widgetCoreAllocator is declared before and thus destroyed after the widget list.
		///
		/// NOTE(review): in this copy of the file the text between angle brackets
		/// (include targets and template parameter/argument lists, including
		/// those in the examples above) appears to have been stripped; the
		/// declarations below are left exactly as found.
		///
		template
		class CoreAllocatorAdapter
		{
		public:
			typedef CoreAllocatorAdapter this_type;

		public:
			// To do: Make this constructor explicit, when there is no known code dependent on it being otherwise.
			CoreAllocatorAdapter(const char* pName = EASTL_NAME_VAL(EASTL_ALLOCATOR_DEFAULT_NAME), AllocatorType* pAllocator = EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR());
			CoreAllocatorAdapter(const char* pName, AllocatorType* pAllocator, int flags);
			CoreAllocatorAdapter(const CoreAllocatorAdapter& x);
			CoreAllocatorAdapter(const CoreAllocatorAdapter& x, const char* pName); // Copies allocator and flags from x but uses pName as the name.

			CoreAllocatorAdapter& operator=(const CoreAllocatorAdapter& x);

			// The 'flags' argument of both allocate overloads is ignored by the
			// inline definitions later in this header; the adapter's own
			// mnFlags is passed to the core allocator instead.
			void* allocate(size_t n, int flags = 0);
			void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0);
			void  deallocate(void* p, size_t n);

			AllocatorType* get_allocator() const;
			void           set_allocator(AllocatorType* pAllocator);

			int  get_flags() const;
			void set_flags(int flags);

			// When EASTL_NAME_ENABLED is off, get_name returns
			// EASTL_ALLOCATOR_DEFAULT_NAME and set_name does nothing.
			const char* get_name() const;
			void        set_name(const char* pName);

		public: // Public because otherwise VC++ generates (possibly invalid) warnings about inline friend template specializations.
			AllocatorType* mpCoreAllocator;
			int            mnFlags;    // Allocation flags. See ICoreAllocator/AllocFlags.

			#if EASTL_NAME_ENABLED
				const char* mpName; // Debug name, used to track memory.
			#endif
		};

		// Two adapters compare equal when they refer to the same core allocator
		// and carry the same flags (see the inline definitions later in this
		// header); the debug name does not take part in the comparison.
		template
		bool operator==(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b);

		template
		bool operator!=(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b);



		/// EASTLICoreAllocator
		///
		/// Provides a standardized typedef for ICoreAllocator;
		///
		/// Example usage:
		///     eastl::list widgetList("UI/WidgetList", pSomeCoreAllocator);
		///     widgetList.push_back(Widget());
		///
		class ICoreAllocator;
		class EASTLCoreAllocatorImpl;

		typedef CoreAllocatorAdapter EASTLICoreAllocatorAdapter;
		typedef CoreAllocatorAdapter EASTLCoreAllocatorAdapter;
		typedef EASTLICoreAllocatorAdapter EASTLICoreAllocator; // for backwards compatibility



		/// EASTLICoreDeleter
		///
		/// Implements a functor which can free memory from the specified
		/// ICoreAllocator interface.
This is a convenience object provided for /// users who wish to have EASTL containers deallocate memory obtained from /// ICoreAllocator interfaces. /// template class CoreDeleterAdapter { public: typedef CoreDeleterAdapter this_type; AllocatorType* mpCoreAllocator; public: CoreDeleterAdapter(AllocatorType* pAllocator = EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR()) EA_NOEXCEPT : mpCoreAllocator(pAllocator) {} ~CoreDeleterAdapter() EA_NOEXCEPT {} template void operator()(T* p) { p->~T(); mpCoreAllocator->Free(p); } CoreDeleterAdapter(const CoreDeleterAdapter& in) { mpCoreAllocator = in.mpCoreAllocator; } CoreDeleterAdapter(CoreDeleterAdapter&& in) { mpCoreAllocator = in.mpCoreAllocator; in.mpCoreAllocator = nullptr; } CoreDeleterAdapter& operator=(const CoreDeleterAdapter& in) { mpCoreAllocator = in.mpCoreAllocator; return *this; } CoreDeleterAdapter& operator=(CoreDeleterAdapter&& in) { mpCoreAllocator = in.mpCoreAllocator; in.mpCoreAllocator = nullptr; return *this; } }; /// EASTLICoreDeleter /// /// Provides a standardized typedef for ICoreAllocator implementations. /// /// Example usage: /// eastl::shared_ptr foo(pA, EASTLCoreDeleter()); /// typedef CoreDeleterAdapter EASTLICoreDeleterAdapter; typedef CoreDeleterAdapter EASTLCoreDeleterAdapter; } // namespace Allocator } // namespace EA /////////////////////////////////////////////////////////////////////////////// // Inlines /////////////////////////////////////////////////////////////////////////////// namespace EA { namespace Allocator { template inline CoreAllocatorAdapter::CoreAllocatorAdapter(const char* EASTL_NAME(pName), AllocatorType* pCoreAllocator) : mpCoreAllocator(pCoreAllocator), mnFlags(0) { #if EASTL_NAME_ENABLED mpName = pName ? 
pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif } template inline CoreAllocatorAdapter::CoreAllocatorAdapter(const char* EASTL_NAME(pName), AllocatorType* pCoreAllocator, int flags) : mpCoreAllocator(pCoreAllocator), mnFlags(flags) { #if EASTL_NAME_ENABLED mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif } template inline CoreAllocatorAdapter::CoreAllocatorAdapter(const CoreAllocatorAdapter& x) : mpCoreAllocator(x.mpCoreAllocator), mnFlags(x.mnFlags) { #if EASTL_NAME_ENABLED mpName = x.mpName; #endif } template inline CoreAllocatorAdapter::CoreAllocatorAdapter(const CoreAllocatorAdapter& x, const char* EASTL_NAME(pName)) : mpCoreAllocator(x.mpCoreAllocator), mnFlags(x.mnFlags) { #if EASTL_NAME_ENABLED mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; #endif } template inline CoreAllocatorAdapter& CoreAllocatorAdapter::operator=(const CoreAllocatorAdapter& x) { mpCoreAllocator = x.mpCoreAllocator; mnFlags = x.mnFlags; #if EASTL_NAME_ENABLED mpName = x.mpName; #endif return *this; } template inline void* CoreAllocatorAdapter::allocate(size_t n, int /*flags*/) { // It turns out that EASTL itself doesn't use the flags parameter, // whereas the user here might well want to specify a flags // parameter. So we use ours instead of the one passed in. return mpCoreAllocator->Alloc(n, EASTL_NAME_VAL(mpName), (unsigned)mnFlags); } template inline void* CoreAllocatorAdapter::allocate(size_t n, size_t alignment, size_t offset, int /*flags*/) { // It turns out that EASTL itself doesn't use the flags parameter, // whereas the user here might well want to specify a flags // parameter. So we use ours instead of the one passed in. 
return mpCoreAllocator->Alloc(n, EASTL_NAME_VAL(mpName), (unsigned)mnFlags, (unsigned)alignment, (unsigned)offset); } template inline void CoreAllocatorAdapter::deallocate(void* p, size_t n) { return mpCoreAllocator->Free(p, n); } template inline AllocatorType* CoreAllocatorAdapter::get_allocator() const { return mpCoreAllocator; } template inline void CoreAllocatorAdapter::set_allocator(AllocatorType* pAllocator) { mpCoreAllocator = pAllocator; } template inline int CoreAllocatorAdapter::get_flags() const { return mnFlags; } template inline void CoreAllocatorAdapter::set_flags(int flags) { mnFlags = flags; } template inline const char* CoreAllocatorAdapter::get_name() const { #if EASTL_NAME_ENABLED return mpName; #else return EASTL_ALLOCATOR_DEFAULT_NAME; #endif } template inline void CoreAllocatorAdapter::set_name(const char* pName) { #if EASTL_NAME_ENABLED mpName = pName; #else (void)pName; #endif } template inline bool operator==(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b) { return (a.mpCoreAllocator == b.mpCoreAllocator) && (a.mnFlags == b.mnFlags); } template inline bool operator!=(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b) { return (a.mpCoreAllocator != b.mpCoreAllocator) || (a.mnFlags != b.mnFlags); } } // namespace Allocator } // namespace EA #endif // EASTL_CORE_ALLOCATOR_ENABLED #endif // Header include guard ================================================ FILE: include/EASTL/deque.h ================================================ ///////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. ///////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // deque design // // A deque (pronounced "deck") is a double-ended queue, though this is partially // of a misnomer. 
// A deque does indeed let you add and remove values from both ends
// of the container, but it's not usually used for such a thing and instead is used
// as a more flexible version of a vector. It provides operator[] (random access)
// and can insert items anywhere and not just at the front and back.
//
// While you can implement a double-ended queue via a doubly-linked list, deque is
// instead implemented as a list of arrays. The benefit of this is that memory usage
// is lower and that random access can be had with decent efficiency.
//
// Our implementation of deque is just like every other implementation of deque,
// as the C++ standard all but dictates that you make it work this way. Below
// we have a depiction of an array (or vector) of 48 items, with each node being
// a '+' character and extra capacity being a '-' character. What we have is one
// contiguous block of memory:
//
//     ++++++++++++++++++++++++++++++++++++++++++++++++-----------------
//     0                                              47
//
// With a deque, the same array of 48 items would be implemented as multiple smaller
// arrays of contiguous memory, each of fixed size. We will call these "sub-arrays."
// In the case here, we have six arrays of 8 nodes:
//
//     ++++++++ ++++++++ ++++++++ ++++++++ ++++++++ ++++++++
//
// With a vector, item [0] is the first item and item [47] is the last item. With a
// deque, item [0] is usually not the first item and neither is item [47]. There is
// extra capacity on both the front side and the back side of the deque. So a deque
// (of 24 items) actually looks like this:
//
//     -------- -----+++ ++++++++ ++++++++ +++++--- --------
//                   0                         23
//
// To insert items at the front, you move into the capacity on the left, and to insert
// items at the back, you append items on the right. As you can see, inserting an item
// at the front doesn't require allocating new memory nor does it require moving any
// items in the container. It merely involves moving the pointer to the [0] item to
// the left by one node.
// // We keep track of these sub-arrays by having an array of pointers, with each array // entry pointing to each of the sub-arrays. We could alternatively use a linked // list of pointers, but it turns out we can implement our deque::operator[] more // efficiently if we use an array of pointers instead of a list of pointers. // // To implement deque::iterator, we could keep a struct which is essentially this: // struct iterator { // int subArrayIndex; // int subArrayOffset; // } // // In practice, we implement iterators a little differently, but in reality our // implementation isn't much different from the above. It turns out that it's most // simple if we also manage the location of item [0] and item [end] by using these // same iterators. // // To consider: Implement the deque as a circular deque instead of a linear one. // This would use a similar subarray layout but iterators would // wrap around when they reached the end of the subarray pointer list. // ////////////////////////////////////////////////////////////////////////////// #ifndef EASTL_DEQUE_H #define EASTL_DEQUE_H #include #include #include #include #include #include #include EA_DISABLE_ALL_VC_WARNINGS() #include #include EA_RESTORE_ALL_VC_WARNINGS() #if EASTL_EXCEPTIONS_ENABLED EA_DISABLE_ALL_VC_WARNINGS() #include // std::out_of_range, std::length_error, std::logic_error. EA_RESTORE_ALL_VC_WARNINGS() #endif // 4267 - 'argument' : conversion from 'size_t' to 'const uint32_t', possible loss of data. This is a bogus warning resulting from a bug in VC++. // 4345 - Behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized // 4480 - nonstandard extension used: specifying underlying type for enum // 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc // 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. 
EA_DISABLE_VC_WARNING(4267 4345 4480 4530 4571); #if EASTL_EXCEPTIONS_ENABLED // 4703 - potentially uninitialized local pointer variable used. VC++ is mistakenly analyzing the possibility of uninitialized variables, though it's not easy for it to do so. // 4701 - potentially uninitialized local variable used. EA_DISABLE_VC_WARNING(4703 4701) #endif #if defined(EA_PRAGMA_ONCE_SUPPORTED) #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. #endif namespace eastl { /// EASTL_DEQUE_DEFAULT_NAME /// /// Defines a default container name in the absence of a user-provided name. /// #ifndef EASTL_DEQUE_DEFAULT_NAME #define EASTL_DEQUE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " deque" // Unless the user overrides something, this is "EASTL deque". #endif /// EASTL_DEQUE_DEFAULT_ALLOCATOR /// #ifndef EASTL_DEQUE_DEFAULT_ALLOCATOR #define EASTL_DEQUE_DEFAULT_ALLOCATOR allocator_type(EASTL_DEQUE_DEFAULT_NAME) #endif /// DEQUE_DEFAULT_SUBARRAY_SIZE /// /// Defines the default number of items in a subarray. /// Note that the user has the option of specifying the subarray size /// in the deque template declaration. /// #if !defined(__GNUC__) || (__GNUC__ >= 3) // GCC 2.x can't handle the declaration below. #define DEQUE_DEFAULT_SUBARRAY_SIZE(T) ((sizeof(T) <= 4) ? 64 : ((sizeof(T) <= 8) ? 32 : ((sizeof(T) <= 16) ? 16 : ((sizeof(T) <= 32) ? 8 : 4)))) #else #define DEQUE_DEFAULT_SUBARRAY_SIZE(T) 16 #endif /// DequeIterator /// /// The DequeIterator provides both const and non-const iterators for deque. /// It also is used for the tracking of the begin and end for the deque. 
	///
	/// An iterator stores four pointers: the current element, the begin and
	/// end of the subarray that element lives in, and the position of that
	/// subarray within the deque's array of subarray pointers.
	///
	/// NOTE(review): in this copy of the file the text between angle brackets
	/// (template parameter/argument lists) appears to have been stripped; the
	/// declarations below are left exactly as found.
	///
	template
	struct DequeIterator
	{
		typedef DequeIterator                this_type;
		typedef DequeIterator                iterator;
		typedef DequeIterator                const_iterator;
		typedef ptrdiff_t                    difference_type;
		typedef eastl::random_access_iterator_tag iterator_category;
		typedef T                            value_type;
		typedef T*                           pointer;
		typedef T&                           reference;

	public:
		DequeIterator();
		DequeIterator(const iterator& x);
		DequeIterator& operator=(const iterator& x);

		pointer   operator->() const;
		reference operator*() const;

		this_type& operator++();
		this_type  operator++(int);

		this_type& operator--();
		this_type  operator--(int);

		this_type& operator+=(difference_type n);
		this_type& operator-=(difference_type n);

		this_type operator+(difference_type n) const;
		this_type operator-(difference_type n) const;

	protected:
		// Friends: other iterator instantiations, the owning containers and the
		// free comparison/difference operators need access to the raw pointers.
		template
		friend struct DequeIterator;

		template
		friend struct DequeBase;

		template
		friend class deque;

		template
		friend bool operator==(const DequeIterator&,
							   const DequeIterator&);

		template
		friend bool operator!=(const DequeIterator&,
							   const DequeIterator&);

		template
		friend bool operator!=(const DequeIterator& a,
							   const DequeIterator& b);

		template
		friend bool operator< (const DequeIterator&,
							   const DequeIterator&);

		template
		friend bool operator> (const DequeIterator&,
							   const DequeIterator&);

		template
		friend bool operator<=(const DequeIterator&,
							   const DequeIterator&);

		template
		friend bool operator>=(const DequeIterator&,
							   const DequeIterator&);

		template
		friend typename DequeIterator::difference_type
		operator-(const DequeIterator& a,
				  const DequeIterator& b);

	protected:
		T*  mpCurrent;          // Where we currently point. Declared first because it's used most often.
		T*  mpBegin;            // The beginning of the current subarray.
		T*  mpEnd;              // The end of the current subarray. To consider: remove this member, as it is always equal to 'mpBegin + kDequeSubarraySize'. Given that deque subarrays usually consist of hundreds of bytes, this isn't a massive win. Also, now that we are implementing a zero-allocation new deque policy, mpEnd may in fact not be equal to 'mpBegin + kDequeSubarraySize'.
		T** mpCurrentArrayPtr;  // Pointer to current subarray. We could alternatively implement this as a list node iterator if the deque used a linked list.

		// Empty tag types used only to select among the constructor overloads below.
		struct Increment {};
		struct Decrement {};
		struct FromConst {};

		DequeIterator(T** pCurrentArrayPtr, T* pCurrent);
		// Builds an iterator from a const_iterator by copying its four pointers.
		DequeIterator(const const_iterator& x, FromConst) : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr){}
		DequeIterator(const iterator& x, Increment);
		DequeIterator(const iterator& x, Decrement);

		this_type move(const iterator& first, const iterator& last, true_type);  // true means that value_type has the type_trait is_trivially_copyable,
		this_type move(const iterator& first, const iterator& last, false_type); // false means it does not.

		void move_backward(const iterator& first, const iterator& last, true_type);  // true means that value_type has the type_trait is_trivially_copyable,
		void move_backward(const iterator& first, const iterator& last, false_type); // false means it does not.

		void SetSubarray(T** pCurrentArrayPtr);
	};




	/// DequeBase
	///
	/// The DequeBase implements memory allocation for deque.
	/// See VectorBase (class vector) for an explanation of why we
	/// create this separate base class.
	///
	template
	struct DequeBase
	{
		typedef T                                value_type;
		typedef Allocator                        allocator_type;
		typedef eastl_size_t                     size_type;     // See config.h for the definition of eastl_size_t, which defaults to size_t.
		typedef ptrdiff_t                        difference_type;
		typedef DequeIterator                    iterator;
		typedef DequeIterator                    const_iterator;

		static const size_type npos     = (size_type)-1;      /// 'npos' means non-valid position or simply non-position.
		static const size_type kMaxSize = (size_type)-2;      /// -1 is reserved for 'npos'. It also happens to be slightly beneficial that kMaxSize is a value less than -1, as it helps us deal with potential integer wraparound issues.

		enum
		{
			kMinPtrArraySize = 8,                               /// A new empty deque has a ptrArraySize of 0, but any allocated ptrArrays use this min size.
			kSubarraySize    = kDequeSubarraySize               ///
		  //kNodeSize        = kDequeSubarraySize * sizeof(T)   /// Disabled because it prevents the ability to do this: struct X{ eastl::deque mDequeOfSelf; };
		};

	protected:
		enum Side       /// Defines the side of the deque: front or back.
		{
			kSideFront, /// Identifies the front side of the deque.
			kSideBack   /// Identifies the back side of the deque.
		};

		T**             mpPtrArray;         // Array of pointers to subarrays.
		size_type       mnPtrArraySize;     // Possibly we should store this as T** mpArrayEnd.
		iterator        mItBegin;           // Where within the subarrays is our beginning.
		iterator        mItEnd;             // Where within the subarrays is our end.
		allocator_type  mAllocator;         // To do: Use base class optimization to make this go away.

	public:
		DequeBase(const allocator_type& allocator);
		DequeBase(size_type n);
		DequeBase(size_type n, const allocator_type& allocator);
	   ~DequeBase();

		const allocator_type& get_allocator() const EA_NOEXCEPT;
		allocator_type&       get_allocator() EA_NOEXCEPT;
		void                  set_allocator(const allocator_type& allocator);

	protected:
		// Allocation helpers for individual subarrays and for the array of
		// subarray pointers. Only the declarations are visible here.
		T*       DoAllocateSubarray();
		void     DoFreeSubarray(T* p);
		void     DoFreeSubarrays(T** pBegin, T** pEnd);

		T**      DoAllocatePtrArray(size_type n);
		void     DoFreePtrArray(T** p, size_t n);

		iterator DoReallocSubarray(size_type nAdditionalCapacity, Side allocationSide);
		void     DoReallocPtrArray(size_type nAdditionalCapacity, Side allocationSide);

		void     DoInit(size_type n);

	}; // DequeBase




	/// deque
	///
	/// Implements a conventional C++ double-ended queue. The implementation used here
	/// is very much like any other deque implementations you may have seen, as it
	/// follows the standard algorithm for deque design.
	///
	/// Note:
	/// As of this writing, deque does not support zero-allocation initial emptiness.
	/// A newly created deque with zero elements will still allocate a subarray
	/// pointer set.
	/// We are looking for efficient and clean ways to get around this,
	/// but current efforts have resulted in less efficient and more fragile code.
	/// The logic of this class doesn't lend itself to a clean implementation.
	/// It turns out that deques are one of the least likely classes you'd want this
	/// behaviour in, so until this functionality becomes very important to somebody,
	/// we will leave it as-is. It can probably be solved by adding some extra code to
	/// the Do* functions and adding good comments explaining the situation.
	///
	/// NOTE(review): in this copy of the file the text between angle brackets
	/// (template parameter/argument lists) appears to have been stripped; the
	/// declarations below are left exactly as found.
	///
	template
	class deque : public DequeBase
	{
	public:
		typedef DequeBase                                  base_type;
		typedef deque                                      this_type;
		typedef T                                          value_type;
		typedef T*                                         pointer;
		typedef const T*                                   const_pointer;
		typedef T&                                         reference;
		typedef const T&                                   const_reference;
		typedef DequeIterator                              iterator;
		typedef DequeIterator                              const_iterator;
		typedef eastl::reverse_iterator                    reverse_iterator;
		typedef eastl::reverse_iterator                    const_reverse_iterator;
		typedef typename base_type::size_type              size_type;
		typedef typename base_type::difference_type        difference_type;
		typedef typename base_type::allocator_type         allocator_type;

		using base_type::npos;

		static_assert(!is_const::value, "deque::value_type must be non-const.");
		static_assert(!is_volatile::value, "deque::value_type must be non-volatile.");

	protected:
		// Members and helpers inherited from the base class, named here so they
		// can be used unqualified inside this template.
		using base_type::kSideFront;
		using base_type::kSideBack;
		using base_type::mpPtrArray;
		using base_type::mnPtrArraySize;
		using base_type::mItBegin;
		using base_type::mItEnd;
		using base_type::mAllocator;
		using base_type::DoAllocateSubarray;
		using base_type::DoFreeSubarray;
		using base_type::DoFreeSubarrays;
		using base_type::DoAllocatePtrArray;
		using base_type::DoFreePtrArray;
		using base_type::DoReallocSubarray;
		using base_type::DoReallocPtrArray;

	public:
		// Construction / destruction
		deque();
		explicit deque(const allocator_type& allocator);
		explicit deque(size_type n, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR);
		deque(size_type n, const value_type& value, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR);
		deque(const this_type& x);
		deque(this_type&& x);
		deque(this_type&& x, const allocator_type& allocator);
		deque(std::initializer_list ilist, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR);

		// note: this has pre-C++11 semantics:
		// this constructor is equivalent to the constructor deque(static_cast(first), static_cast(last)) if InputIterator is an integral type.
		template
		deque(InputIterator first, InputIterator last); // allocator arg removed because VC7.1 fails on the default arg. To do: Make a second version of this function without a default arg.

	   ~deque();

		// Assignment
		this_type& operator=(const this_type& x);
		this_type& operator=(std::initializer_list ilist);
		this_type& operator=(this_type&& x);

		void swap(this_type& x);

		void assign(size_type n, const value_type& value);
		void assign(std::initializer_list ilist);

		template                                                // It turns out that the C++ std::deque specifies a two argument
		void assign(InputIterator first, InputIterator last);   // version of assign that takes (int size, int value). These are not
																// iterators, so we need to do a template compiler trick to do the right thing.

		// Iteration
		iterator       begin() EA_NOEXCEPT;
		const_iterator begin() const EA_NOEXCEPT;
		const_iterator cbegin() const EA_NOEXCEPT;

		iterator       end() EA_NOEXCEPT;
		const_iterator end() const EA_NOEXCEPT;
		const_iterator cend() const EA_NOEXCEPT;

		reverse_iterator       rbegin() EA_NOEXCEPT;
		const_reverse_iterator rbegin() const EA_NOEXCEPT;
		const_reverse_iterator crbegin() const EA_NOEXCEPT;

		reverse_iterator       rend() EA_NOEXCEPT;
		const_reverse_iterator rend() const EA_NOEXCEPT;
		const_reverse_iterator crend() const EA_NOEXCEPT;

		// Size and capacity
		bool      empty() const EA_NOEXCEPT;
		size_type size() const EA_NOEXCEPT;

		void resize(size_type n, const value_type& value);
		void resize(size_type n);

		void shrink_to_fit();
		void set_capacity(size_type n = base_type::npos);

		// Element access
		reference       operator[](size_type n);
		const_reference operator[](size_type n) const;

		reference       at(size_type n);
		const_reference at(size_type n) const;

		reference       front();
		const_reference front() const;

		reference       back();
		const_reference back() const;

		// Insertion / removal at the ends
		void      push_front(const value_type& value);
		reference push_front();
		void      push_front(value_type&& value);

		void      push_back(const value_type& value);
		reference push_back();
		void      push_back(value_type&& value);

		void pop_front();
		void pop_back();

		// Insertion / removal at arbitrary positions
		template
		iterator emplace(const_iterator position, Args&&... args);

		template
		reference emplace_front(Args&&... args);

		template
		reference emplace_back(Args&&... args);

		iterator insert(const_iterator position, const value_type& value);
		iterator insert(const_iterator position, value_type&& value);
		iterator insert(const_iterator position, size_type n, const value_type& value);
		iterator insert(const_iterator position, std::initializer_list ilist);

		// note: this has pre-C++11 semantics:
		// this function is equivalent to insert(const_iterator position, static_cast(first), static_cast(last)) if InputIterator is an integral type.
		// ie. same as insert(const_iterator position, size_type n, const value_type& value)
		template
		iterator insert(const_iterator position, InputIterator first, InputIterator last);

		iterator         erase(const_iterator position);
		iterator         erase(const_iterator first, const_iterator last);
		reverse_iterator erase(reverse_iterator position);
		reverse_iterator erase(reverse_iterator first, reverse_iterator last);

		void clear();
		//void reset_lose_memory(); // Disabled until it can be implemented efficiently and cleanly.  // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs.

		// Debug validation
		bool validate() const;
		int  validate_iterator(const_iterator i) const;

	protected:
		// Implementation helpers. The true_type/false_type and iterator-tag
		// parameters select between the integral (count, value) form and the
		// iterator-range form of the public init/assign/insert entry points.
		template
		void DoInit(Integer n, Integer value, true_type);

		template
		void DoInit(InputIterator first, InputIterator last, false_type);

		template
		void DoInitFromIterator(InputIterator first, InputIterator last, eastl::input_iterator_tag);

		template
		void DoInitFromIterator(ForwardIterator first, ForwardIterator last, eastl::forward_iterator_tag);

		void DoFillInit(const value_type& value);

		template
		void DoAssign(Integer n, Integer value, true_type);

		template
		void DoAssign(InputIterator first, InputIterator last, false_type);

		void DoAssignValues(size_type n, const value_type& value);

		template
		iterator DoInsert(const const_iterator& position, Integer n, Integer value, true_type);

		template
		iterator DoInsert(const const_iterator& position, const InputIterator& first, const InputIterator& last, false_type);

		template
		iterator DoInsertFromIterator(const_iterator position, const InputIterator& first, const InputIterator& last, eastl::input_iterator_tag);

		template
		iterator DoInsertFromIterator(const_iterator position, const ForwardIterator& first, const ForwardIterator& last, eastl::forward_iterator_tag);

		iterator DoInsertValues(const_iterator position, size_type n, const value_type& value);

		void DoSwap(this_type& x);
	}; // class deque
/////////////////////////////////////////////////////////////////////// // DequeBase /////////////////////////////////////////////////////////////////////// template DequeBase::DequeBase(const allocator_type& allocator) : mpPtrArray(NULL), mnPtrArraySize(0), mItBegin(), mItEnd(), mAllocator(allocator) { // It is assumed here that the deque subclass will init us when/as needed. } template DequeBase::DequeBase(size_type n) : mpPtrArray(NULL), mnPtrArraySize(0), mItBegin(), mItEnd(), mAllocator(EASTL_DEQUE_DEFAULT_NAME) { // It's important to note that DoInit creates space for elements and assigns // mItBegin/mItEnd to point to them, but these elements are not constructed. // You need to immediately follow this constructor with code that constructs the values. DoInit(n); } template DequeBase::DequeBase(size_type n, const allocator_type& allocator) : mpPtrArray(NULL), mnPtrArraySize(0), mItBegin(), mItEnd(), mAllocator(allocator) { // It's important to note that DoInit creates space for elements and assigns // mItBegin/mItEnd to point to them, but these elements are not constructed. // You need to immediately follow this constructor with code that constructs the values. DoInit(n); } template DequeBase::~DequeBase() { if(mpPtrArray) { DoFreeSubarrays(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1); DoFreePtrArray(mpPtrArray, mnPtrArraySize); mpPtrArray = nullptr; } } template const typename DequeBase::allocator_type& DequeBase::get_allocator() const EA_NOEXCEPT { return mAllocator; } template typename DequeBase::allocator_type& DequeBase::get_allocator() EA_NOEXCEPT { return mAllocator; } template void DequeBase::set_allocator(const allocator_type& allocator) { // The only time you can set an allocator is with an empty unused container, such as right after construction. if(EASTL_LIKELY(mAllocator != allocator)) { // our deque implementation always has allocations for mpPtrArray. 
this set_allocator() is unlike other container's set_allocator() member function // in that it actually frees allocations when assigning the allocator. this lack of consistency is unfortunate. if(EASTL_LIKELY(mpPtrArray && (mItBegin.mpCurrent == mItEnd.mpCurrent))) // is the container empty? { DoFreeSubarrays(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1); DoFreePtrArray(mpPtrArray, mnPtrArraySize); mAllocator = allocator; DoInit(0); } else { EASTL_THROW_MSG_OR_ASSERT(std::logic_error, "deque::set_allocator -- attempt to change allocator after inserting elements."); } } } template T* DequeBase::DoAllocateSubarray() { T* p = (T*)allocate_memory(mAllocator, kDequeSubarraySize * sizeof(T), EASTL_ALIGN_OF(T), 0); EASTL_ASSERT_MSG(p != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); #if EASTL_DEBUG memset((void*)p, 0, kDequeSubarraySize * sizeof(T)); #endif return (T*)p; } template void DequeBase::DoFreeSubarray(T* p) { if(p) EASTLFree(mAllocator, p, kDequeSubarraySize * sizeof(T)); } template void DequeBase::DoFreeSubarrays(T** pBegin, T** pEnd) { while(pBegin < pEnd) DoFreeSubarray(*pBegin++); } template T** DequeBase::DoAllocatePtrArray(size_type n) { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(n >= 0x80000000)) EASTL_FAIL_MSG("deque::DoAllocatePtrArray -- improbably large request."); #endif T** pp = (T**)allocate_memory(mAllocator, n * sizeof(T*), EASTL_ALIGN_OF(T), 0); EASTL_ASSERT_MSG(pp != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); #if EASTL_DEBUG memset((void*)pp, 0, n * sizeof(T*)); #endif return pp; } template void DequeBase::DoFreePtrArray(T** pp, size_t n) { if(pp) EASTLFree(mAllocator, pp, n * sizeof(T*)); } template typename DequeBase::iterator DequeBase::DoReallocSubarray(size_type nAdditionalCapacity, Side allocationSide) { // nAdditionalCapacity refers to the amount of additional space we need to be // able to store in this deque. 
Typically this function is called as part of // an insert or append operation. This is the function that makes sure there // is enough capacity for the new elements to be copied into the deque. // The new capacity here is always at the front or back of the deque. // This function returns an iterator to that points to the new begin or // the new end of the deque space, depending on allocationSide. if(allocationSide == kSideFront) { // There might be some free space (nCurrentAdditionalCapacity) at the front of the existing subarray. const size_type nCurrentAdditionalCapacity = (size_type)(mItBegin.mpCurrent - mItBegin.mpBegin); if(EASTL_UNLIKELY(nCurrentAdditionalCapacity < nAdditionalCapacity)) // If we need to grow downward into a new subarray... { const difference_type nSubarrayIncrease = (difference_type)(((nAdditionalCapacity - nCurrentAdditionalCapacity) + kDequeSubarraySize - 1) / kDequeSubarraySize); difference_type i; if(nSubarrayIncrease > (mItBegin.mpCurrentArrayPtr - mpPtrArray)) // If there are not enough pointers in front of the current (first) one... DoReallocPtrArray((size_type)(nSubarrayIncrease - (mItBegin.mpCurrentArrayPtr - mpPtrArray)), kSideFront); #if EASTL_EXCEPTIONS_ENABLED try { #endif for(i = 1; i <= nSubarrayIncrease; ++i) mItBegin.mpCurrentArrayPtr[-i] = DoAllocateSubarray(); #if EASTL_EXCEPTIONS_ENABLED } catch(...) { for(difference_type j = 1; j < i; ++j) DoFreeSubarray(mItBegin.mpCurrentArrayPtr[-j]); throw; } #endif } return mItBegin - (difference_type)nAdditionalCapacity; } else // else kSideBack { const size_type nCurrentAdditionalCapacity = (size_type)((mItEnd.mpEnd - 1) - mItEnd.mpCurrent); if(EASTL_UNLIKELY(nCurrentAdditionalCapacity < nAdditionalCapacity)) // If we need to grow forward into a new subarray... 
{ const difference_type nSubarrayIncrease = (difference_type)(((nAdditionalCapacity - nCurrentAdditionalCapacity) + kDequeSubarraySize - 1) / kDequeSubarraySize); difference_type i; if(nSubarrayIncrease > ((mpPtrArray + mnPtrArraySize) - mItEnd.mpCurrentArrayPtr) - 1) // If there are not enough pointers after the current (last) one... DoReallocPtrArray((size_type)(nSubarrayIncrease - (((mpPtrArray + mnPtrArraySize) - mItEnd.mpCurrentArrayPtr) - 1)), kSideBack); #if EASTL_EXCEPTIONS_ENABLED try { #endif for(i = 1; i <= nSubarrayIncrease; ++i) mItEnd.mpCurrentArrayPtr[i] = DoAllocateSubarray(); #if EASTL_EXCEPTIONS_ENABLED } catch(...) { for(difference_type j = 1; j < i; ++j) DoFreeSubarray(mItEnd.mpCurrentArrayPtr[j]); throw; } #endif } return mItEnd + (difference_type)nAdditionalCapacity; } } template void DequeBase::DoReallocPtrArray(size_type nAdditionalCapacity, Side allocationSide) { // This function is not called unless the capacity is known to require a resize. // // We have an array of pointers (mpPtrArray), of which a segment of them are in use and // at either end of the array are zero or more unused pointers. This function is being // called because we need to extend the capacity on either side of this array by // nAdditionalCapacity pointers. However, it's possible that if the user is continually // using push_back and pop_front then the pointer array will continue to be extended // on the back side and unused on the front side. So while we are doing this resizing // here we also take the opportunity to recenter the pointers and thus be balanced. // It man turn out that we don't even need to reallocate the pointer array in order // to increase capacity on one side, as simply moving the pointers to the center may // be enough to open up the requires space. 
// // Balanced pointer array Unbalanced pointer array (unused space at front, no free space at back) // ----++++++++++++---- ---------+++++++++++ const size_type nUnusedPtrCountAtFront = (size_type)(mItBegin.mpCurrentArrayPtr - mpPtrArray); const size_type nUsedPtrCount = (size_type)(mItEnd.mpCurrentArrayPtr - mItBegin.mpCurrentArrayPtr) + 1; const size_type nUsedPtrSpace = nUsedPtrCount * sizeof(void*); const size_type nUnusedPtrCountAtBack = (mnPtrArraySize - nUnusedPtrCountAtFront) - nUsedPtrCount; value_type** pPtrArrayBegin; if((allocationSide == kSideBack) && (nAdditionalCapacity <= nUnusedPtrCountAtFront)) // If we can take advantage of unused pointers at the front without doing any reallocation... { if(nAdditionalCapacity < (nUnusedPtrCountAtFront / 2)) // Possibly use more space than required, if there's a lot of extra space. nAdditionalCapacity = (nUnusedPtrCountAtFront / 2); pPtrArrayBegin = mpPtrArray + (nUnusedPtrCountAtFront - nAdditionalCapacity); memmove(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); #if EASTL_DEBUG memset(pPtrArrayBegin + nUsedPtrCount, 0, (size_t)(mpPtrArray + mnPtrArraySize) - (size_t)(pPtrArrayBegin + nUsedPtrCount)); #endif } else if((allocationSide == kSideFront) && (nAdditionalCapacity <= nUnusedPtrCountAtBack)) // If we can take advantage of unused pointers at the back without doing any reallocation... { if(nAdditionalCapacity < (nUnusedPtrCountAtBack / 2)) // Possibly use more space than required, if there's a lot of extra space. nAdditionalCapacity = (nUnusedPtrCountAtBack / 2); pPtrArrayBegin = mItBegin.mpCurrentArrayPtr + nAdditionalCapacity; memmove(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); #if EASTL_DEBUG memset(mpPtrArray, 0, (size_t)((uintptr_t)pPtrArrayBegin - (uintptr_t)mpPtrArray)); #endif } else { // In this case we will have to do a reallocation. const size_type nNewPtrArraySize = mnPtrArraySize + eastl::max_alt(mnPtrArraySize, nAdditionalCapacity) + 2; // Allocate extra capacity. 
value_type** const pNewPtrArray = DoAllocatePtrArray(nNewPtrArraySize); pPtrArrayBegin = pNewPtrArray + (mItBegin.mpCurrentArrayPtr - mpPtrArray) + ((allocationSide == kSideFront) ? nAdditionalCapacity : 0); // The following is equivalent to: eastl::copy(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1, pPtrArrayBegin); // It's OK to use memcpy instead of memmove because the destination is guaranteed to non-overlap the source. if(mpPtrArray) // Could also say: 'if(mItBegin.mpCurrentArrayPtr)' memcpy(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); DoFreePtrArray(mpPtrArray, mnPtrArraySize); mpPtrArray = pNewPtrArray; mnPtrArraySize = nNewPtrArraySize; } // We need to reset the begin and end iterators, as code that calls this expects them to *not* be invalidated. mItBegin.SetSubarray(pPtrArrayBegin); mItEnd.SetSubarray((pPtrArrayBegin + nUsedPtrCount) - 1); } template void DequeBase::DoInit(size_type n) { // This code is disabled because it doesn't currently work properly. // We are trying to make it so that a deque can have a zero allocation // initial empty state, but we (OK, I) am having a hard time making // this elegant and efficient. //if(n) //{ const size_type nNewPtrArraySize = (size_type)((n / kDequeSubarraySize) + 1); // Always have at least one, even if n is zero. const size_type kMinPtrArraySize_ = kMinPtrArraySize; mnPtrArraySize = eastl::max_alt(kMinPtrArraySize_, (nNewPtrArraySize + 2)); mpPtrArray = DoAllocatePtrArray(mnPtrArraySize); value_type** const pPtrArrayBegin = (mpPtrArray + ((mnPtrArraySize - nNewPtrArraySize) / 2)); // Try to place it in the middle. value_type** const pPtrArrayEnd = pPtrArrayBegin + nNewPtrArraySize; value_type** pPtrArrayCurrent = pPtrArrayBegin; #if EASTL_EXCEPTIONS_ENABLED try { try { #endif while(pPtrArrayCurrent < pPtrArrayEnd) *pPtrArrayCurrent++ = DoAllocateSubarray(); #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(pPtrArrayBegin, pPtrArrayCurrent); throw; } } catch(...) 
{ DoFreePtrArray(mpPtrArray, mnPtrArraySize); mpPtrArray = NULL; mnPtrArraySize = 0; throw; } #endif mItBegin.SetSubarray(pPtrArrayBegin); mItBegin.mpCurrent = mItBegin.mpBegin; mItEnd.SetSubarray(pPtrArrayEnd - 1); mItEnd.mpCurrent = mItEnd.mpBegin + (difference_type)(n % kDequeSubarraySize); //} //else // Else we do a zero-allocation initialization. //{ // mpPtrArray = NULL; // mnPtrArraySize = 0; // // mItBegin.mpCurrentArrayPtr = NULL; // mItBegin.mpBegin = NULL; // mItBegin.mpEnd = NULL; // We intentionally create a situation whereby the subarray that has no capacity. // mItBegin.mpCurrent = NULL; // // mItEnd = mItBegin; //} } /////////////////////////////////////////////////////////////////////// // DequeIterator /////////////////////////////////////////////////////////////////////// template DequeIterator::DequeIterator() : mpCurrent(NULL), mpBegin(NULL), mpEnd(NULL), mpCurrentArrayPtr(NULL) { // Empty } template DequeIterator::DequeIterator(T** pCurrentArrayPtr, T* pCurrent) : mpCurrent(pCurrent), mpBegin(*pCurrentArrayPtr), mpEnd(pCurrent + kDequeSubarraySize), mpCurrentArrayPtr(pCurrentArrayPtr) { // Empty } template DequeIterator::DequeIterator(const iterator& x) : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) { // Empty } template DequeIterator& DequeIterator::operator=(const iterator& x) { mpCurrent = x.mpCurrent; mpBegin = x.mpBegin; mpEnd = x.mpEnd; mpCurrentArrayPtr = x.mpCurrentArrayPtr; return *this; } template DequeIterator::DequeIterator(const iterator& x, Increment) : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) { operator++(); } template DequeIterator::DequeIterator(const iterator& x, Decrement) : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) { operator--(); } template typename DequeIterator::pointer DequeIterator::operator->() const { return mpCurrent; } template typename 
DequeIterator::reference DequeIterator::operator*() const { return *mpCurrent; } template typename DequeIterator::this_type& DequeIterator::operator++() { if(EASTL_UNLIKELY(++mpCurrent == mpEnd)) { mpBegin = *++mpCurrentArrayPtr; mpEnd = mpBegin + kDequeSubarraySize; mpCurrent = mpBegin; } return *this; } template typename DequeIterator::this_type DequeIterator::operator++(int) { const this_type temp(*this); operator++(); return temp; } template typename DequeIterator::this_type& DequeIterator::operator--() { if(EASTL_UNLIKELY(mpCurrent == mpBegin)) { mpBegin = *--mpCurrentArrayPtr; mpEnd = mpBegin + kDequeSubarraySize; mpCurrent = mpEnd; // fall through... } --mpCurrent; return *this; } template typename DequeIterator::this_type DequeIterator::operator--(int) { const this_type temp(*this); operator--(); return temp; } template typename DequeIterator::this_type& DequeIterator::operator+=(difference_type n) { const difference_type subarrayPosition = (mpCurrent - mpBegin) + n; // Cast from signed to unsigned (size_t) in order to obviate the need to compare to < 0. if((size_t)subarrayPosition < (size_t)kDequeSubarraySize) // If the new position is within the current subarray (i.e. >= 0 && < kSubArraySize)... mpCurrent += n; else { // This implementation is a branchless version which works by offsetting // the math to always be in the positive range. Much of the values here // reduce to constants and both the multiplication and division are of // power of two sizes and so this calculation ends up compiling down to // just one addition, one shift and one subtraction. This algorithm has // a theoretical weakness in that on 32 bit systems it will fail if the // value of n is >= (2^32 - 2^24) or 4,278,190,080 of if kDequeSubarraySize // is >= 2^24 or 16,777,216. EASTL_CT_ASSERT((kDequeSubarraySize & (kDequeSubarraySize - 1)) == 0); // Verify that it is a power of 2. 
const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); SetSubarray(mpCurrentArrayPtr + subarrayIndex); mpCurrent = mpBegin + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize)); } return *this; } template typename DequeIterator::this_type& DequeIterator::operator-=(difference_type n) { return (*this).operator+=(-n); } template typename DequeIterator::this_type DequeIterator::operator+(difference_type n) const { return this_type(*this).operator+=(n); } template typename DequeIterator::this_type DequeIterator::operator-(difference_type n) const { return this_type(*this).operator+=(-n); } template typename DequeIterator::this_type DequeIterator::move(const iterator& first, const iterator& last, true_type) { // To do: Implement this as a loop which does memcpys between subarrays appropriately. // Currently we only do memcpy if the entire operation occurs within a single subarray. if((first.mpBegin == last.mpBegin) && (first.mpBegin == mpBegin)) // If all operations are within the same subarray, implement the operation as a memmove. { memmove(mpCurrent, first.mpCurrent, (size_t)((uintptr_t)last.mpCurrent - (uintptr_t)first.mpCurrent)); return *this + (last.mpCurrent - first.mpCurrent); } return eastl::move(first, last, *this); } template typename DequeIterator::this_type DequeIterator::move(const iterator& first, const iterator& last, false_type) { return eastl::move(first, last, *this); } template void DequeIterator::move_backward(const iterator& first, const iterator& last, true_type) { // To do: Implement this as a loop which does memmoves between subarrays appropriately. // Currently we only do memcpy if the entire operation occurs within a single subarray. if((first.mpBegin == last.mpBegin) && (first.mpBegin == mpBegin)) // If all operations are within the same subarray, implement the operation as a memcpy. 
memmove(mpCurrent - (last.mpCurrent - first.mpCurrent), first.mpCurrent, (size_t)((uintptr_t)last.mpCurrent - (uintptr_t)first.mpCurrent)); else eastl::move_backward(first, last, *this); } template void DequeIterator::move_backward(const iterator& first, const iterator& last, false_type) { eastl::move_backward(first, last, *this); } template void DequeIterator::SetSubarray(T** pCurrentArrayPtr) { mpCurrentArrayPtr = pCurrentArrayPtr; mpBegin = *pCurrentArrayPtr; mpEnd = mpBegin + kDequeSubarraySize; } // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. // Thus we provide additional template paremeters here to support this. The defect report does not // require us to support comparisons between reverse_iterators and const_reverse_iterators. template inline bool operator==(const DequeIterator& a, const DequeIterator& b) { return a.mpCurrent == b.mpCurrent; } template inline bool operator!=(const DequeIterator& a, const DequeIterator& b) { return a.mpCurrent != b.mpCurrent; } // We provide a version of operator!= for the case where the iterators are of the // same type. This helps prevent ambiguity errors in the presence of rel_ops. template inline bool operator!=(const DequeIterator& a, const DequeIterator& b) { return a.mpCurrent != b.mpCurrent; } template inline bool operator<(const DequeIterator& a, const DequeIterator& b) { return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent < b.mpCurrent) : (a.mpCurrentArrayPtr < b.mpCurrentArrayPtr); } template inline bool operator>(const DequeIterator& a, const DequeIterator& b) { return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent > b.mpCurrent) : (a.mpCurrentArrayPtr > b.mpCurrentArrayPtr); } template inline bool operator<=(const DequeIterator& a, const DequeIterator& b) { return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? 
(a.mpCurrent <= b.mpCurrent) : (a.mpCurrentArrayPtr <= b.mpCurrentArrayPtr); } template inline bool operator>=(const DequeIterator& a, const DequeIterator& b) { return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent >= b.mpCurrent) : (a.mpCurrentArrayPtr >= b.mpCurrentArrayPtr); } // Random access iterators must support operator + and operator -. // You can only add an integer to an iterator, and you cannot add two iterators. template inline DequeIterator operator+(ptrdiff_t n, const DequeIterator& x) { return x + n; // Implement (n + x) in terms of (x + n). } // You can only add an integer to an iterator, but you can subtract two iterators. // The C++ defect report #179 mentioned above specifically refers to // operator - and states that we support the subtraction of const and non-const iterators. template inline typename DequeIterator::difference_type operator-(const DequeIterator& a, const DequeIterator& b) { // This is a fairly clever algorithm that has been used in STL deque implementations since the original HP STL: typedef typename DequeIterator::difference_type difference_type; return ((difference_type)kDequeSubarraySize * ((a.mpCurrentArrayPtr - b.mpCurrentArrayPtr) - 1)) + (a.mpCurrent - a.mpBegin) + (b.mpEnd - b.mpCurrent); } /////////////////////////////////////////////////////////////////////// // deque /////////////////////////////////////////////////////////////////////// template inline deque::deque() : base_type((size_type)0) { // Empty } template inline deque::deque(const allocator_type& allocator) : base_type((size_type)0, allocator) { // Empty } template inline deque::deque(size_type n, const allocator_type& allocator) : base_type(n, allocator) { DoFillInit(value_type()); } template inline deque::deque(size_type n, const value_type& value, const allocator_type& allocator) : base_type(n, allocator) { DoFillInit(value); } template inline deque::deque(const this_type& x) : base_type(x.size(), x.mAllocator) { 
eastl::uninitialized_copy(x.mItBegin, x.mItEnd, mItBegin); } template inline deque::deque(this_type&& x) : base_type((size_type)0, x.mAllocator) { swap(x); } template inline deque::deque(this_type&& x, const allocator_type& allocator) : base_type((size_type)0, allocator) { swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. } template inline deque::deque(std::initializer_list ilist, const allocator_type& allocator) : base_type(allocator) { DoInit(ilist.begin(), ilist.end(), false_type()); } template template inline deque::deque(InputIterator first, InputIterator last) : base_type(EASTL_DEQUE_DEFAULT_ALLOCATOR) // Call the empty base constructor, which does nothing. We need to do all the work in our own DoInit. { DoInit(first, last, is_integral()); } template inline deque::~deque() { // Call destructors. Parent class will free the memory. for(iterator itCurrent(mItBegin); itCurrent != mItEnd; ++itCurrent) itCurrent.mpCurrent->~value_type(); } template typename deque::this_type& deque::operator=(const this_type& x) { if(&x != this) // If not assigning to ourselves... { // If (EASTL_ALLOCATOR_COPY_ENABLED == 1) and the current contents are allocated by an // allocator that's unequal to x's allocator, we need to reallocate our elements with // our current allocator and reallocate it with x's allocator. If the allocators are // equal then we can use a more optimal algorithm that doesn't reallocate our elements // but instead can copy them in place. #if EASTL_ALLOCATOR_COPY_ENABLED bool bSlowerPathwayRequired = (mAllocator != x.mAllocator); #else bool bSlowerPathwayRequired = false; #endif if(bSlowerPathwayRequired) { // We can't currently use set_capacity(0) or shrink_to_fit, because they // leave a remaining allocation with our old allocator. So we do a similar // thing but set our allocator to x.mAllocator while doing so. 
this_type temp(x.mAllocator); DoSwap(temp); // Now we have an empty container with an allocator equal to x.mAllocator, ready to assign from x. } DoAssign(x.begin(), x.end(), eastl::false_type()); } return *this; } template inline typename deque::this_type& deque::operator=(this_type&& x) { if(this != &x) { this_type temp(mAllocator); swap(temp); swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. } return *this; } template inline typename deque::this_type& deque::operator=(std::initializer_list ilist) { DoAssign(ilist.begin(), ilist.end(), false_type()); return *this; } template inline void deque::assign(size_type n, const value_type& value) { DoAssignValues(n, value); } template inline void deque::assign(std::initializer_list ilist) { DoAssign(ilist.begin(), ilist.end(), false_type()); } // It turns out that the C++ std::deque specifies a two argument // version of assign that takes (int size, int value). These are not // iterators, so we need to do a template compiler trick to do the right thing. 
template template inline void deque::assign(InputIterator first, InputIterator last) { DoAssign(first, last, is_integral()); } template inline typename deque::iterator deque::begin() EA_NOEXCEPT { return mItBegin; } template inline typename deque::const_iterator deque::begin() const EA_NOEXCEPT { return mItBegin; } template inline typename deque::const_iterator deque::cbegin() const EA_NOEXCEPT { return mItBegin; } template inline typename deque::iterator deque::end() EA_NOEXCEPT { return mItEnd; } template typename deque::const_iterator deque::end() const EA_NOEXCEPT { return mItEnd; } template inline typename deque::const_iterator deque::cend() const EA_NOEXCEPT { return mItEnd; } template inline typename deque::reverse_iterator deque::rbegin() EA_NOEXCEPT { return reverse_iterator(mItEnd); } template inline typename deque::const_reverse_iterator deque::rbegin() const EA_NOEXCEPT { return const_reverse_iterator(mItEnd); } template inline typename deque::const_reverse_iterator deque::crbegin() const EA_NOEXCEPT { return const_reverse_iterator(mItEnd); } template inline typename deque::reverse_iterator deque::rend() EA_NOEXCEPT { return reverse_iterator(mItBegin); } template inline typename deque::const_reverse_iterator deque::rend() const EA_NOEXCEPT { return const_reverse_iterator(mItBegin); } template inline typename deque::const_reverse_iterator deque::crend() const EA_NOEXCEPT { return const_reverse_iterator(mItBegin); } template inline bool deque::empty() const EA_NOEXCEPT { return mItBegin.mpCurrent == mItEnd.mpCurrent; } template typename deque::size_type inline deque::size() const EA_NOEXCEPT { return (size_type)(mItEnd - mItBegin); } template inline void deque::resize(size_type n, const value_type& value) { const size_type nSizeCurrent = size(); if(n > nSizeCurrent) // We expect that more often than not, resizes will be upsizes. 
insert(mItEnd, n - nSizeCurrent, value); else erase(mItBegin + (difference_type)n, mItEnd); } template inline void deque::resize(size_type n) { resize(n, value_type()); } template inline void deque::shrink_to_fit() { this_type x(eastl::make_move_iterator(begin()), eastl::make_move_iterator(end())); swap(x); } template inline void deque::set_capacity(size_type n) { // Currently there isn't a way to remove all allocations from a deque, as it // requires a single starting allocation for the subarrays. So we can't just // free all memory without leaving it in a bad state. So the best means of // implementing set_capacity() is to do what we do below. if(n == 0) { this_type temp(mAllocator); DoSwap(temp); } else if(n < size()) { // We currently ignore the request to reduce capacity. To do: Implement this // and do it in a way that doesn't result in temporarily ~doubling our memory usage. // That might involve trimming unused subarrays from the front or back of // the container. resize(n); } } template typename deque::reference deque::operator[](size_type n) { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(n >= (size_type)(mItEnd - mItBegin))) EASTL_FAIL_MSG("deque::operator[] -- out of range"); #elif EASTL_ASSERT_ENABLED // We allow taking a reference to deque[0] if (EASTL_UNLIKELY((n != 0) && n >= (size_type)(mItEnd - mItBegin))) EASTL_FAIL_MSG("deque::operator[] -- out of range"); #endif // See DequeIterator::operator+=() for an explanation of the code below. 
iterator it(mItBegin); const difference_type subarrayPosition = (difference_type)((it.mpCurrent - it.mpBegin) + (difference_type)n); const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); return *(*(it.mpCurrentArrayPtr + subarrayIndex) + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize))); } template typename deque::const_reference deque::operator[](size_type n) const { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY(n >= (size_type)(mItEnd - mItBegin))) EASTL_FAIL_MSG("deque::operator[] -- out of range"); #elif EASTL_ASSERT_ENABLED // We allow the user to use a reference to deque[0] of an empty container. if (EASTL_UNLIKELY((n != 0) && n >= (size_type)(mItEnd - mItBegin))) EASTL_FAIL_MSG("deque::operator[] -- out of range"); #endif // See DequeIterator::operator+=() for an explanation of the code below. iterator it(mItBegin); const difference_type subarrayPosition = (it.mpCurrent - it.mpBegin) + (difference_type)n; const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); return *(*(it.mpCurrentArrayPtr + subarrayIndex) + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize))); } template typename deque::reference deque::at(size_type n) { #if EASTL_EXCEPTIONS_ENABLED if(n >= (size_type)(mItEnd - mItBegin)) throw std::out_of_range("deque::at -- out of range"); #elif EASTL_ASSERT_ENABLED if(n >= (size_type)(mItEnd - mItBegin)) EASTL_FAIL_MSG("deque::at -- out of range"); #endif return *(mItBegin.operator+((difference_type)n)); } template typename deque::const_reference deque::at(size_type n) const { #if EASTL_EXCEPTIONS_ENABLED if(n >= (size_type)(mItEnd - mItBegin)) throw std::out_of_range("deque::at -- out of range"); #elif EASTL_ASSERT_ENABLED if(n >= (size_type)(mItEnd - 
mItBegin)) EASTL_FAIL_MSG("deque::at -- out of range"); #endif return *(mItBegin.operator+((difference_type)n)); } template typename deque::reference deque::front() { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) EASTL_FAIL_MSG("deque::front -- empty deque"); #else // We allow the user to reference an empty container. #endif return *mItBegin; } template typename deque::const_reference deque::front() const { #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) EASTL_FAIL_MSG("deque::front -- empty deque"); #else // We allow the user to reference an empty container. #endif return *mItBegin; } template typename deque::reference deque::back() { #if EASTL_ASSERT_ENABLED // Decrementing an iterator with an empty container will result in undefined behaviour. // specifically: the iterator decrement will apply pointer arithmetic to a nullptr (depending on the situation either mpCurrentArrayPtr or mpBegin). if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) EASTL_FAIL_MSG("deque::back -- empty deque"); #endif return *iterator(mItEnd, typename iterator::Decrement()); } template typename deque::const_reference deque::back() const { #if EASTL_ASSERT_ENABLED // Decrementing an iterator with an empty container will result in undefined behaviour. // specifically: the iterator decrement will apply pointer arithmetic to a nullptr (depending on the situation either mpCurrentArrayPtr or mpBegin). 
if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) EASTL_FAIL_MSG("deque::back -- empty deque"); #endif return *iterator(mItEnd, typename iterator::Decrement()); } template void deque::push_front(const value_type& value) { emplace_front(value); } template void deque::push_front(value_type&& value) { emplace_front(eastl::move(value)); } template typename deque::reference deque::push_front() { emplace_front(value_type()); return *mItBegin; // Same as return front(); } template void deque::push_back(const value_type& value) { emplace_back(value); } template void deque::push_back(value_type&& value) { emplace_back(eastl::move(value)); } template typename deque::reference deque::push_back() { emplace_back(value_type()); return *iterator(mItEnd, typename iterator::Decrement()); // Same thing as return back(); } template void deque::pop_front() { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) EASTL_FAIL_MSG("deque::pop_front -- empty deque"); #endif if((mItBegin.mpCurrent + 1) != mItBegin.mpEnd) // If the operation is very simple... (mItBegin.mpCurrent++)->~value_type(); else { // This is executed only when we are popping the end (last) item off the front-most subarray. // In this case we need to free the subarray and point mItBegin to the next subarray. #ifdef EA_DEBUG value_type** pp = mItBegin.mpCurrentArrayPtr; #endif mItBegin.mpCurrent->~value_type(); // mpCurrent == mpEnd - 1 DoFreeSubarray(mItBegin.mpBegin); mItBegin.SetSubarray(mItBegin.mpCurrentArrayPtr + 1); mItBegin.mpCurrent = mItBegin.mpBegin; #ifdef EA_DEBUG *pp = NULL; #endif } } template void deque::pop_back() { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) EASTL_FAIL_MSG("deque::pop_back -- empty deque"); #endif if(mItEnd.mpCurrent != mItEnd.mpBegin) // If the operation is very simple... (--mItEnd.mpCurrent)->~value_type(); else { // This is executed only when we are popping the first item off the last subarray. 
// In this case we need to free the subarray and point mItEnd to the previous subarray. #ifdef EA_DEBUG value_type** pp = mItEnd.mpCurrentArrayPtr; #endif DoFreeSubarray(mItEnd.mpBegin); mItEnd.SetSubarray(mItEnd.mpCurrentArrayPtr - 1); mItEnd.mpCurrent = mItEnd.mpEnd - 1; // Recall that mItEnd points to one-past the last item in the container. mItEnd.mpCurrent->~value_type(); // Thus we need to call the destructor on the item *before* that last item. #ifdef EA_DEBUG *pp = NULL; #endif } } template template typename deque::iterator deque::emplace(const_iterator position, Args&&... args) { if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If we are doing the same thing as push_back... { emplace_back(eastl::forward(args)...); return iterator(mItEnd, typename iterator::Decrement()); // Unfortunately, we need to make an iterator here, as the above push_back is an operation that can invalidate existing iterators. } else if(EASTL_UNLIKELY(position.mpCurrent == mItBegin.mpCurrent)) // If we are doing the same thing as push_front... { emplace_front(eastl::forward(args)...); return mItBegin; } iterator itPosition(position, typename iterator::FromConst()); value_type valueSaved(eastl::forward(args)...); // We need to save this because value may come from within our container. It would be somewhat tedious to make a workaround that could avoid this. const difference_type i(itPosition - mItBegin); #if EASTL_ASSERT_ENABLED EASTL_ASSERT(!empty()); // The push_front and push_back calls below assume that we are non-empty. It turns out this is never called unless so. if(EASTL_UNLIKELY(!(validate_iterator(itPosition) & isf_valid))) EASTL_FAIL_MSG("deque::emplace -- invalid iterator"); #endif if(i < (difference_type)(size() / 2)) // Should we insert at the front or at the back? We divide the range in half. { emplace_front(eastl::move(*mItBegin)); // This operation potentially invalidates all existing iterators and so we need to assign them anew relative to mItBegin below. 
itPosition = mItBegin + i; const iterator newPosition (itPosition, typename iterator::Increment()); iterator oldBegin (mItBegin, typename iterator::Increment()); const iterator oldBeginPlus1(oldBegin, typename iterator::Increment()); oldBegin.move(oldBeginPlus1, newPosition, eastl::is_trivially_copyable()); } else { emplace_back(eastl::move(*iterator(mItEnd, typename iterator::Decrement()))); itPosition = mItBegin + i; iterator oldBack (mItEnd, typename iterator::Decrement()); const iterator oldBackMinus1(oldBack, typename iterator::Decrement()); oldBack.move_backward(itPosition, oldBackMinus1, eastl::is_trivially_copyable()); } *itPosition = eastl::move(valueSaved); return itPosition; } template template typename deque::reference deque::emplace_front(Args&&... args) { if(mItBegin.mpCurrent != mItBegin.mpBegin) // If we have room in the first subarray... we hope that usually this 'new' pathway gets executed, as it is slightly faster. detail::allocator_construct(mAllocator, --mItBegin.mpCurrent, eastl::forward(args)...); else { // To consider: Detect if value isn't coming from within this container and handle that efficiently. value_type valueSaved(eastl::forward(args)...); // We need to make a temporary, because args may be a value_type that comes from within our container and the operations below may change the container. But we can use move instead of copy. if(mItBegin.mpCurrentArrayPtr == mpPtrArray) // If there are no more pointers in front of the current (first) one... DoReallocPtrArray(1, kSideFront); mItBegin.mpCurrentArrayPtr[-1] = DoAllocateSubarray(); #if EASTL_EXCEPTIONS_ENABLED try { #endif mItBegin.SetSubarray(mItBegin.mpCurrentArrayPtr - 1); mItBegin.mpCurrent = mItBegin.mpEnd - 1; detail::allocator_construct(mAllocator, mItBegin.mpCurrent, eastl::move(valueSaved)); #if EASTL_EXCEPTIONS_ENABLED } catch(...) { ++mItBegin; // The exception could only occur in the new operation above, after we have incremented mItBegin. So we need to undo it. 
DoFreeSubarray(mItBegin.mpCurrentArrayPtr[-1]); throw; } #endif } return *mItBegin; // Same as return front(); } template template typename deque::reference deque::emplace_back(Args&&... args) { if ((mItEnd.mpCurrent + 1) != mItEnd.mpEnd) // If we have room in the last subarray... we hope that usually this 'new' pathway gets executed, as it is slightly faster. { reference back = *mItEnd.mpCurrent; detail::allocator_construct(mAllocator, mItEnd.mpCurrent++, eastl::forward(args)...); return back; } else { // To consider: Detect if value isn't coming from within this container and handle that efficiently. value_type valueSaved(eastl::forward(args)...); // We need to make a temporary, because args may be a value_type that comes from within our container and the operations below may change the container. But we can use move instead of copy. if(((mItEnd.mpCurrentArrayPtr - mpPtrArray) + 1) >= (difference_type)mnPtrArraySize) // If there are no more pointers after the current (last) one. DoReallocPtrArray(1, kSideBack); mItEnd.mpCurrentArrayPtr[1] = DoAllocateSubarray(); #if EASTL_EXCEPTIONS_ENABLED try { #endif detail::allocator_construct(mAllocator, mItEnd.mpCurrent, eastl::move(valueSaved)); mItEnd.SetSubarray(mItEnd.mpCurrentArrayPtr + 1); mItEnd.mpCurrent = mItEnd.mpBegin; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { // No need to execute '--mItEnd', as the exception could only occur in the new operation above before we set mItEnd. 
DoFreeSubarray(mItEnd.mpCurrentArrayPtr[1]); throw; } #endif return *iterator(mItEnd, typename iterator::Decrement()); // Same as return back(); } } template typename deque::iterator deque::insert(const_iterator position, const value_type& value) { return emplace(position, value); } template typename deque::iterator deque::insert(const_iterator position, value_type&& value) { return emplace(position, eastl::move(value)); } template typename deque::iterator deque::insert(const_iterator position, size_type n, const value_type& value) { return DoInsertValues(position, n, value); } template template typename deque::iterator deque::insert(const_iterator position, InputIterator first, InputIterator last) { return DoInsert(position, first, last, is_integral()); // The C++ standard requires this sort of behaviour, as InputIterator might actually be Integer and 'first' is really 'count' and 'last' is really 'value'. } template typename deque::iterator deque::insert(const_iterator position, std::initializer_list ilist) { const difference_type i(position - mItBegin); DoInsert(position, ilist.begin(), ilist.end(), false_type()); return (mItBegin + i); } template typename deque::iterator deque::erase(const_iterator position) { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(!(validate_iterator(position) & isf_valid))) EASTL_FAIL_MSG("deque::erase -- invalid iterator"); if(EASTL_UNLIKELY(position == end())) EASTL_FAIL_MSG("deque::erase -- end() iterator is an invalid iterator for erase"); #endif iterator itPosition(position, typename iterator::FromConst()); iterator itNext(itPosition, typename iterator::Increment()); const difference_type i(itPosition - mItBegin); if(i < (difference_type)(size() / 2)) // Should we move the front entries forward or the back entries backward? We divide the range in half. 
{ itNext.move_backward(mItBegin, itPosition, eastl::is_trivially_copyable()); pop_front(); } else { itPosition.move(itNext, mItEnd, eastl::is_trivially_copyable()); pop_back(); } return mItBegin + i; } template typename deque::iterator deque::erase(const_iterator first, const_iterator last) { iterator itFirst(first, typename iterator::FromConst()); iterator itLast(last, typename iterator::FromConst()); #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(!(validate_iterator(itFirst) & isf_valid))) EASTL_FAIL_MSG("deque::erase -- invalid iterator"); if(EASTL_UNLIKELY(!(validate_iterator(itLast) & isf_valid))) EASTL_FAIL_MSG("deque::erase -- invalid iterator"); #endif if((itFirst != mItBegin) || (itLast != mItEnd)) // If not erasing everything... (We expect that the user won't call erase(begin, end) because instead the user would just call clear.) { const difference_type n(itLast - itFirst); const difference_type i(itFirst - mItBegin); if(i < (difference_type)((size() - n) / 2)) // Should we move the front entries forward or the back entries backward? We divide the range in half. { const iterator itNewBegin(mItBegin + n); value_type** const pPtrArrayBegin = mItBegin.mpCurrentArrayPtr; itLast.move_backward(mItBegin, itFirst, eastl::is_trivially_copyable()); for(; mItBegin != itNewBegin; ++mItBegin) // Question: If value_type is a POD type, will the compiler generate this loop at all? mItBegin.mpCurrent->~value_type(); // If so, then we need to make a specialization for destructing PODs. DoFreeSubarrays(pPtrArrayBegin, itNewBegin.mpCurrentArrayPtr); // mItBegin = itNewBegin; <-- Not necessary, as the above loop makes it so already. } else // Else we will be moving back entries backward. 
{ iterator itNewEnd(mItEnd - n); value_type** const pPtrArrayEnd = itNewEnd.mpCurrentArrayPtr + 1; itFirst.move(itLast, mItEnd, eastl::is_trivially_copyable()); for(iterator itTemp(itNewEnd); itTemp != mItEnd; ++itTemp) itTemp.mpCurrent->~value_type(); DoFreeSubarrays(pPtrArrayEnd, mItEnd.mpCurrentArrayPtr + 1); mItEnd = itNewEnd; } return mItBegin + i; } clear(); return mItEnd; } template typename deque::reverse_iterator deque::erase(reverse_iterator position) { return reverse_iterator(erase((++position).base())); } template typename deque::reverse_iterator deque::erase(reverse_iterator first, reverse_iterator last) { // Version which erases in order from first to last. // difference_type i(first.base() - last.base()); // while(i--) // first = erase(first); // return first; // Version which erases in order from last to first, but is slightly more efficient: return reverse_iterator(erase(last.base(), first.base())); } template void deque::clear() { // Destroy all values and all subarrays they belong to, except for the first one, // as we need to reserve some space for a valid mItBegin/mItEnd. if(mItBegin.mpCurrentArrayPtr != mItEnd.mpCurrentArrayPtr) // If there are multiple subarrays (more often than not, this will be so)... { for(value_type* p1 = mItBegin.mpCurrent; p1 < mItBegin.mpEnd; ++p1) p1->~value_type(); for(value_type* p2 = mItEnd.mpBegin; p2 < mItEnd.mpCurrent; ++p2) p2->~value_type(); DoFreeSubarray(mItEnd.mpBegin); // Leave mItBegin with a valid subarray. } else { for(value_type* p = mItBegin.mpCurrent; p < mItEnd.mpCurrent; ++p) p->~value_type(); // Don't free the one existing subarray, as we need it for mItBegin/mItEnd. } for(value_type** pPtrArray = mItBegin.mpCurrentArrayPtr + 1; pPtrArray < mItEnd.mpCurrentArrayPtr; ++pPtrArray) { for(value_type* p = *pPtrArray, *pEnd = *pPtrArray + kDequeSubarraySize; p < pEnd; ++p) p->~value_type(); DoFreeSubarray(*pPtrArray); } mItEnd = mItBegin; // mItBegin/mItEnd will not be dereferencable. 
} //template //void deque::reset_lose_memory() //{ // // The reset_lose_memory function is a special extension function which unilaterally // // resets the container to an empty state without freeing the memory of // // the contained objects. This is useful for very quickly tearing down a // // container built into scratch memory. // // // Currently we are unable to get this reset_lose_memory operation to work correctly // // as we haven't been able to find a good way to have a deque initialize // // without allocating memory. We can lose the old memory, but DoInit // // would necessarily do a ptrArray allocation. And this is not within // // our definition of how reset_lose_memory works. // base_type::DoInit(0); // //} template void deque::swap(deque& x) { #if defined(EASTL_DEQUE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR) && EASTL_DEQUE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR if(mAllocator == x.mAllocator) // If allocators are equivalent... DoSwap(x); else // else swap the contents. { const this_type temp(*this); // Can't call eastl::swap because that would *this = x; // itself call this member swap function. x = temp; } #else // NOTE(rparolin): The previous implementation required T to be copy-constructible in the fall-back case where // allocators with unique instances copied elements. This was an unnecessary restriction and prevented the common // usage of deque with non-copyable types (eg. eastl::deque or eastl::deque). // // The previous implementation violated the following requirements of deque::swap so the fall-back code has // been removed. EASTL implicitly defines 'propagate_on_container_swap = true' therefore the fall-back case is // not required. We simply swap the contents and the allocator as that is the common expectation of // users and does not put the container into an invalid state since it can not free its memory via its current // allocator instance. 
// DoSwap(x); #endif } template template void deque::DoInit(Integer n, Integer value, true_type) { base_type::DoInit(n); // Call the base uninitialized init function. DoFillInit(value); } template template void deque::DoInit(InputIterator first, InputIterator last, false_type) { typedef typename eastl::iterator_traits::iterator_category IC; DoInitFromIterator(first, last, IC()); } template template void deque::DoInitFromIterator(InputIterator first, InputIterator last, eastl::input_iterator_tag) { base_type::DoInit(0); // Call the base uninitialized init function, but don't actually allocate any values. #if EASTL_EXCEPTIONS_ENABLED try { #endif // We have little choice but to iterate through the source iterator and call // push_back for each item. It can be slow because it will keep reallocating the // container memory as we go (every kDequeSubarraySize elements). We are not allowed to use distance() on an InputIterator. for(; first != last; ++first) // InputIterators by definition actually only allow you to iterate through them once. { // Thus the standard *requires* that we do this (inefficient) implementation. push_back(*first); // Luckily, InputIterators are in practice almost never used, so this code will likely never get executed. } #if EASTL_EXCEPTIONS_ENABLED } catch(...) { clear(); throw; } #endif } template template void deque::DoInitFromIterator(ForwardIterator first, ForwardIterator last, eastl::forward_iterator_tag) { typedef typename eastl::remove_const::type non_const_iterator_type; // If T is a const type (e.g. const int) then we need to initialize it as if it were non-const. typedef typename eastl::remove_const::type non_const_value_type; const size_type n = (size_type)eastl::distance(first, last); value_type** pPtrArrayCurrent; base_type::DoInit(n); // Call the base uninitialized init function. 
#if EASTL_EXCEPTIONS_ENABLED try { #endif for(pPtrArrayCurrent = mItBegin.mpCurrentArrayPtr; pPtrArrayCurrent < mItEnd.mpCurrentArrayPtr; ++pPtrArrayCurrent) // Copy to the known-to-be-completely-used subarrays. { // We implment an algorithm here whereby we use uninitialized_copy() and advance() instead of just iterating from first to last and constructing as we go. // The reason for this is that we can take advantage of trivially copyable data types and implement construction as memcpy operations. ForwardIterator current(first); // To do: Implement a specialization of this algorithm for non-trivially copyable types which eliminates the need for 'current'. eastl::advance(current, kDequeSubarraySize); eastl::uninitialized_copy((non_const_iterator_type)first, (non_const_iterator_type)current, (non_const_value_type*)*pPtrArrayCurrent); first = current; } eastl::uninitialized_copy((non_const_iterator_type)first, (non_const_iterator_type)last, (non_const_value_type*)mItEnd.mpBegin); #if EASTL_EXCEPTIONS_ENABLED } catch(...) { for(iterator itCurrent(mItBegin), itEnd(pPtrArrayCurrent, *pPtrArrayCurrent); itCurrent != itEnd; ++itCurrent) itCurrent.mpCurrent->~value_type(); throw; } #endif } template void deque::DoFillInit(const value_type& value) { value_type** pPtrArrayCurrent = mItBegin.mpCurrentArrayPtr; #if EASTL_EXCEPTIONS_ENABLED try { #endif while(pPtrArrayCurrent < mItEnd.mpCurrentArrayPtr) { eastl::uninitialized_fill(*pPtrArrayCurrent, *pPtrArrayCurrent + kDequeSubarraySize, value); ++pPtrArrayCurrent; } eastl::uninitialized_fill(mItEnd.mpBegin, mItEnd.mpCurrent, value); #if EASTL_EXCEPTIONS_ENABLED } catch(...) { for(iterator itCurrent(mItBegin), itEnd(pPtrArrayCurrent, *pPtrArrayCurrent); itCurrent != itEnd; ++itCurrent) itCurrent.mpCurrent->~value_type(); throw; } #endif } template template void deque::DoAssign(Integer n, Integer value, true_type) // false_type means this is the integer version instead of iterator version. 
{ DoAssignValues(static_cast(n), static_cast(value)); } template template void deque::DoAssign(InputIterator first, InputIterator last, false_type) // false_type means this is the iterator version instead of integer version. { // Actually, the implementation below requires first/last to be a ForwardIterator and not just an InputIterator. // But Paul Pedriana if you somehow need to work with an InputIterator and we can deal with it. const size_type n = (size_type)eastl::distance(first, last); const size_type nSize = size(); if(n > nSize) // If we are increasing the size... { InputIterator atEnd(first); eastl::advance(atEnd, (difference_type)nSize); eastl::copy(first, atEnd, mItBegin); insert(mItEnd, atEnd, last); } else // n is <= size. { iterator itEnd(eastl::copy(first, last, mItBegin)); if(n < nSize) // If we need to erase any trailing elements... erase(itEnd, mItEnd); } } template void deque::DoAssignValues(size_type n, const value_type& value) { const size_type nSize = size(); if(n > nSize) // If we are increasing the size... 
{ eastl::fill(mItBegin, mItEnd, value); insert(mItEnd, n - nSize, value); } else { erase(mItBegin + (difference_type)n, mItEnd); eastl::fill(mItBegin, mItEnd, value); } } template template typename deque::iterator deque::DoInsert(const const_iterator& position, Integer n, Integer value, true_type) { return DoInsertValues(position, (size_type)n, (value_type)value); } template template typename deque::iterator deque::DoInsert(const const_iterator& position, const InputIterator& first, const InputIterator& last, false_type) { typedef typename eastl::iterator_traits::iterator_category IC; return DoInsertFromIterator(position, first, last, IC()); } template template typename deque::iterator deque::DoInsertFromIterator(const_iterator position, const InputIterator& first, const InputIterator& last, eastl::input_iterator_tag) { const difference_type index = eastl::distance(cbegin(), position); #if EASTL_EXCEPTIONS_ENABLED try { #endif // We have little choice but to iterate through the source iterator and call // insert for each item. It can be slow because it will keep reallocating the // container memory as we go (every kDequeSubarraySize elements). We are not // allowed to use distance() on an InputIterator. InputIterators by definition // actually only allow you to iterate through them once. Thus the standard // *requires* that we do this (inefficient) implementation. Luckily, // InputIterators are in practice almost never used, so this code will likely // never get executed. for (InputIterator iter = first; iter != last; ++iter) { position = insert(position, *iter) + 1; } #if EASTL_EXCEPTIONS_ENABLED } catch (...) 
{ erase(cbegin() + index, position); throw; } #endif return begin() + index; } template template typename deque::iterator deque::DoInsertFromIterator(const_iterator position, const ForwardIterator& first, const ForwardIterator& last, eastl::forward_iterator_tag) { const size_type n = (size_type)eastl::distance(first, last); // This implementation is nearly identical to DoInsertValues below. // If you make a bug fix to one, you will likely want to fix the other. if(position.mpCurrent == mItBegin.mpCurrent) // If inserting at the beginning or into an empty container... { iterator itNewBegin(DoReallocSubarray(n, kSideFront)); // itNewBegin to mItBegin refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. #if EASTL_EXCEPTIONS_ENABLED try { #endif // We would like to use move here instead of copy when possible, which would be useful for // when inserting from a std::initializer_list, for example. // To do: solve this by having a template or runtime parameter which specifies move vs copy. eastl::uninitialized_copy(first, last, itNewBegin); mItBegin = itNewBegin; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); throw; } #endif return mItBegin; } else if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If inserting at the end (i.e. appending)... { const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); // mItEnd to itNewEnd refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. const iterator itFirstInserted(mItEnd); #if EASTL_EXCEPTIONS_ENABLED try { #endif // We would like to use move here instead of copy when possible, which would be useful for // when inserting from a std::initializer_list, for example. // To do: solve this by having a template or runtime parameter which specifies move vs copy. 
eastl::uninitialized_copy(first, last, mItEnd); mItEnd = itNewEnd; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); throw; } #endif return itFirstInserted; } else { const difference_type nInsertionIndex = position - mItBegin; const size_type nSize = size(); if(nInsertionIndex < (difference_type)(nSize / 2)) // If the insertion index is in the front half of the deque... grow the deque at the front. { const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); // itNewBegin to mItBegin refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. const iterator itOldBegin(mItBegin); const iterator itPosition(mItBegin + nInsertionIndex); // We need to reset this value because the reallocation above can invalidate iterators. #if EASTL_EXCEPTIONS_ENABLED try { #endif // We have a problem here: we would like to use move instead of copy, but it may be that the range to be inserted comes from // this container and comes from the segment we need to move. So we can't use move operations unless we are careful to handle // that situation. The newly inserted contents must be contents that were moved to and not moved from. To do: solve this. if(nInsertionIndex >= (difference_type)n) // If the newly inserted items will be entirely within the old area... { iterator itUCopyEnd(mItBegin + (difference_type)n); eastl::uninitialized_copy(mItBegin, itUCopyEnd, itNewBegin); // This can throw. itUCopyEnd = eastl::copy(itUCopyEnd, itPosition, itOldBegin); // Recycle 'itUCopyEnd' to mean something else. eastl::copy(first, last, itUCopyEnd); } else // Else the newly inserted items are going within the newly allocated area at the front. { ForwardIterator mid(first); eastl::advance(mid, (difference_type)n - nInsertionIndex); eastl::uninitialized_copy_copy(mItBegin, itPosition, first, mid, itNewBegin); // This can throw. 
eastl::copy(mid, last, itOldBegin); } mItBegin = itNewBegin; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); throw; } #endif } else { const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); const iterator itOldEnd(mItEnd); const difference_type nPushedCount = (difference_type)nSize - nInsertionIndex; const iterator itPosition(mItEnd - nPushedCount); // We need to reset this value because the reallocation above can invalidate iterators. #if EASTL_EXCEPTIONS_ENABLED try { #endif // We have a problem here: we would like to use move instead of copy, but it may be that the range to be inserted comes from // this container and comes from the segment we need to move. So we can't use move operations unless we are careful to handle // that situation. The newly inserted contents must be contents that were moved to and not moved from. To do: solve this. if(nPushedCount > (difference_type)n) { const iterator itUCopyEnd(mItEnd - (difference_type)n); eastl::uninitialized_copy(itUCopyEnd, mItEnd, mItEnd); eastl::copy_backward(itPosition, itUCopyEnd, itOldEnd); eastl::copy(first, last, itPosition); } else { ForwardIterator mid(first); eastl::advance(mid, nPushedCount); eastl::uninitialized_copy_copy(mid, last, itPosition, mItEnd, mItEnd); eastl::copy(first, mid, itPosition); } mItEnd = itNewEnd; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); throw; } #endif } return iterator(mItBegin + nInsertionIndex); } } template typename deque::iterator deque::DoInsertValues(const_iterator position, size_type n, const value_type& value) { #if EASTL_ASSERT_ENABLED if(EASTL_UNLIKELY(!(validate_iterator(position) & isf_valid))) EASTL_FAIL_MSG("deque::insert -- invalid iterator"); #endif // This implementation is nearly identical to DoInsertFromIterator above. // If you make a bug fix to one, you will likely want to fix the other. 
if(position.mpCurrent == mItBegin.mpCurrent) // If inserting at the beginning... { const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); #if EASTL_EXCEPTIONS_ENABLED try { #endif // Note that we don't make a temp copy of 'value' here. This is because in a // deque, insertion at either the front or back doesn't cause a reallocation // or move of data in the middle. That's a key feature of deques, in fact. eastl::uninitialized_fill(itNewBegin, mItBegin, value); mItBegin = itNewBegin; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); throw; } #endif return mItBegin; } else if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If inserting at the end (i.e. appending)... { const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); const iterator itFirstInserted(mItEnd); #if EASTL_EXCEPTIONS_ENABLED try { #endif // Note that we don't make a temp copy of 'value' here. This is because in a // deque, insertion at either the front or back doesn't cause a reallocation // or move of data in the middle. That's a key feature of deques, in fact. eastl::uninitialized_fill(mItEnd, itNewEnd, value); mItEnd = itNewEnd; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); throw; } #endif return itFirstInserted; } else { // A key purpose of a deque is to implement insertions and removals more efficiently // than with a vector. We are inserting into the middle of the deque here. A quick and // dirty implementation of this would be to reallocate the subarrays and simply push // all values in the middle upward like you would do with a vector. Instead we implement // the minimum amount of reallocations needed but may need to do some value moving, // as the subarray sizes need to remain constant and can have no holes in them. 
const difference_type nInsertionIndex = position - mItBegin; const size_type nSize = size(); const value_type valueSaved(value); if(nInsertionIndex < (difference_type)(nSize / 2)) // If the insertion index is in the front half of the deque... grow the deque at the front. { const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); const iterator itOldBegin(mItBegin); const iterator itPosition(mItBegin + nInsertionIndex); // We need to reset this value because the reallocation above can invalidate iterators. #if EASTL_EXCEPTIONS_ENABLED try { #endif if(nInsertionIndex >= (difference_type)n) // If the newly inserted items will be entirely within the old area... { iterator itUCopyEnd(mItBegin + (difference_type)n); eastl::uninitialized_move_if_noexcept(mItBegin, itUCopyEnd, itNewBegin); // This can throw. itUCopyEnd = eastl::move(itUCopyEnd, itPosition, itOldBegin); // Recycle 'itUCopyEnd' to mean something else. eastl::fill(itUCopyEnd, itPosition, valueSaved); } else // Else the newly inserted items are going within the newly allocated area at the front. { eastl::uninitialized_move_fill(mItBegin, itPosition, itNewBegin, mItBegin, valueSaved); // This can throw. eastl::fill(itOldBegin, itPosition, valueSaved); } mItBegin = itNewBegin; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); throw; } #endif return iterator(mItBegin + nInsertionIndex); } else // Else the insertion index is in the back half of the deque, so grow the deque at the back. { const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); const iterator itOldEnd(mItEnd); const difference_type nPushedCount = (difference_type)nSize - nInsertionIndex; const iterator itPosition(mItEnd - nPushedCount); // We need to reset this value because the reallocation above can invalidate iterators. #if EASTL_EXCEPTIONS_ENABLED try { #endif if(nPushedCount > (difference_type)n) // If the newly inserted items will be entirely within the old area... 
{ iterator itUCopyEnd(mItEnd - (difference_type)n); eastl::uninitialized_move_if_noexcept(itUCopyEnd, mItEnd, mItEnd); // This can throw. itUCopyEnd = eastl::move_backward(itPosition, itUCopyEnd, itOldEnd); // Recycle 'itUCopyEnd' to mean something else. eastl::fill(itPosition, itUCopyEnd, valueSaved); } else // Else the newly inserted items are going within the newly allocated area at the back. { eastl::uninitialized_fill_move(mItEnd, itPosition + (difference_type)n, valueSaved, itPosition, mItEnd); // This can throw. eastl::fill(itPosition, itOldEnd, valueSaved); } mItEnd = itNewEnd; #if EASTL_EXCEPTIONS_ENABLED } catch(...) { DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); throw; } #endif return iterator(mItBegin + nInsertionIndex); } } } template inline void deque::DoSwap(this_type& x) { eastl::swap(mpPtrArray, x.mpPtrArray); eastl::swap(mnPtrArraySize, x.mnPtrArraySize); eastl::swap(mItBegin, x.mItBegin); eastl::swap(mItEnd, x.mItEnd); eastl::swap(mAllocator, x.mAllocator); // We do this even if EASTL_ALLOCATOR_COPY_ENABLED is 0. } template inline bool deque::validate() const { // To do: More detailed validation. // To do: Try to make the validation resistant to crashes if the data is invalid. if((end() - begin()) < 0) return false; return true; } template inline int deque::validate_iterator(const_iterator i) const { // To do: We don't currently track isf_current, will need to make it do so. // To do: Fix the validation below, as it will not catch all invalid iterators. 
if((i - begin()) < 0) return isf_none; if((end() - i) < 0) return isf_none; if(i == end()) return (isf_valid | isf_current); return (isf_valid | isf_current | isf_can_dereference); } /////////////////////////////////////////////////////////////////////// // global operators /////////////////////////////////////////////////////////////////////// template inline bool operator==(const deque& a, const deque& b) { return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); } #if defined(EA_COMPILER_HAS_THREE_WAY_COMPARISON) template inline synth_three_way_result operator<=>(const deque& a, const deque& b) { return eastl::lexicographical_compare_three_way(a.begin(), a.end(), b.begin(), b.end(), synth_three_way{}); } #else template inline bool operator!=(const deque& a, const deque& b) { return ((a.size() != b.size()) || !eastl::equal(a.begin(), a.end(), b.begin())); } template inline bool operator<(const deque& a, const deque& b) { return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template inline bool operator>(const deque& a, const deque& b) { return b < a; } template inline bool operator<=(const deque& a, const deque& b) { return !(b < a); } template inline bool operator>=(const deque& a, const deque& b) { return !(a < b); } #endif template inline void swap(deque& a, deque& b) { a.swap(b); } /////////////////////////////////////////////////////////////////////// // erase / erase_if // // https://en.cppreference.com/w/cpp/container/deque/erase2 /////////////////////////////////////////////////////////////////////// template typename deque::size_type erase(deque& c, const U& value) { // Erases all elements that compare equal to value from the container. 
auto origEnd = c.end(); auto newEnd = eastl::remove(c.begin(), origEnd, value); auto numRemoved = eastl::distance(newEnd, origEnd); c.erase(newEnd, origEnd); // Note: This is technically a lossy conversion when size_type // is 32bits and ptrdiff_t is 64bits (could happen on 64bit // systems when EASTL_SIZE_T_32BIT is set). In practice this // is fine because if EASTL_SIZE_T_32BIT is set then the deque // should not have more elements than fit in a uint32_t and so // the distance here should fit in a size_type. return static_cast::size_type>(numRemoved); } template typename deque::size_type erase_if(deque& c, Predicate predicate) { // Erases all elements that satisfy the predicate pred from the container. auto origEnd = c.end(); auto newEnd = eastl::remove_if(c.begin(), origEnd, predicate); auto numRemoved = eastl::distance(newEnd, origEnd); c.erase(newEnd, origEnd); // Note: This is technically a lossy conversion when size_type // is 32bits and ptrdiff_t is 64bits (could happen on 64bit // systems when EASTL_SIZE_T_32BIT is set). In practice this // is fine because if EASTL_SIZE_T_32BIT is set then the deque // should not have more elements than fit in a uint32_t and so // the distance here should fit in a size_type. return static_cast::size_type>(numRemoved); } /////////////////////////////////////////////////////////////////////// // erase_unsorted // // This serves a similar purpose as erase above but with the difference // that it doesn't preserve the relative order of what is left in the // deque. // // Effects: Removes all elements equal to value from the deque while // optimizing for speed with the potential reordering of elements as a // side effect. 
// // Complexity: Linear // /////////////////////////////////////////////////////////////////////// template typename deque::size_type erase_unsorted(deque& c, const U& value) { auto itRemove = c.begin(); auto ritMove = c.rbegin(); while(true) { itRemove = eastl::find(itRemove, ritMove.base(), value); if (itRemove == ritMove.base()) // any elements to remove? break; ritMove = eastl::find_if(ritMove, eastl::make_reverse_iterator(itRemove), [&value](const T& elem) { return elem != value; }); if (itRemove == ritMove.base()) // any elements that can be moved into place? break; *itRemove = eastl::move(*ritMove); ++itRemove; ++ritMove; } // now all elements in the range [itRemove, c.end()) are either to be removed or have already been moved from. auto origEnd = end(c); auto numRemoved = distance(itRemove, origEnd); c.erase(itRemove, origEnd); // Note: This is technically a lossy conversion when size_type // is 32bits and ptrdiff_t is 64bits (could happen on 64bit // systems when EASTL_SIZE_T_32BIT is set). In practice this // is fine because if EASTL_SIZE_T_32BIT is set then the deque // should not have more elements than fit in a uint32_t and so // the distance here should fit in a size_type. return static_cast::size_type>(numRemoved); } /////////////////////////////////////////////////////////////////////// // erase_unsorted_if // // This serves a similar purpose as erase_if above but with the // difference that it doesn't preserve the relative order of what is // left in the deque. // // Effects: Removes all elements that return true for the predicate // while optimizing for speed with the potential reordering of elements // as a side effect. // // Complexity: Linear // /////////////////////////////////////////////////////////////////////// template typename deque::size_type erase_unsorted_if(deque& c, Predicate predicate) { // Erases all elements that satisfy predicate from the container. 
auto itRemove = c.begin(); auto ritMove = c.rbegin(); while(true) { itRemove = eastl::find_if(itRemove, ritMove.base(), predicate); if (itRemove == ritMove.base()) // any elements to remove? break; ritMove = eastl::find_if(ritMove, eastl::make_reverse_iterator(itRemove), not_fn(predicate)); if (itRemove == ritMove.base()) // any elements that can be moved into place? break; *itRemove = eastl::move(*ritMove); ++itRemove; ++ritMove; } // now all elements in the range [itRemove, c.end()) are either to be removed or have already been moved from. auto origEnd = end(c); auto numRemoved = distance(itRemove, origEnd); c.erase(itRemove, origEnd); // Note: This is technically a lossy conversion when size_type // is 32bits and ptrdiff_t is 64bits (could happen on 64bit // systems when EASTL_SIZE_T_32BIT is set). In practice this // is fine because if EASTL_SIZE_T_32BIT is set then the deque // should not have more elements than fit in a uint32_t and so // the distance here should fit in a size_type. return static_cast::size_type>(numRemoved); } } // namespace eastl EA_RESTORE_VC_WARNING(); #if EASTL_EXCEPTIONS_ENABLED EA_RESTORE_VC_WARNING(); #endif #endif // Header include guard ================================================ FILE: include/EASTL/expected.h ================================================ /////////////////////////////////////////////////////////////////////////////// // Copyright (c) Electronic Arts Inc. All rights reserved. /////////////////////////////////////////////////////////////////////////////// #pragma once // We use a few c++17 features in the implementation of eastl::expceted, so we only provide // it from c++17 onwards. 
#if EA_COMPILER_CPP17_ENABLED #include EA_DISABLE_VC_WARNING(4623) // warning C4623: default constructor was implicitly defined as deleted EA_DISABLE_VC_WARNING(4625) // warning C4625: copy constructor was implicitly defined as deleted EA_DISABLE_VC_WARNING(4510) // warning C4510: default constructor could not be generated #include #include #include #include #include #include // for std::initializer_list #if EASTL_EXCEPTIONS_ENABLED #include // for std::exception in bad_exception_access. #endif namespace eastl { template class expected; template class unexpected; // Some helper type traits: namespace internal { // TODO: move this somewhere else? It doesn't handle // templates with non-type template parameters so it isn't // really generic... template class Template> struct is_specialization : eastl::false_type { }; template