[
  {
    "path": ".gitignore",
    "content": ".clang-format\n.editorconfig\n\n#############################\n#Spirv\n#############################\n*.spv\n*.spva\n*.sass\n*.sassbin\n*.bat\n\n#############################\n#specific to the project\n#############################\ncmake_built\ncmake_build\nbuild\n_install\nbin_x64\nNVPRO_EXTERNAL\nnvpro_core"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.5)\nget_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)\nProject(${PROJNAME})\nMessage(STATUS \"-------------------------------\")\nMessage(STATUS \"Processing Project ${PROJNAME}:\")\n\n#####################################################################################\n# look for nvpro_core 1) as a sub-folder 2) at some other locations\n# this cannot be put anywhere else since we still didn't find setup.cmake yet\n#\nif(NOT BASE_DIRECTORY)\n\n  find_path(BASE_DIRECTORY\n    NAMES nvpro_core/cmake/setup.cmake\n    PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../.. \n    REQUIRED\n    DOC \"Directory containing nvpro_core\"\n    )\nendif()\nif(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake)\n  include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake)\n  include(${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake)\nelse()\n  message(FATAL_ERROR \"could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core\")\nendif()\n\n_add_project_definitions(${PROJNAME})\n\n#--------------------------------------------------------------------------------------------------\n# Resources\n#\ndownload_files(FILENAMES geforce.csf.gz)\n\n#####################################################################################\n# additions from packages needed for this sample\n# add refs  in LIBRARIES_OPTIMIZED\n# add refs  in LIBRARIES_DEBUG\n# add files in PACKAGE_SOURCE_FILES\n#\n_add_package_OpenGL()\n_add_package_ImGUI()\n_add_package_ZLIB()\n\nadd_definitions(-DCSF_SUPPORT_ZLIB=1)\n\n#####################################################################################\n# process the rest of some cmake code that needs to be done *after* the packages add\n_add_nvpro_core_lib()\n\n#####################################################################################\n# Source files for this project\n#\nfile(GLOB SOURCE_FILES *.cpp *.hpp *.inl 
*.h *.c)\nfile(GLOB GLSL_FILES *.glsl)\n\n\n#####################################################################################\n# Executable\n#\nif(WIN32)\n  add_definitions(-D_CRT_SECURE_NO_WARNINGS)\nendif()\n\nadd_executable(${PROJNAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${GLSL_FILES})\n\n#####################################################################################\n# common source code needed for this sample\n#\nsource_group(common FILES \n  ${COMMON_SOURCE_FILES}\n  ${PACKAGE_SOURCE_FILES}\n)\nsource_group(shaders FILES \n  ${GLSL_FILES}\n)\n\n#####################################################################################\n# Linkage\n#\ntarget_link_libraries(${PROJNAME} ${PLATFORM_LIBRARIES} nvpro_core)\n\nforeach(DEBUGLIB ${LIBRARIES_DEBUG})\n  target_link_libraries(${PROJNAME} debug ${DEBUGLIB})\nendforeach(DEBUGLIB)\n\nforeach(RELEASELIB ${LIBRARIES_OPTIMIZED})\n  target_link_libraries(${PROJNAME} optimized ${RELEASELIB})\nendforeach(RELEASELIB)\n\n#####################################################################################\n# copies binaries that need to be put next to the exe files (ZLib, etc.)\n#\n_finalize_target( ${PROJNAME} )\nLIST(APPEND GLSL_FILES \"common.h\")\ninstall(FILES ${GLSL_FILES} CONFIGURATIONS Release DESTINATION \"bin_${ARCH}/GLSL_${PROJNAME}\")\ninstall(FILES ${GLSL_FILES} CONFIGURATIONS Debug DESTINATION \"bin_${ARCH}_debug/GLSL_${PROJNAME}\")\n"
  },
  {
    "path": "CONTRIBUTING",
    "content": "https://developercertificate.org/\n\nDeveloper Certificate of Origin\nVersion 1.1\n\nCopyright (C) 2004, 2006 The Linux Foundation and its contributors.\n\nEveryone is permitted to copy and distribute verbatim copies of this\nlicense document, but changing it is not allowed.\n\n\nDeveloper's Certificate of Origin 1.1\n\nBy making a contribution to this project, I certify that:\n\n(a) The contribution was created in whole or in part by me and I\n    have the right to submit it under the open source license\n    indicated in the file; or\n\n(b) The contribution is based upon previous work that, to the best\n    of my knowledge, is covered under an appropriate open source\n    license and I have the right under that license to submit that\n    work with modifications, whether created in whole or in part\n    by me, under the same open source license (unless I am\n    permitted to submit under a different license), as indicated\n    in the file; or\n\n(c) The contribution was provided directly to me by some other\n    person who certified (a), (b) or (c) and I have not modified\n    it.\n\n(d) I understand and agree that this project and the contribution\n    are public and that a record of the contribution (including all\n    personal information I submit with it, including my sign-off) is\n    maintained indefinitely and may be redistributed consistent with\n    this project or the open source license(s) involved."
  },
  {
    "path": "LICENSE",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS"
  },
  {
    "path": "README.md",
    "content": "# gl cadscene render techniques\n\nThis sample implements several scene rendering techniques that target mostly static data, such as often found in CAD or DCC applications. In this context, 'static' means that the vertex and index buffers for the scene's objects rarely change. This can include editing the geometry of a few scene objects, but the matrix and material values are the properties that are modified the most across frames. Imagine making edits to the wheel topology of a car, or positioning an engine; the rest of the assembly remains the same.\n\nThe principal OpenGL mechanisms that are used here are described in the [SIGGRAPH 2014 presentation slides](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf). It is highly recommended to go through the slides first.\n\nThe sample makes use of multiple OpenGL 4 core features, such as **ARB_multi_draw_indirect**, but also showcases OpenGL 3 style rendering techniques.\n\nThere are also several techniques built around the **NV_command_list** extension. Please refer to [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) for an introduction to NV_command_list.\n\n> Note: This is just a sample to illustrate several techniques and possibilities for how to approach rendering. Its purpose is not to provide production-level, highly optimized implementations.\n\n### Scene Setup\n\nThe sample loads a cadscene file (csf). 
This file format is inspired by CAD applications' data organization, but (for simplicity) everything is stored in a single RAW file.\n\nThe scene is organized into:\n\n * Matrices: object transforms as well as concatenated world matrices \n * TreeNodes: a tree containing hierarchical information, mapping to Matrix indices\n\n * Materials: just classic two-sided OpenGL Blinn-Phong material parameters\n * Geometries: storing vertex and index information, organized into\n  * GeometryParts, which reference a sub-range within the index buffer, for either \"wireframe\" or \"solid\" surfaces\n\n * Objects, that reference Geometry and have corresponding\n  * ObjectParts, that encode part-level Material and Matrix assignment. Typically, an object uses just one Matrix for all its parts.\n\n### Shademodes\n\n![sample screenshot](https://github.com/nvpro-samples/gl_cadscene_rendertechniques/blob/master/doc/sample.jpg)\n\n- **solid**: only triangles are drawn\n- **solid with edges**: triangles and edge outlines on top (using PolygonOffset to push triangles back). When no global sorting (see later) is performed, this means we toggle between the two modes for every object.\n- **solid with edges (split test, only in sorted)**: an artificial mode that also separates triangles and edges into different FBOs, and is available in \"sorted\" and \"token\" renderers. The implementation has no real use-case character and is more or less for internal benchmarking of FBO toggles.\n\n### Strategies\n\nThese influence the number of drawcalls we generate for the hardware and software. Using OpenGL's MultiDraw* functions we can have fewer software calls than hardware drawcalls, which helps trigger faster paths in the driver as there is less validation overhead. 
A strategy is applied on a per-object level.\n\nImagine an object whose parts use two materials, red and blue:\n\n```\nmaterial: r b b r\nparts:    A B C D\n```\n\n- **materialgroups**\nHere we create a per-object cache of drawcall ranges for MultiDraw* based on the object's material and matrix assignments. We also \"grow\" drawcalls if subsequent ranges in the index buffer have the same assignments. Our sample object would be drawn using 2 states with one glMultiDrawElements each, which creates 3 hardware drawcalls: red covers ranges A and D, and blue is B+C joined together as they are next to each other in the indexbuffer.\n- **drawcall join**\nAs we traverse we combine drawcalls under the same state; this means 3 drawcalls for hardware, and 3 for software as well as 3 states: red A, blue B+C, red D.\n- **drawcall individual**\nWe render each piece individually:\nred A, blue B, C, red D.\n\nTypically we do all rendering with basic state redundancy filtering so we don't set up a matrix/material change if the same is still active. To keep things simple for state redundancy filtering, you should not go too fine-grained, otherwise all the tracking causes too much memory hopping. In our case we have 3 indices we track: geometry (handles vertex / index buffer setup), material, and matrix.\n\n### Renderers\nMost renderers will traverse the scene data every frame. The organization of the data is cache-friendly foremost, everything is stored in arrays, without too much memory hopping. Some renderers may implement additional caching for rendering.\n\n#### Variants:\n\n - **bindless**: these variants make use of NVIDIA's bindless extensions NV_vertex_buffer_unified_memory and NV_uniform_buffer_unified_memory, which allows a lower-overhead path in the driver for faster drawcall submission. 
Classic glBindVertexBuffer or glBindBufferRange are replaced with glBufferAddressRangeNV.\n - **sorted**: indicates we do a global scene sort once, to minimize state changes in subsequent frames.\n - **cullsorted**: next to global sorting by state, we also apply occlusion culling as presented in [end of the slides](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf) or in the [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) sample.\n - **emulated**: several of the NV_command_list techniques can be run in emulated mode.\n\n#### Techniques:\n\nWe are mostly looking into accelerating our matrix and material parameter switching performance.\n\n- **uborange**\nAll matrices and materials are stored in big buffer objects, which allows us to efficiently bind the required sub-range for a drawcall via glBindBufferRange(GL_UNIFORM_BUFFER, usageSlot, buffer, index * itemSize, itemSize). NVIDIA provides optimized paths if you keep the buffer and itemSize for a usageSlot constant for many glBindBufferRange calls. Be aware of GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, which is 256 bytes for most current NVIDIA hardware (Fermi, Kepler, Maxwell).\n\n- **ubosub**\nNot as efficient as the above, but maybe appropriate if you cannot afford to cache parameter data. We make use of one streaming buffer per usage slot and continuously update it via glBufferSubData. NVIDIA's drivers do particularly well if you never bind this buffer as anything but a GL_UNIFORM_BUFFER and keep size and offsets a multiple of 4.\n\n- **indexedmdi**\nSimilar to uborange we make use of all data stored in bigger buffers in advance. It doesn't make this data \"static\"; you can always update the portions you need, but there is a high chance a lot of data is the same frame to frame. This time, we do not bind memory ranges through the OpenGL API, but let the shader do an indirection and only pass the required matrix and material indices. 
\nFor the matrix data we use GL_TEXTURE_BUFFER as it's particularly performant for high frequency / potentially divergent access. We typically have far more matrices than materials in our scene. For material data, it's a bit \"ugly\" to use lots of texelFetch instructions decoding all our parameters; it's much easier to write them as structs and store the array either as GL_UNIFORM_BUFFER or GL_SHADER_STORAGE_BUFFER. The latter is only recommended if you have divergent shader access or exceed the 64 KB limit of UBOs.\nTo pass the indices per-drawcall we make use of GL_ARB_multi_draw_indirect and \"instanced\" vertex attributes as described at [GTC 2013 on slide 27](http://on-demand.gputechconf.com/gtc/2013/presentations/S3032-Advanced-Scenegraph-Rendering-Pipeline.pdf).\nTherefore this renderer requires two additional buffers: one encoding our object's matrix and material index assignments, and one encoding the scene's drawcalls as GL_DRAW_INDIRECT_BUFFER. \n\nA hybrid approach, where the parameter index like \"indexedmdi\" is used for matrices and uborange bind is used for materials, is not yet implemented, but would be a good compromise.\n\nThe following renderers make use of the **NV_command_list** extension. In principle they **behave as \"uborange\"**, however all buffer bindings and drawcalls are encoded into binary tokens that are submitted in bulk. In preparation for drawing, the appropriate stateobjects are created and reused when rendering (one for lines and for triangles). While stateobject capturing is not extremely expensive, it is still best to cache it across frames.\n\n- **tokenbuffer**\nSimilar to indexedmdi we create a buffer that describes our scene by storing all the relevant token commands. This buffer is filled only once and then later reused.\n- **tokenlist**\nInstead of storing the tokens inside a buffer we make use of the commandlist object, and create and compile one for each shademode for later reuse. 
Every time our state changes (for instance, when resizing FBOs), we have to recreate these lists, which makes it less flexible than buffer but faster when there are lots of statechanges within the list.\n- **tokenstream**\nThis approach does not reuse the tokens across frames, but instead dynamically creates the tokenstream every frame. By default, the demo fills and submits tokens in chunks of 256 KB; better values may exist depending on the scene.\n\n### Performance\n\nAll timings are preliminary results for *Timer Draw* on a win7-64, i7-860, Quadro K5000 system. \n\n**Important Note About Timer Query Results:** The GPU time reported below is measured via timer queries, those values however can be skewed by CPU bottlenecks. The \"begin\" timestamp may be part of a different command submission to the GPU than the \"end\" timestamp. That means a long delay on the CPU side between those submissions will also increase the reported GPU time. That is why in CPU-bottlenecked scenarios with tons of OpenGL commands, the GPU times below are close to the CPU time.\n\n```\nscene statistics:\ngeometries:    110\nmaterials:      66\nnodes:        5004\nobjects:      2497\n\ntokenbuffer/glstream complexities:\ntype: solid              materialgroups | drawcall individual\ncommandsize:                     347292 | 1301692\nstatetoggles:                         1 | 1\ntokens:                 \nGL_DRAW_ELEMENTS_COMMAND_NV:      11103 |   68452\nGL_ELEMENT_ADDRESS_COMMAND_NV:      807 |     807\nGL_ATTRIBUTE_ADDRESS_COMMAND_NV:    807 |     807\nGL_UNIFORM_ADDRESS_COMMAND_NV:     8988 |   11289\nGL_POLYGON_OFFSET_COMMAND_NV:         1 |       1\n\ntype: solid w edges\ncommandsize:                     629644 | 2534412\nstatetoggles:                      4994 |    4994\ntokens:\nGL_DRAW_ELEMENTS_COMMAND_NV:      22281 |  136750\nGL_ELEMENT_ADDRESS_COMMAND_NV:      807 |     807\nGL_ATTRIBUTE_ADDRESS_COMMAND_NV:    807 |     807\nGL_UNIFORM_ADDRESS_COMMAND_NV:    15457 |   
20036\nGL_POLYGON_OFFSET_COMMAND_NV:         1 |       1\n```\n\nAs one can see from the statistics the key difference is the number of drawcalls for the hardware:\n- **materialgroups**: ~ 10 000 drawcalls (inner two columns)\n- **drawcall individual**: ~ 70 000 drawcalls (rightmost two columns)\n\n*shademode: solid*\n\nrenderer | GPU time | CPU time | GPU time | CPU time (microseconds)\n------------ | ------------- | ------------- | ------------- | -------------\n**strategy** | **material-** | **-groups** | **drawcall-** | **-individual**\nubosub | 1550 | 1870 |  6000 | 7420\nuborange | 1010| 1890 | 3720 | 7660\nuborange_bindless | 1010 | 1200 | 2560 | 4900\nindexedmdi | 1120 | 1200 | 2080 | 1100\ntokenstream | 860 | 300 | 1520 | 1400\ntokenbuffer | 780 | <10 | 1230 | <10\ntokenlist | 780 | <10 | 880 | <10\ntokenbuffer_cullsorted | 540 | 120 | 760 | 120\n\nThe results are of course very scene dependent; this model was specifically chosen as it is made of many parts with very few triangles. If the complexity per drawcall were higher (say more triangles or complex shading), then the CPU impact would be lower and we would be GPU-bound. However the CPU time recovered by faster submission mechanisms can always be used elsewhere. So even if we are GPU-bound, time should not be wasted.\n\nWe can see that the \"token\" techniques do very well and are never CPU-bound, and the \"indexedmdi\" technique is also quite good. This technique is especially useful for very high-frequency parameters, for example when rendering \"id-buffers\" for selection, but also for matrix indices. For general use-cases, working with uborange binds is recommended. \n\n*shademode: solid with edges*\n\nUnless \"sorted\", around 5000 toggles are done between triangles/line rendering. 
The shader\nis manipulated through an immediate vertex attribute to toggle between lit/unlit rendering respectively.\n\nrenderer | GPU time | CPU time | GPU time | CPU time (microseconds)\n------------ | ------------- | ------------- | ------------- | -------------\n**strategy** | **material-** | **-groups** | **drawcall-** | **-individual**\nubosub | 2890 | 3350 | 13000 | 15000 | \nuborange | 2150 | 3700 | 12500 | 15200 | \nuborange_bindless | 2150 | 2640 | 8300 | 10000\nindexedmdi | 2340 | 2200 | 4050 | 2050\ntokenstream | 1860 | 1250 | 3360 | 3200\ntokenbuffer | 1750 | 450 | 2650 | 350\ntokenlist | 1650 | <10 | 1890 | <10\ntokenbuffer_cullsorted | 770 | 120 | 1250 | 120\n\nCompared to the \"solid\" results, the tokenbuffer and tokenlist techniques show a greater difference in CPU time.\n\n\n### Model Explosion View\n\nThe simple viewer allows you to add animation to the scene and artificially increase scene complexity via \"clones\".\n\n![xplodeclones](https://github.com/nvpro-samples/gl_cadscene_rendertechniques/blob/master/doc/xplodeclones.jpg)\n\nTo \"emulate\" typical interaction where users might move objects around or have animated scenes, the sample also implements the matrix transform system sketched on [slide 30](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf). \n\nThe effect works by first moving all object matrices a bit (*xplode-animation.comp.glsl*), and afterwards the transform hierarchy is updated via a system that is implemented in the *transformsystem.cpp / hpp* files.\n\nThe code is not particularly tuned but naively assumes that upper levels of the hierarchy contain fewer nodes than lower levels (pyramid). Therefore it uses leaf-processing (which redundantly calculates matrices) instead of level-wise processing for the first 10 levels, to avoid dependencies (one small compute task waiting for the previous). Later levels are always processed level-wise. 
A better strategy would be to switch between the two approaches based on the actual number of nodes per level. The shaders for this are *transform-leaves.comp.glsl* and *transform-level.comp.glsl*. \n\nThe hierarchy is managed by *nodetree.cpp/hpp*, which stores the tree as an array of 32-bit values. Each value represents a node, and encodes the \"level\" in the hierarchy in 8 bits and its parent index in the rest of the bits. This means you can traverse a node up to the root:\n\n``` cpp\n// sample traversal of \"idx\" node to root\nself = array[idx];\nwhile( self.level != 0) {\n  self = array[self.parent];\n}\n// self is now the top root for the idx node\n```\n\nThe nodetree also stores two node index lists for each level: one storing all nodes of a level, and one for all leaves in this level. We feed these two index lists to the appropriate shader. When leaf processing is used we append the leaves level-wise, which should minimize divergence within a warp (ideally most threads have the same number of levels to ascend in the hierarchy).\n\nMany CAD applications tend to use double-precision matrices, and the system could be adjusted for this. For rendering, however, float matrices should be used. To account for large translation values, one could run a concatenation of view-projection (double) and object-world-matrix (double) per-frame and generate the matrices (float) for actual vertex transforms. To improve memory performance, it might be beneficial to use double only for storing translations within the matrices.\n\n> Note: Only the GPU matrices are updated. CPU techniques such as \"ubosub\" will not show animations.\n\n### Sample Highlights\n\nThis sample is a bit more complex than most others as it contains several subsystems. Don't hesitate to contact the author if something is unclear (commenting was not a priority ;) ).\n\n#### csfviewer.cpp\nThe principal setup of the sample is in this main file. 
However, most of the interesting bits happen in the renderers.\n\n- Sample::think - prepares the frame and calls the renderer's draw function\n\n#### renderer... and tokenbase...\nEach renderer has its own file and is derived from the **Renderer** class in *renderer.hpp*\n\n- Renderer::init - some renderers may allocate extra buffers or create their own data structures for the scene.\n- Renderer::deinit \n- Renderer::draw\n\nThe renderers may have additional functions. The \"token\" renderers using NV_command_list or \"indexedmdi\", for instance, must create their own scene representation.\n\n#### cadscene...\nThe \"csf\" (cadscene file) format is a simple binary format that encodes a scene as is typical for CAD. It closely matches the description at the beginning of the readme. It is not very sophisticated, and is meant for demo purposes.\n\n> *Note*: The **geforce.csf.gz** assembly binary file that ships with this sample **may NOT be redistributed.**\n\n#### nodetree... and transform...\nThese files implement the matrix hierarchy updates as described in the \"model explosion view\" section.\n\n#### cull... and scan...\nFor files related to culling, it is best to refer to the [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) sample, as it leverages the same system and focuses on just that topic.\n\n*renderertokensortcull.cpp* implements *RendererCullSortToken::CullJobToken::resultFromBits*, which contains the details of how the occlusion results are handled in this sample. The implementation uses the \"raster\" \"temporal\" approach.\n\n#### statesystem... nvtoken... and nvcommandlist...\nThese files contain helpers when using the NV_command_list extension. Please see [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) for a smaller sample.\n\n### Building\nIdeally, clone this and other interesting [nvpro-samples](https://github.com/nvpro-samples) repositories into a common subdirectory. 
You will always need [nvpro_core](https://github.com/nvpro-samples/nvpro_core). The nvpro_core is searched for either as a subdirectory of the sample, or one or two directories up.\n\nIf you are interested in multiple samples, you can use the [build_all](https://github.com/nvpro-samples/build_all) CMake project as the entry point. This will also give you options to enable or disable individual samples when creating the solutions.\n\n### Related Samples\n[gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) illustrates the core principle of the NV_command_list extension.\n[gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) also uses the occlusion system of this sample, but in a simpler usage scenario.\n\nWhen using classic scenegraphs, there is typically a lot of overhead in traversing the scene. For this reason, it is highly recommended to use simpler representations for actual rendering. Consider using flattened hierarchies, arrays, memory-friendly data structures, data-oriented design patterns, and similar techniques.\nIf you are still working with a classic scenegraph, then [nvpro-pipeline](https://github.com/nvpro-pipeline/pipeline) may provide some acceleration strategies to avoid full scenegraph traversal. Some of these strategies are also described in this [GTC 2013 presentation](http://on-demand.gputechconf.com/gtc/2013/presentations/S3032-Advanced-Scenegraph-Rendering-Pipeline.pdf).\n"
  },
  {
    "path": "cadscene.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"cadscene.hpp\"\n#include <fileformats/cadscenefile.h>\n\n#include <algorithm>\n#include <assert.h>\n#include <cstddef>\n#include \"glm/gtc/type_ptr.hpp\"\n\n#define USE_CACHECOMBINE 1\n\n\nglm::vec4 randomVector(float from, float to)\n{\n  glm::vec4 vec;\n  float     width = to - from;\n  for(int i = 0; i < 4; i++)\n  {\n    vec[i] = from + (float(rand()) / float(RAND_MAX)) * width;\n  }\n  return vec;\n}\n\nstatic void recursiveHierarchy(NodeTree& tree, CSFile* csf, int idx, int cloneoffset)\n{\n  for(int i = 0; i < csf->nodes[idx].numChildren; i++)\n  {\n    tree.setNodeParent((NodeTree::nodeID)csf->nodes[idx].children[i] + cloneoffset, (NodeTree::nodeID)idx + cloneoffset);\n  }\n\n  for(int i = 0; i < csf->nodes[idx].numChildren; i++)\n  {\n    recursiveHierarchy(tree, csf, csf->nodes[idx].children[i], cloneoffset);\n  }\n}\n\nbool CadScene::loadCSF(const char* filename, int clones, int cloneaxis)\n{\n  CSFile*         csf;\n  CSFileMemoryPTR mem = CSFileMemory_new();\n  if(CSFile_loadExt(&csf, filename, mem) != CADSCENEFILE_NOERROR || !(csf->fileFlags & CADSCENEFILE_FLAG_UNIQUENODES))\n  {\n    
CSFileMemory_delete(mem);\n    return false;\n  }\n\n  int copies = clones + 1;\n\n  CSFile_transform(csf);\n\n  srand(234525);\n\n  // materials\n  m_materials.resize(csf->numMaterials);\n  for(int n = 0; n < csf->numMaterials; n++)\n  {\n    CSFMaterial* csfmaterial = &csf->materials[n];\n    Material&    material    = m_materials[n];\n\n    for(int i = 0; i < 2; i++)\n    {\n      material.sides[i].ambient  = randomVector(0.0f, 0.1f);\n      material.sides[i].diffuse  = glm::make_vec4(csf->materials[n].color) + randomVector(0.0f, 0.07f);\n      material.sides[i].specular = randomVector(0.25f, 0.55f);\n      material.sides[i].emissive = randomVector(0.0f, 0.05f);\n    }\n  }\n\n  glCreateBuffers(1, &m_materialsGL);\n  glNamedBufferStorage(m_materialsGL, sizeof(Material) * m_materials.size(), &m_materials[0], 0);\n  //glMapNamedBufferRange(m_materialsGL, 0, sizeof(Material) * m_materials.size(), GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT);\n\n  // geometry\n  int numGeoms = csf->numGeometries;\n  m_geometry.resize(csf->numGeometries * copies);\n  m_geometryBboxes.resize(csf->numGeometries * copies);\n  for(int n = 0; n < csf->numGeometries; n++)\n  {\n    CSFGeometry* csfgeom = &csf->geometries[n];\n    Geometry&    geom    = m_geometry[n];\n\n    geom.cloneIdx = -1;\n\n    geom.numVertices   = csfgeom->numVertices;\n    geom.numIndexSolid = csfgeom->numIndexSolid;\n    geom.numIndexWire  = csfgeom->numIndexWire;\n\n    std::vector<Vertex> vertices(csfgeom->numVertices);\n    for(int i = 0; i < csfgeom->numVertices; i++)\n    {\n      vertices[i].position[0] = csfgeom->vertex[3 * i + 0];\n      vertices[i].position[1] = csfgeom->vertex[3 * i + 1];\n      vertices[i].position[2] = csfgeom->vertex[3 * i + 2];\n      vertices[i].position[3] = 1.0f;\n      if(csfgeom->normal)\n      {\n        vertices[i].normal[0] = csfgeom->normal[3 * i + 0];\n        vertices[i].normal[1] = csfgeom->normal[3 * i + 1];\n        vertices[i].normal[2] = csfgeom->normal[3 * i + 2];\n     
   vertices[i].normal[3] = 0.0f;\n      }\n      else\n      {\n        vertices[i].normal = glm::vec4(normalize(glm::vec3(vertices[i].position)), 0.0f);\n      }\n\n\n      m_geometryBboxes[n].merge(vertices[i].position);\n    }\n\n    geom.vboSize = sizeof(Vertex) * vertices.size();\n\n    glCreateBuffers(1, &geom.vboGL);\n    glNamedBufferStorage(geom.vboGL, geom.vboSize, &vertices[0], 0);\n\n    std::vector<GLuint> indices(csfgeom->numIndexSolid + csfgeom->numIndexWire);\n    memcpy(&indices[0], csfgeom->indexSolid, sizeof(GLuint) * csfgeom->numIndexSolid);\n    if(csfgeom->indexWire)\n    {\n      memcpy(&indices[csfgeom->numIndexSolid], csfgeom->indexWire, sizeof(GLuint) * csfgeom->numIndexWire);\n    }\n\n    geom.iboSize = sizeof(GLuint) * indices.size();\n\n    glCreateBuffers(1, &geom.iboGL);\n    glNamedBufferStorage(geom.iboGL, geom.iboSize, &indices[0], 0);\n\n    if(has_GL_NV_vertex_buffer_unified_memory)\n    {\n      glGetNamedBufferParameterui64vNV(geom.vboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.vboADDR);\n      glMakeNamedBufferResidentNV(geom.vboGL, GL_READ_ONLY);\n\n      glGetNamedBufferParameterui64vNV(geom.iboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.iboADDR);\n      glMakeNamedBufferResidentNV(geom.iboGL, GL_READ_ONLY);\n    }\n\n    geom.parts.resize(csfgeom->numParts);\n\n    size_t offsetSolid = 0;\n    size_t offsetWire  = csfgeom->numIndexSolid * sizeof(GLuint);\n    for(int i = 0; i < csfgeom->numParts; i++)\n    {\n      geom.parts[i].indexWire.count  = csfgeom->parts[i].numIndexWire;\n      geom.parts[i].indexSolid.count = csfgeom->parts[i].numIndexSolid;\n\n      geom.parts[i].indexWire.offset  = offsetWire;\n      geom.parts[i].indexSolid.offset = offsetSolid;\n\n      offsetWire += csfgeom->parts[i].numIndexWire * sizeof(GLuint);\n      offsetSolid += csfgeom->parts[i].numIndexSolid * sizeof(GLuint);\n    }\n  }\n  for(int c = 1; c <= clones; c++)\n  {\n    for(int n = 0; n < numGeoms; n++)\n    {\n      m_geometryBboxes[n + numGeoms * c] = 
m_geometryBboxes[n];\n\n      const Geometry& geomorig = m_geometry[n];\n      Geometry&       geom     = m_geometry[n + numGeoms * c];\n\n      geom = geomorig;\n\n#if 1\n      geom.cloneIdx = n;\n#else\n      geom.cloneIdx = -1;\n      glCreateBuffers(1, &geom.vboGL);\n      glNamedBufferStorage(geom.vboGL, geom.vboSize, 0, 0);\n\n      glCreateBuffers(1, &geom.iboGL);\n      glNamedBufferStorage(geom.iboGL, geom.iboSize, 0, 0);\n\n      if(has_GL_NV_vertex_buffer_unified_memory)\n      {\n        glGetNamedBufferParameterui64vNV(geom.vboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.vboADDR);\n        glMakeNamedBufferResidentNV(geom.vboGL, GL_READ_ONLY);\n\n        glGetNamedBufferParameterui64vNV(geom.iboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.iboADDR);\n        glMakeNamedBufferResidentNV(geom.iboGL, GL_READ_ONLY);\n      }\n\n      glCopyNamedBufferSubData(geomorig.vboGL, geom.vboGL, 0, 0, geom.vboSize);\n      glCopyNamedBufferSubData(geomorig.iboGL, geom.iboGL, 0, 0, geom.iboSize);\n#endif\n    }\n  }\n\n\n  glCreateBuffers(1, &m_geometryBboxesGL);\n  glNamedBufferStorage(m_geometryBboxesGL, sizeof(BBox) * m_geometryBboxes.size(), &m_geometryBboxes[0], 0);\n  glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_geometryBboxesTexGL);\n  glTextureBuffer(m_geometryBboxesTexGL, GL_RGBA32F, m_geometryBboxesGL);\n\n  // nodes\n  int numObjects = 0;\n  m_matrices.resize(csf->numNodes * copies);\n\n  for(int n = 0; n < csf->numNodes; n++)\n  {\n    CSFNode* csfnode = &csf->nodes[n];\n\n    memcpy(glm::value_ptr(m_matrices[n].objectMatrix), csfnode->objectTM, sizeof(float) * 16);\n    memcpy(glm::value_ptr(m_matrices[n].worldMatrix), csfnode->worldTM, sizeof(float) * 16);\n\n    m_matrices[n].objectMatrixIT = glm::transpose(glm::inverse(m_matrices[n].objectMatrix));\n    m_matrices[n].worldMatrixIT  = glm::transpose(glm::inverse(m_matrices[n].worldMatrix));\n\n    if(csfnode->geometryIDX < 0)\n      continue;\n\n    numObjects++;\n  }\n\n\n  // objects\n  m_objects.resize(numObjects * 
copies);\n  m_objectAssigns.resize(numObjects * copies);\n  numObjects = 0;\n  for(int n = 0; n < csf->numNodes; n++)\n  {\n    CSFNode* csfnode = &csf->nodes[n];\n\n    if(csfnode->geometryIDX < 0)\n      continue;\n\n    Object& object = m_objects[numObjects];\n\n    object.matrixIndex   = n;\n    object.geometryIndex = csfnode->geometryIDX;\n\n    m_objectAssigns[numObjects] = glm::ivec2(object.matrixIndex, object.geometryIndex);\n\n    object.parts.resize(csfnode->numParts);\n    for(int i = 0; i < csfnode->numParts; i++)\n    {\n      object.parts[i].active        = 1;\n      object.parts[i].matrixIndex   = csfnode->parts[i].nodeIDX < 0 ? object.matrixIndex : csfnode->parts[i].nodeIDX;\n      object.parts[i].materialIndex = csfnode->parts[i].materialIDX;\n    }\n\n    BBox bbox = m_geometryBboxes[object.geometryIndex].transformed(m_matrices[n].worldMatrix);\n    m_bbox.merge(bbox);\n\n    updateObjectDrawCache(object);\n\n    numObjects++;\n  }\n\n  // compute clone move delta based on m_bbox;\n\n  glm::vec4 dim = m_bbox.max - m_bbox.min;\n\n  int sq      = 1;\n  int numAxis = 0;\n  for(int i = 0; i < 3; i++)\n  {\n    numAxis += (cloneaxis & (1 << i)) ? 
1 : 0;\n  }\n\n  assert(numAxis);\n\n  switch(numAxis)\n  {\n    case 1:\n      sq = copies;\n      break;\n    case 2:\n      while(sq * sq < copies)\n      {\n        sq++;\n      }\n      break;\n    case 3:\n      while(sq * sq * sq < copies)\n      {\n        sq++;\n      }\n      break;\n  }\n\n\n  for(int c = 1; c <= clones; c++)\n  {\n    int numNodes = csf->numNodes;\n\n    glm::vec4 shift = dim * 1.05f;\n\n    float u = 0;\n    float v = 0;\n    float w = 0;\n\n    switch(numAxis)\n    {\n      case 1:\n        u = float(c);\n        break;\n      case 2:\n        u = float(c % sq);\n        v = float(c / sq);\n        break;\n      case 3:\n        u = float(c % sq);\n        v = float((c / sq) % sq);\n        w = float(c / (sq * sq));\n        break;\n    }\n\n    float use = u;\n\n    if(cloneaxis & (1 << 0))\n    {\n      shift.x *= -use;\n      if(numAxis > 1)\n        use = v;\n    }\n    else\n    {\n      shift.x = 0;\n    }\n\n    if(cloneaxis & (1 << 1))\n    {\n      shift.y *= use;\n      if(numAxis > 2)\n        use = w;\n      else if(numAxis > 1)\n        use = v;\n    }\n    else\n    {\n      shift.y = 0;\n    }\n\n    if(cloneaxis & (1 << 2))\n    {\n      shift.z *= -use;\n    }\n    else\n    {\n      shift.z = 0;\n    }\n\n    shift.w = 0;\n\n    // move all world matrices\n    for(int n = 0; n < numNodes; n++)\n    {\n      MatrixNode& node     = m_matrices[n + numNodes * c];\n      MatrixNode& nodeOrig = m_matrices[n];\n      node                 = nodeOrig;\n      node.worldMatrix[3]  = node.worldMatrix[3] + shift;\n      node.worldMatrixIT   = glm::transpose(glm::inverse(node.worldMatrix));\n    }\n\n    {\n      // patch object matrix of root\n      MatrixNode& node     = m_matrices[csf->rootIDX + numNodes * c];\n      node.objectMatrix[3] = node.objectMatrix[3] + shift;\n      node.objectMatrixIT  = glm::transpose(glm::inverse(node.objectMatrix));\n    }\n\n    // clone objects\n    for(int n = 0; n < numObjects; n++)\n    {\n   
   const Object& objectorig = m_objects[n];\n      Object&       object     = m_objects[n + numObjects * c];\n\n      object = objectorig;\n      object.geometryIndex += c * numGeoms;\n      object.matrixIndex += c * numNodes;\n      for(size_t i = 0; i < object.parts.size(); i++)\n      {\n        object.parts[i].matrixIndex += c * numNodes;\n      }\n      for(size_t i = 0; i < object.cacheSolid.state.size(); i++)\n      {\n        object.cacheSolid.state[i].matrixIndex += c * numNodes;\n      }\n      for(size_t i = 0; i < object.cacheWire.state.size(); i++)\n      {\n        object.cacheWire.state[i].matrixIndex += c * numNodes;\n      }\n\n      m_objectAssigns[n + numObjects * c] = glm::ivec2(object.matrixIndex, object.geometryIndex);\n    }\n  }\n\n  glCreateBuffers(1, &m_matricesGL);\n  glNamedBufferStorage(m_matricesGL, sizeof(MatrixNode) * m_matrices.size(), &m_matrices[0], 0);\n  //glMapNamedBufferRange(m_matricesGL, 0, sizeof(MatrixNode) * m_matrices.size(), GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT);\n\n  glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_matricesTexGL);\n  glTextureBuffer(m_matricesTexGL, GL_RGBA32F, m_matricesGL);\n\n  glCreateBuffers(1, &m_objectAssignsGL);\n  glNamedBufferStorage(m_objectAssignsGL, sizeof(glm::ivec2) * m_objectAssigns.size(), &m_objectAssigns[0], 0);\n\n  if(has_GL_NV_vertex_buffer_unified_memory)\n  {\n    glGetNamedBufferParameterui64vNV(m_materialsGL, GL_BUFFER_GPU_ADDRESS_NV, &m_materialsADDR);\n    glMakeNamedBufferResidentNV(m_materialsGL, GL_READ_ONLY);\n\n    glGetNamedBufferParameterui64vNV(m_matricesGL, GL_BUFFER_GPU_ADDRESS_NV, &m_matricesADDR);\n    glMakeNamedBufferResidentNV(m_matricesGL, GL_READ_ONLY);\n\n    if(has_GL_ARB_bindless_texture)\n    {\n      m_matricesTexGLADDR = glGetTextureHandleARB(m_matricesTexGL);\n      glMakeTextureHandleResidentARB(m_matricesTexGLADDR);\n    }\n  }\n\n  m_nodeTree.create(copies * csf->numNodes);\n  for(int i = 0; i < copies; i++)\n  {\n    int cloneoffset = (csf->numNodes) 
* i;\n    int root        = csf->rootIDX + cloneoffset;\n    recursiveHierarchy(m_nodeTree, csf, csf->rootIDX, cloneoffset);\n\n    m_nodeTree.setNodeParent((NodeTree::nodeID)root, m_nodeTree.getTreeRoot());\n    m_nodeTree.addToTree((NodeTree::nodeID)root);\n  }\n\n  glCreateBuffers(1, &m_parentIDsGL);\n  glNamedBufferStorage(m_parentIDsGL, m_nodeTree.getTreeCompactNodes().size() * sizeof(GLuint),\n                       &m_nodeTree.getTreeCompactNodes()[0], 0);\n\n  glCreateBuffers(1, &m_matricesOrigGL);\n  glNamedBufferStorage(m_matricesOrigGL, sizeof(MatrixNode) * m_matrices.size(), &m_matrices[0], 0);\n  glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_matricesOrigTexGL);\n  glTextureBuffer(m_matricesOrigTexGL, GL_RGBA32F, m_matricesOrigGL);\n\n  CSFileMemory_delete(mem);\n  return true;\n}\n\n\nstruct ListItem\n{\n  CadScene::DrawStateInfo state;\n  CadScene::DrawRange     range;\n};\n\nstatic bool ListItem_compare(const ListItem& a, const ListItem& b)\n{\n  int diff = 0;\n  diff     = diff != 0 ? diff : (a.state.materialIndex - b.state.materialIndex);\n  diff     = diff != 0 ? diff : (a.state.matrixIndex - b.state.matrixIndex);\n  diff     = diff != 0 ? 
diff : int(a.range.offset - b.range.offset);\n\n  return diff < 0;\n}\n\nstatic void fillCache(CadScene::DrawRangeCache& cache, const std::vector<ListItem>& list)\n{\n  cache = CadScene::DrawRangeCache();\n\n  if(!list.size())\n    return;\n\n  CadScene::DrawStateInfo state = list[0].state;\n  CadScene::DrawRange     range = list[0].range;\n\n  int stateCount = 0;\n\n  for(size_t i = 1; i < list.size() + 1; i++)\n  {\n    bool newrange = false;\n    if(i == list.size() || list[i].state != state)\n    {\n      // push range\n      stateCount++;\n      cache.offsets.push_back(range.offset);\n      cache.counts.push_back(range.count);\n\n      // emit\n      cache.state.push_back(state);\n      cache.stateCount.push_back(stateCount);\n\n      stateCount = 0;\n\n      if(i == list.size())\n      {\n        break;\n      }\n      else\n      {\n        state        = list[i].state;\n        range.offset = list[i].range.offset;\n        range.count  = 0;\n        newrange     = true;\n      }\n    }\n\n    const CadScene::DrawRange& currange = list[i].range;\n    if(newrange || (USE_CACHECOMBINE && currange.offset == (range.offset + sizeof(GLuint) * range.count)))\n    {\n      // merge\n      range.count += currange.count;\n    }\n    else\n    {\n      // push\n      stateCount++;\n      cache.offsets.push_back(range.offset);\n      cache.counts.push_back(range.count);\n\n      range = currange;\n    }\n  }\n}\n\nvoid CadScene::updateObjectDrawCache(Object& object)\n{\n  Geometry& geom = m_geometry[object.geometryIndex];\n\n  std::vector<ListItem> listSolid;\n  std::vector<ListItem> listWire;\n\n  listSolid.reserve(geom.parts.size());\n  listWire.reserve(geom.parts.size());\n\n  for(size_t i = 0; i < geom.parts.size(); i++)\n  {\n    if(!object.parts[i].active)\n      continue;\n\n    ListItem item;\n    item.state.materialIndex = object.parts[i].materialIndex;\n\n    item.range             = geom.parts[i].indexSolid;\n    item.state.matrixIndex = 
object.parts[i].matrixIndex;\n    listSolid.push_back(item);\n\n    item.range             = geom.parts[i].indexWire;\n    item.state.matrixIndex = object.parts[i].matrixIndex;\n    listWire.push_back(item);\n  }\n\n  std::sort(listSolid.begin(), listSolid.end(), ListItem_compare);\n  std::sort(listWire.begin(), listWire.end(), ListItem_compare);\n\n  fillCache(object.cacheSolid, listSolid);\n  fillCache(object.cacheWire, listWire);\n}\n\nvoid CadScene::enableVertexFormat(int attrPos, int attrNormal)\n{\n  glVertexAttribFormat(attrPos, 3, GL_FLOAT, GL_FALSE, 0);\n  glVertexAttribFormat(attrNormal, 3, GL_FLOAT, GL_FALSE, offsetof(CadScene::Vertex, normal));\n  glVertexAttribBinding(attrPos, 0);\n  glVertexAttribBinding(attrNormal, 0);\n  glEnableVertexAttribArray(attrPos);\n  glEnableVertexAttribArray(attrNormal);\n  glBindVertexBuffer(0, 0, 0, sizeof(CadScene::Vertex));\n}\n\nvoid CadScene::disableVertexFormat(int attrPos, int attrNormal)\n{\n  glDisableVertexAttribArray(attrPos);\n  glDisableVertexAttribArray(attrNormal);\n  glBindVertexBuffer(0, 0, 0, sizeof(CadScene::Vertex));\n}\n\nvoid CadScene::unload()\n{\n  if(m_geometry.empty())\n    return;\n\n  glFinish();\n\n  if(has_GL_NV_vertex_buffer_unified_memory)\n  {\n    if(has_GL_ARB_bindless_texture)\n    {\n      glMakeTextureHandleNonResidentARB(m_matricesTexGLADDR);\n    }\n\n    glMakeNamedBufferNonResidentNV(m_matricesGL);\n    glMakeNamedBufferNonResidentNV(m_materialsGL);\n  }\n\n  glDeleteTextures(1, &m_matricesOrigTexGL);\n  glDeleteTextures(1, &m_matricesTexGL);\n  glDeleteTextures(1, &m_geometryBboxesTexGL);\n\n  glDeleteBuffers(1, &m_matricesOrigGL);\n  glDeleteBuffers(1, &m_matricesGL);\n  glDeleteBuffers(1, &m_materialsGL);\n  glDeleteBuffers(1, &m_objectAssignsGL);\n  glDeleteBuffers(1, &m_geometryBboxesGL);\n  glDeleteBuffers(1, &m_parentIDsGL);\n\n\n  for(size_t i = 0; i < m_geometry.size(); i++)\n  {\n    if(m_geometry[i].cloneIdx >= 0)\n      continue;\n\n    
if(has_GL_NV_vertex_buffer_unified_memory)\n    {\n      glMakeNamedBufferNonResidentNV(m_geometry[i].iboGL);\n      glMakeNamedBufferNonResidentNV(m_geometry[i].vboGL);\n    }\n    glDeleteBuffers(1, &m_geometry[i].iboGL);\n    glDeleteBuffers(1, &m_geometry[i].vboGL);\n  }\n\n  m_matrices.clear();\n  m_geometryBboxes.clear();\n  m_geometry.clear();\n  m_objectAssigns.clear();\n  m_objects.clear();\n  m_geometryBboxes.clear();\n  m_nodeTree.clear();\n\n  glFinish();\n}\n\nvoid CadScene::resetMatrices()\n{\n  glCopyNamedBufferSubData(m_matricesOrigGL, m_matricesGL, 0, 0, sizeof(CadScene::MatrixNode) * m_matrices.size());\n}\n"
  },
  {
    "path": "cadscene.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n#ifndef CADSCENE_H__\n#define CADSCENE_H__\n\n#include <cstring> // memset\n#include <nvgl/extensions_gl.hpp>\n#include <glm/glm.hpp>\n#include <vector>\n#include \"nodetree.hpp\"\n\nclass CadScene {\n\npublic:\n\n  struct BBox {\n    glm::vec4    min;\n    glm::vec4    max;\n\n    BBox() : min(FLT_MAX), max(-FLT_MAX) {}\n\n    inline void merge( const glm::vec4& point )\n    {\n      min = glm::min(min, point);\n      max = glm::max(max, point);\n    }\n\n    inline void merge( const BBox& bbox )\n    {\n      min = glm::min(min, bbox.min);\n      max = glm::max(max, bbox.max);\n    }\n\n    inline BBox transformed ( const glm::mat4 &matrix, int dim=3)\n    {\n      int i;\n      glm::vec4 box[16];\n      // create box corners\n      box[0] = glm::vec4(min.x,min.y,min.z,min.w);\n      box[1] = glm::vec4(max.x,min.y,min.z,min.w);\n      box[2] = glm::vec4(min.x,max.y,min.z,min.w);\n      box[3] = glm::vec4(max.x,max.y,min.z,min.w);\n      box[4] = glm::vec4(min.x,min.y,max.z,min.w);\n      box[5] = glm::vec4(max.x,min.y,max.z,min.w);\n      box[6] = glm::vec4(min.x,max.y,max.z,min.w);\n      box[7] = glm::vec4(max.x,max.y,max.z,min.w);\n\n      box[8] = glm::vec4(min.x,min.y,min.z,max.w);\n  
    box[9] = glm::vec4(max.x,min.y,min.z,max.w);\n      box[10] = glm::vec4(min.x,max.y,min.z,max.w);\n      box[11] = glm::vec4(max.x,max.y,min.z,max.w);\n      box[12] = glm::vec4(min.x,min.y,max.z,max.w);\n      box[13] = glm::vec4(max.x,min.y,max.z,max.w);\n      box[14] = glm::vec4(min.x,max.y,max.z,max.w);\n      box[15] = glm::vec4(max.x,max.y,max.z,max.w);\n\n      // transform box corners\n      // and find new mins,maxs\n      BBox bbox;\n\n      for (i = 0; i < (1<<dim) ; i++){\n        glm::vec4 point = matrix * box[i];\n        bbox.merge(point);\n      }\n\n      return bbox;\n    }\n  };\n\n  struct MaterialSide {\n    glm::vec4 ambient;\n    glm::vec4 diffuse;\n    glm::vec4 specular;\n    glm::vec4 emissive;\n  };\n\n  // need to keep this 256 byte aligned (UBO range)\n  struct Material {\n    MaterialSide  sides[2];\n    GLuint64      texturesADDR[4];\n    GLuint        textures[4];\n    GLuint        _pad[4+16];\n\n    Material() {\n      memset(this,0,sizeof(Material));\n    }\n  };\n\n  // need to keep this 256 byte aligned (UBO range)\n  struct MatrixNode {\n    glm::mat4  worldMatrix;\n    glm::mat4  worldMatrixIT;\n    glm::mat4  objectMatrix;\n    glm::mat4  objectMatrixIT;\n  };\n\n  struct Vertex {\n    glm::vec4 position;\n    glm::vec4 normal;\n  };\n\n  struct DrawRange {\n    size_t        offset;\n    int           count;\n\n    DrawRange() : offset(0) , count(0) {}\n  };\n\n  struct DrawStateInfo {\n    int           materialIndex;\n    int           matrixIndex;\n\n    friend bool operator != ( const DrawStateInfo &lhs,  const DrawStateInfo &rhs){\n      return lhs.materialIndex != rhs.materialIndex || lhs.matrixIndex != rhs.matrixIndex;\n    }\n\n    friend bool operator == ( const DrawStateInfo &lhs,  const DrawStateInfo &rhs){\n      return lhs.materialIndex == rhs.materialIndex && lhs.matrixIndex == rhs.matrixIndex;\n    }\n  };\n\n  struct DrawRangeCache {\n    std::vector<DrawStateInfo>    state;\n    std::vector<int>         
 stateCount;\n\n    std::vector<size_t>       offsets;\n    std::vector<int>          counts;\n  };\n\n  struct GeometryPart {\n    DrawRange     indexSolid;\n    DrawRange     indexWire;\n  };\n\n  struct Geometry {\n    GLuint    vboGL;\n    GLuint    iboGL;\n    GLuint64  vboADDR;\n    GLuint64  iboADDR;\n    size_t    vboSize;\n    size_t    iboSize;\n\n    std::vector<GeometryPart> parts;\n\n    int       numVertices;\n    int       numIndexSolid;\n    int       numIndexWire;\n    \n    int       cloneIdx;\n  };\n\n  struct ObjectPart {\n    int   active;\n    int   materialIndex;\n    int   matrixIndex;\n  };\n\n  struct Object {\n    int             matrixIndex;\n    int             geometryIndex;\n\n    std::vector<ObjectPart> parts;\n\n    DrawRangeCache  cacheSolid;\n    DrawRangeCache  cacheWire;\n  };\n\n  std::vector<Material>       m_materials;\n  std::vector<BBox>           m_geometryBboxes;\n  std::vector<Geometry>       m_geometry;\n  std::vector<MatrixNode>     m_matrices;\n  std::vector<Object>         m_objects;\n  std::vector<glm::ivec2>  m_objectAssigns;\n\n\n  BBox      m_bbox;\n\n  GLuint    m_materialsGL;\n  GLuint64  m_materialsADDR;\n  GLuint    m_matricesGL;\n  GLuint64  m_matricesADDR;\n  GLuint    m_matricesTexGL;\n  GLuint64  m_matricesTexGLADDR;\n  GLuint    m_geometryBboxesGL;\n  GLuint    m_geometryBboxesTexGL;\n  GLuint    m_objectAssignsGL;\n\n  GLuint    m_parentIDsGL;\n\n  GLuint    m_matricesOrigGL;\n  GLuint    m_matricesOrigTexGL;\n\n  NodeTree  m_nodeTree;\n\n  void  updateObjectDrawCache(Object& object);\n  \n  bool  loadCSF(const char* filename, int clones = 0, int cloneaxis=3);\n  void  unload();\n\n  static void enableVertexFormat(int attrPos, int attrNormal);\n  static void disableVertexFormat(int attrPos, int attrNormal);\n  void resetMatrices();\n};\n\n\n#endif\n\n"
  },
  {
    "path": "common.h",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n\n#define VERTEX_POS      0\n#define VERTEX_NORMAL   1\n#define VERTEX_ASSIGNS  2\n#define VERTEX_WIREMODE 3\n\n#define UBO_SCENE     0\n#define UBO_MATRIX    1\n#define UBO_MATERIAL  2\n\n#define TEX_MATRICES  0\n\n#define USE_BASEINSTANCE  0\n\n//#define UNI_WIREFRAME 0\n\n\n#ifdef __cplusplus\nnamespace csfviewer\n{\n  using namespace glm;\n#endif\n\nstruct SceneData {\n  mat4  viewProjMatrix;\n  mat4  viewMatrix;\n  mat4  viewMatrixIT;\n\n  vec4  viewPos;\n  vec4  viewDir;\n  \n  vec4  wLightPos;\n  \n  ivec2 viewport;\n  uvec2 tboMatrices;\n};\n\n#ifdef __cplusplus\n}\n#endif\n\n\n#if defined(GL_core_profile) || defined(GL_compatibility_profile) || defined(GL_es_profile)\n\n#extension GL_NV_command_list : enable\n#if GL_NV_command_list\nlayout(commandBindableNV) uniform;\n#endif\n\n// prevent this to be used by c++\n\nlayout(std140,binding=UBO_SCENE) uniform sceneBuffer {\n  SceneData   scene;\n};\n\n// must match cadscene!\nlayout(std140,binding=UBO_MATRIX) uniform matrixBuffer {\n  mat4 worldMatrix;\n  mat4 worldMatrixIT;\n  mat4 objectMatrix;\n  mat4 objectMatrixIT;\n} object;\n\n#extension GL_ARB_bindless_texture : enable\n#extension GL_NV_bindless_texture : enable\n#if 
GL_NV_bindless_texture\n#define matricesBuffer  samplerBuffer(scene.tboMatrices)\n#else\nlayout(binding=TEX_MATRICES) uniform samplerBuffer matricesBuffer;\n#endif\n// must match cadscene!\n#define NODE_MATRIX_WORLD     0\n#define NODE_MATRIX_WORLDIT   1\n#define NODE_MATRIX_OBJECT    2\n#define NODE_MATRIX_OBJECTIT  3\n#define NODE_MATRICES         4\n\nmat4 getIndexedMatrix(int idx, int what)\n{\n  int i = idx * NODE_MATRICES + what;\n  return mat4(  texelFetch(matricesBuffer, i*4 + 0),\n                texelFetch(matricesBuffer, i*4 + 1),\n                texelFetch(matricesBuffer, i*4 + 2),\n                texelFetch(matricesBuffer, i*4 + 3));\n}\n\n#endif"
  },
  {
    "path": "csf.cpp",
    "content": "/*\n * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n#define CSF_IMPLEMENTATION\n#define CSF_SUPPORT_GLTF2       1\n#define CSF_SUPPORT_FILEMAPPING 1\n\n#include <fileformats/cadscenefile.h>\n\n#define CGLTF_IMPLEMENTATION\n#include <cgltf.h>\n\n\n\n"
  },
  {
    "path": "csfviewer.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#define DEBUG_FILTER 1\n\n#include <nvgl/extensions_gl.hpp>\n\n#include <imgui/backends/imgui_impl_gl.h>\n#include <imgui/imgui_helper.h>\n\n#include <nvgl/glsltypes_gl.hpp>\n\n#include <nvh/cameracontrol.hpp>\n#include <nvh/fileoperations.hpp>\n#include <nvh/geometry.hpp>\n#include <nvh/misc.hpp>\n\n#include <nvgl/appwindowprofiler_gl.hpp>\n#include <nvgl/base_gl.hpp>\n#include <nvgl/error_gl.hpp>\n#include <nvgl/programmanager_gl.hpp>\n\n#include \"transformsystem.hpp\"\n\n#include \"cadscene.hpp\"\n#include \"renderer.hpp\"\n\n#include <algorithm>\n\n#include \"common.h\"\n#include \"glm/gtc/matrix_access.hpp\"\n#include \"glm/gtc/type_ptr.hpp\"\n\n\nnamespace csfviewer {\nint const SAMPLE_SIZE_WIDTH(800);\nint const SAMPLE_SIZE_HEIGHT(600);\nint const SAMPLE_MAJOR_VERSION(4);\nint const SAMPLE_MINOR_VERSION(5);\n\n\nclass Sample : public nvgl::AppWindowProfilerGL\n{\npublic:\n  enum GuiEnums\n  {\n    GUI_RENDERER,\n    GUI_MSAA,\n    GUI_SHADE,\n    GUI_STRATEGY,\n  };\n\n  struct\n  {\n    nvgl::ProgramID draw_object, draw_object_tris, draw_object_line, draw_object_indexed, draw_object_indexed_tris,\n        
draw_object_indexed_line,\n\n        cull_object_frustum, cull_object_hiz, cull_object_raster, cull_bit_temporallast, cull_bit_temporalnew,\n        cull_bit_regular, cull_depth_mips,\n\n        scan_prefixsum, scan_offsets, scan_combine,\n\n        transform_leaves, transform_level,\n\n        xplode;\n\n  } programs;\n\n  struct\n  {\n    GLuint scene  = 0;\n    GLuint scene2 = 0;\n  } fbos;\n\n  struct\n  {\n    GLuint scene_ubo = 0;\n  } buffers;\n\n  struct\n  {\n    GLuint64 scene_ubo;\n  } addresses;\n\n  struct\n  {\n    GLuint scene_color         = 0;\n    GLuint scene_color2        = 0;\n    GLuint scene_depthstencil  = 0;\n    GLuint scene_depthstencil2 = 0;\n  } textures;\n\n  struct Tweak\n  {\n    int       renderer      = 0;\n    ShadeType shade         = SHADE_SOLID;\n    Strategy  strategy      = STRATEGY_GROUPS;\n    int       clones        = 0;\n    bool      cloneaxisX    = true;\n    bool      cloneaxisY    = true;\n    bool      cloneaxisZ    = false;\n    bool      animateActive = false;\n    float     animateMin    = 1;\n    float     animateDelta  = 1;\n    int       zoom          = 100;\n    int       msaa          = 0;\n    bool      noUI          = false;\n  };\n\n  nvgl::ProgramManager m_progManager;\n\n  ImGuiH::Registry m_ui;\n  double           m_uiTime = 0;\n\n  Tweak m_tweak;\n  Tweak m_lastTweak;\n\n  std::string m_modelFilename;\n\n  SceneData       m_sceneUbo;\n  CadScene        m_scene;\n  TransformSystem m_transformSystem;\n\n  GLuint m_xplodeGroupSize;\n\n  std::vector<unsigned int> m_renderersSorted;\n  std::string               m_rendererName;\n\n  Renderer* NV_RESTRICT m_renderer;\n  Resources             m_resources;\n\n  size_t m_stateChangeID;\n\n\n  void updateProgramDefine();\n  bool initProgram();\n  bool initScene(const char* filename, int clones, int cloneaxis);\n  bool initFramebuffers(int width, int height);\n  void initRenderer(int type, Strategy strategy);\n  void deinitRenderer();\n\n  void 
getCullPrograms(CullingSystem::Programs& cullprograms);\n  void getScanPrograms(ScanSystem::Programs& scanprograms);\n  void getTransformPrograms(TransformSystem::Programs& xfromPrograms);\n\n  void updatedPrograms();\n\n  void setupConfigParameters();\n  void setRendererFromName();\n\n\npublic:\n  Sample() { setupConfigParameters(); }\n\n  bool validateConfig() override;\n\n  bool begin() override;\n  void think(double time) override;\n  void resize(int width, int height) override;\n\n  void processUI(double time);\n\n  nvh::CameraControl m_control;\n\n  void end() override { ImGui::ShutdownGL(); }\n  // return true to prevent m_windowState updates\n  bool mouse_pos(int x, int y) override\n  {\n    if(m_tweak.noUI)\n      return false;\n    return ImGuiH::mouse_pos(x, y);\n  }\n  bool mouse_button(int button, int action) override\n  {\n    if(m_tweak.noUI)\n      return false;\n    return ImGuiH::mouse_button(button, action);\n  }\n  bool mouse_wheel(int wheel) override\n  {\n    if(m_tweak.noUI)\n      return false;\n    return ImGuiH::mouse_wheel(wheel);\n  }\n  bool key_char(int button) override\n  {\n    if(m_tweak.noUI)\n      return false;\n    return ImGuiH::key_char(button);\n  }\n  bool key_button(int button, int action, int mods) override\n  {\n    if(m_tweak.noUI)\n      return false;\n    return ImGuiH::key_button(button, action, mods);\n  }\n};\n\nvoid Sample::updateProgramDefine() {}\n\nvoid Sample::getTransformPrograms(TransformSystem::Programs& xformPrograms)\n{\n  xformPrograms.transform_leaves = m_progManager.get(programs.transform_leaves);\n  xformPrograms.transform_level  = m_progManager.get(programs.transform_level);\n}\n\nvoid Sample::getCullPrograms(CullingSystem::Programs& cullprograms)\n{\n  cullprograms.bit_regular      = m_progManager.get(programs.cull_bit_regular);\n  cullprograms.bit_temporallast = m_progManager.get(programs.cull_bit_temporallast);\n  cullprograms.bit_temporalnew  = m_progManager.get(programs.cull_bit_temporalnew);\n  
cullprograms.depth_mips       = m_progManager.get(programs.cull_depth_mips);\n  cullprograms.object_frustum   = m_progManager.get(programs.cull_object_frustum);\n  cullprograms.object_hiz       = m_progManager.get(programs.cull_object_hiz);\n  cullprograms.object_raster    = m_progManager.get(programs.cull_object_raster);\n}\n\nvoid Sample::getScanPrograms(ScanSystem::Programs& scanprograms)\n{\n  scanprograms.prefixsum = m_progManager.get(programs.scan_prefixsum);\n  scanprograms.offsets   = m_progManager.get(programs.scan_offsets);\n  scanprograms.combine   = m_progManager.get(programs.scan_combine);\n}\n\nbool Sample::initProgram()\n{\n  bool validated(true);\n  m_progManager.m_filetype = nvh::ShaderFileManager::FILETYPE_GLSL;\n  m_progManager.addDirectory(std::string(\"GLSL_\" PROJECT_NAME));\n  m_progManager.addDirectory(exePath() + std::string(PROJECT_RELDIRECTORY));\n\n  m_progManager.registerInclude(\"common.h\");\n\n  updateProgramDefine();\n\n  programs.draw_object =\n      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"scene.vert.glsl\"),\n                                  nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"scene.frag.glsl\"));\n\n  programs.draw_object_tris = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define WIREMODE 0\\n\", \"scene.vert.glsl\"),\n      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"#define WIREMODE 0\\n\", \"scene.frag.glsl\"));\n\n  programs.draw_object_line = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define WIREMODE 1\\n\", \"scene.vert.glsl\"),\n      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"#define WIREMODE 1\\n\", \"scene.frag.glsl\"));\n\n  programs.draw_object_indexed = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define USE_INDEXING 1\\n\", \"scene.vert.glsl\"),\n      
nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"#define USE_INDEXING 1\\n\", \"scene.frag.glsl\"));\n\n  programs.draw_object_indexed_tris = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define USE_INDEXING 1\\n#define WIREMODE 0\\n\",\n                                       \"scene.vert.glsl\"),\n      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"#define USE_INDEXING 1\\n#define WIREMODE 0\\n\",\n                                       \"scene.frag.glsl\"));\n\n  programs.draw_object_indexed_line = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define USE_INDEXING 1\\n#define WIREMODE 1\\n\",\n                                       \"scene.vert.glsl\"),\n      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"#define USE_INDEXING 1\\n#define WIREMODE 1\\n\",\n                                       \"scene.frag.glsl\"));\n\n\n  programs.cull_object_raster = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define DUALINDEX 1\\n#define MATRICES 4\\n\",\n                                       \"cull-raster.vert.glsl\"),\n      nvgl::ProgramManager::Definition(GL_GEOMETRY_SHADER, \"#define DUALINDEX 1\\n#define MATRICES 4\\n\",\n                                       \"cull-raster.geo.glsl\"),\n      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"#define DUALINDEX 1\\n#define MATRICES 4\\n\",\n                                       \"cull-raster.frag.glsl\"));\n\n  programs.cull_object_frustum = m_progManager.createProgram(nvgl::ProgramManager::Definition(\n      GL_VERTEX_SHADER, \"#define DUALINDEX 1\\n#define MATRICES 4\\n\", \"cull-xfb.vert.glsl\"));\n\n  programs.cull_object_hiz = m_progManager.createProgram(nvgl::ProgramManager::Definition(\n      GL_VERTEX_SHADER, \"#define DUALINDEX 1\\n#define MATRICES 4\\n#define OCCLUSION\\n\", \"cull-xfb.vert.glsl\"));\n\n  programs.cull_bit_regular = 
m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define TEMPORAL 0\\n\", \"cull-bitpack.vert.glsl\"));\n  programs.cull_bit_temporallast = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define TEMPORAL TEMPORAL_LAST\\n\", \"cull-bitpack.vert.glsl\"));\n  programs.cull_bit_temporalnew = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"#define TEMPORAL TEMPORAL_NEW\\n\", \"cull-bitpack.vert.glsl\"));\n\n  programs.cull_depth_mips =\n      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"cull-downsample.vert.glsl\"),\n                                  nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, \"cull-downsample.frag.glsl\"));\n\n  programs.scan_prefixsum = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, \"#define TASK TASK_SUM\\n\", \"scan.comp.glsl\"));\n  programs.scan_offsets = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, \"#define TASK TASK_OFFSETS\\n\", \"scan.comp.glsl\"));\n  programs.scan_combine = m_progManager.createProgram(\n      nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, \"#define TASK TASK_COMBINE\\n\", \"scan.comp.glsl\"));\n\n  programs.transform_leaves =\n      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, \"transform-leaves.comp.glsl\"));\n  programs.transform_level =\n      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, \"transform-level.comp.glsl\"));\n\n  programs.xplode =\n      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, \"xplode-animation.comp.glsl\"));\n\n  validated = m_progManager.areProgramsValid();\n\n  return validated;\n}\n\nbool Sample::initScene(const char* filename, int clones, int cloneaxis)\n{\n  m_scene.unload();\n\n  if(buffers.scene_ubo && has_GL_NV_shader_buffer_load)\n  {\n 
   glMakeNamedBufferNonResidentNV(buffers.scene_ubo);\n  }\n\n  nvgl::newBuffer(buffers.scene_ubo);\n  glNamedBufferStorage(buffers.scene_ubo, sizeof(SceneData), NULL, GL_DYNAMIC_STORAGE_BIT);\n\n  if(has_GL_NV_shader_buffer_load)\n  {\n    glGetNamedBufferParameterui64vNV(buffers.scene_ubo, GL_BUFFER_GPU_ADDRESS_NV, &addresses.scene_ubo);\n    glMakeNamedBufferResidentNV(buffers.scene_ubo, GL_READ_ONLY);\n  }\n\n  m_resources.sceneUbo  = buffers.scene_ubo;\n  m_resources.sceneAddr = addresses.scene_ubo;\n\n  m_resources.stateChangeID++;\n\n  bool status = m_scene.loadCSF(filename, clones, cloneaxis);\n\n  LOGI(\"\\nscene %s\\n\", filename);\n  LOGI(\"geometries: %6d\\n\", (uint32_t)m_scene.m_geometry.size());\n  LOGI(\"materials:  %6d\\n\", (uint32_t)m_scene.m_materials.size());\n  LOGI(\"nodes:      %6d\\n\", (uint32_t)m_scene.m_matrices.size());\n  LOGI(\"objects:    %6d\\n\", (uint32_t)m_scene.m_objects.size());\n  LOGI(\"\\n\");\n\n  return status;\n}\n\nbool Sample::initFramebuffers(int width, int height)\n{\n  bool layered = true;\n\n  if(!fbos.scene || m_tweak.msaa != m_lastTweak.msaa)\n  {\n    nvgl::newFramebuffer(fbos.scene);\n    nvgl::newFramebuffer(fbos.scene2);\n\n    m_resources.fbo  = fbos.scene;\n    m_resources.fbo2 = fbos.scene2;\n\n    m_resources.stateChangeID++;\n  }\n\n  if(layered)\n  {\n\n    if(has_GL_NV_bindless_texture && textures.scene_color)\n    {\n      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color));\n      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));\n    }\n\n    nvgl::newTexture(textures.scene_color, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY);\n    nvgl::newTexture(textures.scene_depthstencil, m_tweak.msaa ? 
GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY);\n\n    if(m_tweak.msaa)\n    {\n      glTextureStorage3DMultisample(textures.scene_color, m_tweak.msaa, GL_RGBA8, width, height, 2, GL_TRUE);\n      glTextureStorage3DMultisample(textures.scene_depthstencil, m_tweak.msaa, GL_DEPTH24_STENCIL8, width, height, 2, GL_TRUE);\n    }\n    else\n    {\n      glTextureStorage3D(textures.scene_color, 1, GL_RGBA8, width, height, 2);\n      glTextureStorage3D(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height, 2);\n    }\n\n    glNamedFramebufferTextureLayer(fbos.scene, GL_COLOR_ATTACHMENT0, textures.scene_color, 0, 0);\n    glNamedFramebufferTextureLayer(fbos.scene, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0, 0);\n\n    glNamedFramebufferTextureLayer(fbos.scene2, GL_COLOR_ATTACHMENT0, textures.scene_color, 0, 1);\n    glNamedFramebufferTextureLayer(fbos.scene2, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0, 1);\n\n    if(has_GL_NV_bindless_texture)\n    {\n      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color));\n      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));\n    }\n  }\n  else\n  {\n\n    if(has_GL_NV_bindless_texture && textures.scene_color)\n    {\n      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color));\n      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));\n      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color2));\n      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil2));\n    }\n\n    nvgl::newTexture(textures.scene_color, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);\n    nvgl::newTexture(textures.scene_depthstencil, m_tweak.msaa ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);\n\n    if(m_tweak.msaa)\n    {\n      glTextureStorage2DMultisample(textures.scene_color, 1, GL_RGBA8, width, height, GL_TRUE);\n      glTextureStorage2DMultisample(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height, GL_TRUE);\n    }\n    else\n    {\n      glTextureStorage2D(textures.scene_color, 1, GL_RGBA8, width, height);\n      glTextureStorage2D(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height);\n    }\n\n    glNamedFramebufferTexture(fbos.scene, GL_COLOR_ATTACHMENT0, textures.scene_color, 0);\n    glNamedFramebufferTexture(fbos.scene, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0);\n\n    nvgl::newTexture(textures.scene_color2, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);\n    nvgl::newTexture(textures.scene_depthstencil2, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);\n\n    if(m_tweak.msaa)\n    {\n      glTextureStorage2DMultisample(textures.scene_color2, 1, GL_RGBA8, width, height, GL_TRUE);\n      glTextureStorage2DMultisample(textures.scene_depthstencil2, 1, GL_DEPTH24_STENCIL8, width, height, GL_TRUE);\n    }\n    else\n    {\n      glTextureStorage2D(textures.scene_color2, 1, GL_RGBA8, width, height);\n      glTextureStorage2D(textures.scene_depthstencil2, 1, GL_DEPTH24_STENCIL8, width, height);\n    }\n\n    glNamedFramebufferTexture(fbos.scene2, GL_COLOR_ATTACHMENT0, textures.scene_color2, 0);\n    glNamedFramebufferTexture(fbos.scene2, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil2, 0);\n\n    if(has_GL_NV_bindless_texture)\n    {\n      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color));\n      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));\n      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color2));\n      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil2));\n    }\n  }\n\n  
m_resources.fboTextureChangeID++;\n\n  return true;\n}\n\nvoid Sample::deinitRenderer()\n{\n  if(m_renderer)\n  {\n    m_renderer->deinit();\n    delete m_renderer;\n    m_renderer = NULL;\n  }\n}\n\nvoid Sample::initRenderer(int type, Strategy strategy)\n{\n  deinitRenderer();\n  Renderer::getRegistry()[m_renderersSorted[type]]->updatedPrograms(m_progManager);\n  m_renderer             = Renderer::getRegistry()[m_renderersSorted[type]]->create();\n  m_renderer->m_strategy = strategy;\n  m_renderer->init(&m_scene, m_resources);\n}\n\nbool Sample::begin()\n{\n  m_renderer      = NULL;\n  m_stateChangeID = 0;\n\n  ImGuiH::Init(m_windowState.m_winSize[0], m_windowState.m_winSize[1], this);\n  ImGui::InitGL();\n\n  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);\n  glEnable(GL_CULL_FACE);\n  glEnable(GL_DEPTH_TEST);\n\n#if defined(NDEBUG)\n  setVsync(false);\n#endif\n\n  Renderer::s_bindless_ubo = !!m_contextWindow.extensionSupported(\"GL_NV_uniform_buffer_unified_memory\");\n  LOGI(\"\\nNV_uniform_buffer_unified_memory support: %s\\n\\n\", Renderer::s_bindless_ubo ? 
\"true\" : \"false\");\n\n  bool validated(true);\n\n  GLuint defaultVAO;\n  glGenVertexArrays(1, &defaultVAO);\n  glBindVertexArray(defaultVAO);\n\n  validated = validated && initProgram();\n  validated = validated && initScene(m_modelFilename.c_str(), 0, 3);\n  validated = validated && initFramebuffers(m_windowState.m_winSize[0], m_windowState.m_winSize[1]);\n\n\n  const Renderer::Registry registry = Renderer::getRegistry();\n  for(size_t i = 0; i < registry.size(); i++)\n  {\n    if(registry[i]->isAvailable())\n    {\n      if(!registry[i]->loadPrograms(m_progManager))\n      {\n        LOGE(\"Failed to load resources for renderer %s\\n\", registry[i]->name());\n        return false;\n      }\n\n      uint sortkey = uint(i);\n      sortkey |= registry[i]->priority() << 16;\n      m_renderersSorted.push_back(sortkey);\n    }\n  }\n\n  std::sort(m_renderersSorted.begin(), m_renderersSorted.end());\n\n  for(size_t i = 0; i < m_renderersSorted.size(); i++)\n  {\n    m_renderersSorted[i] &= 0xFFFF;\n\n    m_ui.enumAdd(GUI_RENDERER, int(i), registry[m_renderersSorted[i]]->name());\n  }\n\n  {\n    m_ui.enumAdd(GUI_STRATEGY, STRATEGY_INDIVIDUAL, \"drawcall individual\");\n    m_ui.enumAdd(GUI_STRATEGY, STRATEGY_JOIN, \"drawcall join\");\n    m_ui.enumAdd(GUI_STRATEGY, STRATEGY_GROUPS, \"material groups\");\n\n    m_ui.enumAdd(GUI_SHADE, SHADE_SOLID, toString(SHADE_SOLID));\n    m_ui.enumAdd(GUI_SHADE, SHADE_SOLIDWIRE, toString(SHADE_SOLIDWIRE));\n    m_ui.enumAdd(GUI_SHADE, SHADE_SOLIDWIRE_SPLIT, \"solid w edges (split test, only in sorted)\");\n\n    m_ui.enumAdd(GUI_MSAA, 0, \"none\");\n    m_ui.enumAdd(GUI_MSAA, 2, \"2x\");\n    m_ui.enumAdd(GUI_MSAA, 4, \"4x\");\n    m_ui.enumAdd(GUI_MSAA, 8, \"8x\");\n  }\n\n\n  m_control.m_sceneOrbit     = glm::vec3(m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f;\n  m_control.m_sceneDimension = glm::length((m_scene.m_bbox.max - m_scene.m_bbox.min));\n  m_control.m_viewMatrix =\n      glm::lookAt(m_control.m_sceneOrbit - 
(-vec3(1, 1, 1) * m_control.m_sceneDimension * 0.5f * (float(m_tweak.zoom) / 100.0f)),\n                      m_control.m_sceneOrbit, vec3(0, 1, 0));\n\n  m_sceneUbo.wLightPos   = (m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f + m_control.m_sceneDimension;\n  m_sceneUbo.wLightPos.w = 1.0;\n\n  updatedPrograms();\n\n  CullingSystem::Programs cullprogs;\n  getCullPrograms(cullprogs);\n  Renderer::s_cullsys.init(cullprogs, true);\n\n  ScanSystem::Programs scanprogs;\n  getScanPrograms(scanprogs);\n  Renderer::s_scansys.init(scanprogs);\n  //Renderer::s_scansys.test();\n\n  TransformSystem::Programs xformprogs;\n  getTransformPrograms(xformprogs);\n  m_transformSystem.init(xformprogs);\n\n\n  initRenderer(m_tweak.renderer, m_tweak.strategy);\n\n  return validated;\n}\n\nvoid Sample::processUI(double time)\n{\n  int width  = m_windowState.m_winSize[0];\n  int height = m_windowState.m_winSize[1];\n\n  // Update imgui configuration\n  auto& imgui_io       = ImGui::GetIO();\n  imgui_io.DeltaTime   = static_cast<float>(time - m_uiTime);\n  imgui_io.DisplaySize = ImVec2(static_cast<float>(width), static_cast<float>(height));\n\n  m_uiTime = time;\n\n  ImGui::NewFrame();\n  ImGui::SetNextWindowSize(ImGuiH::dpiScaled(350, 0), ImGuiCond_FirstUseEver);\n  if(ImGui::Begin(\"NVIDIA \" PROJECT_NAME, nullptr))\n  {\n    m_ui.enumCombobox(GUI_RENDERER, \"renderer\", &m_tweak.renderer);\n    m_ui.enumCombobox(GUI_STRATEGY, \"strategy\", &m_tweak.strategy);\n    m_ui.enumCombobox(GUI_SHADE, \"shademode\", &m_tweak.shade);\n    ImGui::Checkbox(\"xplode via GPU\", &m_tweak.animateActive);\n    ImGui::SliderFloat(\"xplode min\", &m_tweak.animateMin, 0, 16.0f);\n    ImGui::SliderFloat(\"xplode delta\", &m_tweak.animateDelta, 0, 16.0f);\n    ImGuiH::InputIntClamped(\"clones\", &m_tweak.clones, 0, 255, 1, 10, ImGuiInputTextFlags_EnterReturnsTrue);\n    ImGui::Checkbox(\"clone X\", &m_tweak.cloneaxisX);\n    ImGui::Checkbox(\"clone Y\", &m_tweak.cloneaxisY);\n    ImGui::Checkbox(\"clone 
Z\", &m_tweak.cloneaxisZ);\n    m_ui.enumCombobox(GUI_MSAA, \"msaa\", &m_tweak.msaa);\n  }\n  if(!m_tweak.cloneaxisX && !m_tweak.cloneaxisY && !m_tweak.cloneaxisZ)\n  {\n    m_tweak.cloneaxisX = true;\n  }\n\n  ImGui::End();\n}\n\nvoid Sample::updatedPrograms()\n{\n\n  CullingSystem::Programs cullprogs;\n  getCullPrograms(cullprogs);\n  Renderer::s_cullsys.update(cullprogs, true);\n\n  ScanSystem::Programs scanprogs;\n  getScanPrograms(scanprogs);\n  Renderer::s_scansys.update(scanprogs);\n\n  TransformSystem::Programs xformprogs;\n  getTransformPrograms(xformprogs);\n  m_transformSystem.update(xformprogs);\n\n  m_resources.programUbo     = m_progManager.get(programs.draw_object);\n  m_resources.programUboLine = m_progManager.get(programs.draw_object_line);\n  m_resources.programUboTris = m_progManager.get(programs.draw_object_tris);\n  m_resources.programIdx     = m_progManager.get(programs.draw_object_indexed);\n  m_resources.programIdxLine = m_progManager.get(programs.draw_object_indexed_line);\n  m_resources.programIdxTris = m_progManager.get(programs.draw_object_indexed_tris);\n\n  GLuint groupsizes[3];\n  glGetProgramiv(m_progManager.get(programs.xplode), GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupsizes);\n  m_xplodeGroupSize = groupsizes[0];\n\n  m_resources.stateChangeID++;\n}\n\nvoid Sample::think(double time)\n{\n  NV_PROFILE_GL_SECTION(\"Frame\");\n\n  processUI(time);\n\n  m_control.processActions({m_windowState.m_winSize[0], m_windowState.m_winSize[1]},\n                           glm::vec2(m_windowState.m_mouseCurrent[0], m_windowState.m_mouseCurrent[1]),\n                           m_windowState.m_mouseButtonFlags, m_windowState.m_mouseWheel);\n\n  if(m_windowState.onPress(KEY_R))\n  {\n    m_progManager.reloadPrograms();\n    Renderer::getRegistry()[m_tweak.renderer]->updatedPrograms(m_progManager);\n    updatedPrograms();\n  }\n\n  if(m_tweak.msaa != m_lastTweak.msaa)\n  {\n    initFramebuffers(m_windowState.m_winSize[0], 
m_windowState.m_winSize[1]);\n  }\n\n  if(m_tweak.clones != m_lastTweak.clones || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX\n     || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ)\n  {\n    deinitRenderer();\n    initScene(m_modelFilename.c_str(), m_tweak.clones,\n              (int(m_tweak.cloneaxisX) << 0) | (int(m_tweak.cloneaxisY) << 1) | (int(m_tweak.cloneaxisZ) << 2));\n  }\n\n  if(m_tweak.renderer != m_lastTweak.renderer || m_tweak.strategy != m_lastTweak.strategy\n     || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY\n     || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ || m_tweak.clones != m_lastTweak.clones)\n  {\n    initRenderer(m_tweak.renderer, m_tweak.strategy);\n  }\n\n  if(!m_tweak.animateActive && m_lastTweak.animateActive)\n  {\n    m_scene.resetMatrices();\n  }\n\n  m_lastTweak = m_tweak;\n\n  int width  = m_windowState.m_winSize[0];\n  int height = m_windowState.m_winSize[1];\n\n  {\n    // generic state setup\n    glViewport(0, 0, width, height);\n\n    if(m_tweak.shade == SHADE_SOLIDWIRE_SPLIT)\n    {\n      glBindFramebuffer(GL_FRAMEBUFFER, fbos.scene2);\n      glClearColor(0.2f, 0.2f, 0.2f, 0.0f);\n      glClearDepth(1.0);\n      glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);\n    }\n\n    glBindFramebuffer(GL_FRAMEBUFFER, fbos.scene);\n    glClearColor(0.2f, 0.2f, 0.2f, 0.0f);\n    glClearDepth(1.0);\n    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);\n\n    glEnable(GL_DEPTH_TEST);\n\n    m_sceneUbo.viewport = ivec2(width, height);\n\n    glm::mat4 projection = glm::perspectiveRH_ZO((45.f), float(width) / float(height),\n                                                  m_control.m_sceneDimension * 0.001f, m_control.m_sceneDimension * 10.0f);\n    glm::mat4 view       = m_control.m_viewMatrix;\n\n    m_sceneUbo.viewProjMatrix = projection * view;\n    m_sceneUbo.viewMatrix     = view;\n 
   m_sceneUbo.viewMatrixIT   = glm::transpose(glm::inverse(view));\n\n    m_sceneUbo.viewPos = glm::row(m_sceneUbo.viewMatrixIT, 3);\n    m_sceneUbo.viewDir = -glm::row(view,2);\n\n    m_sceneUbo.wLightPos   = glm::row(m_sceneUbo.viewMatrixIT, 3);\n    m_sceneUbo.wLightPos.w = 1.0;\n\n    m_sceneUbo.tboMatrices = uvec2(m_scene.m_matricesTexGLADDR & 0xFFFFFFFF, m_scene.m_matricesTexGLADDR >> 32);\n\n    glNamedBufferSubData(buffers.scene_ubo, 0, sizeof(SceneData), &m_sceneUbo);\n\n    glDisable(GL_CULL_FACE);\n  }\n\n  if(m_tweak.animateActive)\n  {\n    {\n      NV_PROFILE_GL_SECTION(\"Xplode\");\n\n      float  speed      = 0.5;\n      float  scale      = m_tweak.animateMin + (cosf(float(time) * speed) * 0.5f + 0.5f) * (m_tweak.animateDelta);\n      GLuint totalNodes = GLuint(m_scene.m_matrices.size());\n      GLuint groupsize  = m_xplodeGroupSize;\n\n      glUseProgram(m_progManager.get(programs.xplode));\n      glUniform1f(0, scale);\n      glUniform1i(1, totalNodes);\n\n      nvgl::bindMultiTexture(GL_TEXTURE0, GL_TEXTURE_BUFFER, m_scene.m_matricesOrigTexGL);\n      glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_scene.m_matricesGL);\n\n      glDispatchCompute((totalNodes + groupsize - 1) / groupsize, 1, 1);\n      glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n\n      nvgl::bindMultiTexture(GL_TEXTURE0, GL_TEXTURE_BUFFER, 0);\n      glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);\n      glUseProgram(0);\n    }\n\n    {\n      NV_PROFILE_GL_SECTION(\"Tree\");\n      TransformSystem::Buffer ids;\n      TransformSystem::Buffer world;\n      TransformSystem::Buffer object;\n\n      ids.buffer = m_scene.m_parentIDsGL;\n      ids.offset = 0;\n      ids.size   = sizeof(GLuint) * m_scene.m_matrices.size();\n\n      world.buffer = m_scene.m_matricesGL;\n      world.offset = 0;\n      world.size   = sizeof(CadScene::MatrixNode) * m_scene.m_matrices.size();\n\n      object.buffer = m_scene.m_matricesGL;\n      object.offset = 0;\n      object.size   = 
sizeof(CadScene::MatrixNode) * m_scene.m_matrices.size();\n\n      m_transformSystem.process(m_scene.m_nodeTree, ids, object, world);\n    }\n  }\n\n  {\n    NV_PROFILE_GL_SECTION(\"Render\");\n\n    m_resources.cullView.viewPos        = glm::value_ptr(m_sceneUbo.viewPos);\n    m_resources.cullView.viewDir        = glm::value_ptr(m_sceneUbo.viewDir);\n    m_resources.cullView.viewProjMatrix = glm::value_ptr(m_sceneUbo.viewProjMatrix);\n\n    m_renderer->draw(m_tweak.shade, m_resources, m_profiler, m_progManager);\n  }\n\n\n  {\n    NV_PROFILE_GL_SECTION(\"Blit\");\n\n\n    if(m_tweak.shade == SHADE_SOLIDWIRE_SPLIT)\n    {\n      glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);\n\n      int wh = width / 2;\n      int hh = height / 2;\n\n      glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene);\n      glBlitFramebuffer(0, 0, wh, hh, 0, 0, wh, hh, GL_COLOR_BUFFER_BIT, GL_NEAREST);\n      glBlitFramebuffer(wh, hh, width, height, wh, hh, width, height, GL_COLOR_BUFFER_BIT, GL_NEAREST);\n\n      glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene2);\n      glBlitFramebuffer(wh, 0, width, hh, wh, 0, width, hh, GL_COLOR_BUFFER_BIT, GL_NEAREST);\n      glBlitFramebuffer(0, hh, wh, height, 0, hh, wh, height, GL_COLOR_BUFFER_BIT, GL_NEAREST);\n    }\n    else\n    {\n      // blit to background\n      glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene);\n      glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);\n      glBlitFramebuffer(0, 0, width, height, 0, 0, width, height, GL_COLOR_BUFFER_BIT, GL_NEAREST);\n    }\n  }\n\n  if(!m_tweak.noUI)\n  {\n    NV_PROFILE_GL_SECTION(\"GUI\");\n    ImGui::Render();\n    ImGui::RenderDrawDataGL(ImGui::GetDrawData());\n  }\n\n  ImGui::EndFrame();\n\n  m_lastTweak = m_tweak;\n}\n\nvoid Sample::resize(int width, int height)\n{\n  initFramebuffers(width, height);\n}\n\nvoid Sample::setRendererFromName()\n{\n  if(!m_rendererName.empty())\n  {\n    const Renderer::Registry registry = Renderer::getRegistry();\n    for(size_t i = 0; i < 
m_renderersSorted.size(); i++)\n    {\n      if(strcmp(m_rendererName.c_str(), registry[m_renderersSorted[i]]->name()) == 0)\n      {\n        m_tweak.renderer = int(i);\n      }\n    }\n  }\n}\n\nstatic std::string addPath(std::string const& defaultPath, std::string const& filename)\n{\n  if(\n#ifdef _WIN32\n      filename.find(':') != std::string::npos\n#else\n      !filename.empty() && filename[0] == '/'\n#endif\n  )\n  {\n    return filename;\n  }\n  else\n  {\n    return defaultPath + \"/\" + filename;\n  }\n}\n\nstatic bool endsWith(std::string const& s, std::string const& end)\n{\n  if(s.length() >= end.length())\n  {\n    return (0 == s.compare(s.length() - end.length(), end.length(), end));\n  }\n  else\n  {\n    return false;\n  }\n}\n\nvoid Sample::setupConfigParameters()\n{\n  m_parameterList.addFilename(\".csf\", &m_modelFilename);\n  m_parameterList.addFilename(\".csf.gz\", &m_modelFilename);\n  m_parameterList.addFilename(\".gltf\", &m_modelFilename);\n\n  m_parameterList.add(\"noui\", &m_tweak.noUI, false);\n\n  m_parameterList.add(\"renderer\", (uint32_t*)&m_tweak.renderer);\n  m_parameterList.add(\"renderernamed\", &m_rendererName);\n  m_parameterList.add(\"strategy\", (uint32_t*)&m_tweak.strategy);\n  m_parameterList.add(\"shademode\", (uint32_t*)&m_tweak.shade);\n  m_parameterList.add(\"msaa\", &m_tweak.msaa);\n  m_parameterList.add(\"clones\", &m_tweak.clones);\n  m_parameterList.add(\"xplode\", &m_tweak.animateActive);\n  m_parameterList.add(\"zoom\", &m_tweak.zoom);\n}\n\n\nbool Sample::validateConfig()\n{\n  if(m_modelFilename.empty())\n  {\n    LOGI(\"no .csf model file specified\\n\");\n    LOGI(\"exe <filename.csf/cfg> parameters...\\n\");\n    m_parameterList.print();\n    return false;\n  }\n  return true;\n}\n\n}  // namespace csfviewer\n\nusing namespace csfviewer;\n\nint main(int argc, const char** argv)\n{\n  NVPSystem system(PROJECT_NAME);\n\n  Sample sample;\n\n  {\n    std::vector<std::string> directories;\n    
directories.push_back(NVPSystem::exePath());\n    directories.push_back(NVPSystem::exePath() + \"/media\");\n    directories.push_back(NVPSystem::exePath() + std::string(PROJECT_DOWNLOAD_RELDIRECTORY));\n    sample.m_modelFilename = nvh::findFile(std::string(\"geforce.csf.gz\"), directories);\n  }\n\n  return sample.run(PROJECT_NAME, argc, argv, SAMPLE_SIZE_WIDTH, SAMPLE_SIZE_HEIGHT);\n}\n"
  },
  {
    "path": "cull-bitpack.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 330\n/**/\n\n#define TEMPORAL_LAST 1\n#define TEMPORAL_NEW  2\n\n#ifndef TEMPORAL\n#define TEMPORAL 0\n#endif\n\n#extension GL_ARB_explicit_attrib_location : require\n#extension GL_ARB_shader_storage_buffer_object : enable\n\nlayout(location=0) in uvec4 instream[8];\n\n#if TEMPORAL\nlayout(location=9) in uint last;\n#endif\n\n#if GL_ARB_shader_storage_buffer_object\nlayout(std430,binding=0)  writeonly buffer outputBuffer {\n  uint outstream[];\n};\n\nvoid storeOutput(uint value)\n{\n  outstream[gl_VertexID] = value;\n}\n\n#else\nflat out uint outstream;\n\nvoid storeOutput(uint value)\n{\n  outstream= value;\n}\n#endif\n\nvoid main ()\n{\n  uint bits = 0u;\n  int outbit = 0;\n  for (int i = 0; i < 8; i++){\n    for (int n = 0; n < 4; n++, outbit++){\n      uint checkbytes = instream[i][n];\n      bits |= (checkbytes & 1u) << outbit;\n    }\n  }\n  \n#if TEMPORAL == TEMPORAL_LAST\n  // render what was visible in last frame and passes current test\n  bits &= last;\n#elif TEMPORAL == TEMPORAL_NEW\n  // render what was not visible in last frame (already rendered), but is now visible\n  bits &= (~last);\n#endif\n\n  storeOutput(bits);\n}\n"
  },
  {
    "path": "cull-downsample.frag.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 330 \n/**/\n\nuniform sampler2D depthTex;\nuniform int       depthLod;\nuniform bool      evenLod;\n\nin vec2 uv;\n\nvoid main()\n{\n  ivec2 lodSize = textureSize(depthTex,depthLod);\n  float depth = 0;\n  \n  if (evenLod){\n    ivec2 offsets[] = ivec2[](\n      ivec2(0,0),\n      ivec2(0,1),\n      ivec2(1,1),\n      ivec2(1,0)\n    );\n    ivec2 coord = ivec2(gl_FragCoord.xy);\n    coord *= 2;\n    \n    for (int i = 0; i < 4; i++){\n      depth = max(\n        depth, \n        texelFetch(depthTex,\n          clamp(coord + offsets[i], ivec2(0), lodSize - ivec2(1)),\n          depthLod).r );\n    }\n  }\n  else{\n    // need this to handle non-power of two\n    // very conservative\n    \n    vec2 offsets[] = vec2[](\n      vec2(-1,-1),\n      vec2( 0,-1),\n      vec2( 1,-1),\n      vec2(-1, 0),\n      vec2( 0, 0),\n      vec2( 1, 0),\n      vec2(-1, 1),\n      vec2( 0, 1),\n      vec2( 1, 1)\n    );\n    vec2 coord = uv;\n    vec2 texel = 1.0/(vec2(lodSize));\n    \n    for (int i = 0; i < 9; i++){\n      vec2 pos = coord + offsets[i] * texel;\n      depth = max(\n        depth, \n        #if 1\n        texelFetch(depthTex,\n          clamp(ivec2(pos * lodSize), ivec2(0), 
lodSize - ivec2(1)),\n          depthLod).r \n        #else\n        textureLod(depthTex,\n          pos,\n          depthLod).r \n        #endif\n        );\n    }\n  }\n\n  gl_FragDepth = depth;\n}\n"
  },
  {
    "path": "cull-downsample.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 330\n/**/\n\nout vec2 uv;\n\nvoid main()\n{\n  vec4 pos =  vec4(\n      (float( gl_VertexID    &1)) * 4.0 - 1.0,\n      (float((gl_VertexID>>1)&1)) * 4.0 - 1.0,\n      0, 1.0);\n      \n  uv = pos.xy * 0.5 + 0.5;\n  \n  gl_Position = pos;\n}\n"
  },
  {
    "path": "cull-raster.frag.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\nlayout(early_fragment_tests) in;\n\nlayout(std430,binding=0) buffer visibleBuffer {\n  int visibles[];\n};\n\nlayout(location=0,index=0) out vec4 out_Color;\n\nflat in int objid;\n\nvoid main (){\n  visibles[objid] = 1;\n  \n  out_Color = unpackUnorm4x8(uint(objid));\n}\n"
  },
  {
    "path": "cull-raster.geo.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#ifndef MATRIX_WORLD\n#define MATRIX_WORLD    0\n#endif\n\n#ifndef MATRIX_WORLD_IT\n#define MATRIX_WORLD_IT 1\n#endif\n\n#ifndef MATRICES\n#define MATRICES        2\n#endif\n\n#ifndef FLIPWIND\n#define FLIPWIND        1\n#endif\n\n#ifndef PERSPECTIVE\n#define PERSPECTIVE     1\n#endif\n\n// render the 3 visible sides based on view direction and box normal\nlayout(points,invocations=3) in;  \n\n// one side each invocation\nlayout(triangle_strip,max_vertices=4) out;\n\nin VertexOut{\n  vec3 bboxCtr;\n  vec3 bboxDim;\n  flat int matrixIndex;\n  flat int objid;\n} IN[1];\n\nflat out int objid;\n\nuniform vec3 viewPos;\nuniform vec3 viewDir;\nuniform mat4 viewProjTM;\nuniform samplerBuffer matricesTex;\n\nvoid main()\n{\n\n  int  matindex = (IN[0].matrixIndex*MATRICES + MATRIX_WORLD)*4;\n  mat4 worldTM = mat4(\n    texelFetch(matricesTex,matindex + 0),\n    texelFetch(matricesTex,matindex + 1),\n    texelFetch(matricesTex,matindex + 2),\n    texelFetch(matricesTex,matindex + 3));\n\n  vec3 faceNormal = vec3(0);\n  vec3 edgeBasis0 = vec3(0);\n  vec3 edgeBasis1 = vec3(0);\n  \n  int id = gl_InvocationID;\n\n  if (id == 0)\n  {\n      faceNormal.x = IN[0].bboxDim.x;\n      
edgeBasis0.y = IN[0].bboxDim.y;\n      edgeBasis1.z = IN[0].bboxDim.z;\n  }\n  else if(id == 1)\n  {\n      faceNormal.y = IN[0].bboxDim.y;\n      edgeBasis1.x = IN[0].bboxDim.x;\n      edgeBasis0.z = IN[0].bboxDim.z;\n  }\n  else if(id == 2)\n  {\n      faceNormal.z = IN[0].bboxDim.z;\n      edgeBasis0.x = IN[0].bboxDim.x;\n      edgeBasis1.y = IN[0].bboxDim.y;\n  }\n  \n  vec3 worldCtr = (worldTM * vec4(IN[0].bboxCtr, 1)).xyz;\n  \n#if PERSPECTIVE\n  vec3 worldNormal = mat3(worldTM) * faceNormal;\n  vec3 worldPos    = worldCtr + worldNormal;\n  float proj = sign(dot(worldPos - viewPos.xyz, worldNormal));\n#else\n  vec3 worldNormal = mat3(worldTM) * faceNormal;\n  float proj = sign(dot(viewDir,worldNormal));\n#endif\n  \n#if FLIPWIND\n  proj *= -1;\n#endif\n  \n  \n  faceNormal = mat3(worldTM) * (faceNormal) * proj;\n  edgeBasis0 = mat3(worldTM) * (edgeBasis0);\n  edgeBasis1 = mat3(worldTM) * (edgeBasis1) * proj;\n  \n#if FLIPWIND\n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal - edgeBasis0 - edgeBasis1),1);\n  EmitVertex();\n  \n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal + edgeBasis0 - edgeBasis1),1);\n  EmitVertex();\n  \n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal - edgeBasis0 + edgeBasis1),1);\n  EmitVertex();\n  \n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal + edgeBasis0 + edgeBasis1),1);\n  EmitVertex();\n  \n#else\n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal - edgeBasis0 - edgeBasis1),1);\n  EmitVertex();\n  \n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal - edgeBasis0 + edgeBasis1),1);\n  EmitVertex();\n  \n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal + edgeBasis0 - edgeBasis1),1);\n  EmitVertex();\n  \n  objid = IN[0].objid;\n  gl_Position = viewProjTM * vec4(worldCtr + (faceNormal + edgeBasis0 + 
edgeBasis1),1);\n  EmitVertex();\n#endif\n  \n}\n"
  },
  {
    "path": "cull-raster.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#ifndef MATRIX_WORLD\n#define MATRIX_WORLD    0\n#endif\n\n#ifndef MATRIX_WORLD_IT\n#define MATRIX_WORLD_IT 1\n#endif\n\n#ifndef MATRICES\n#define MATRICES        2\n#endif\n\nlayout(std430,binding=0) buffer visibleBuffer {\n  int visibles[];\n};\n\nuniform samplerBuffer matricesTex;\n\n#ifdef DUALINDEX\nlayout(location=0) in int  bboxIndex;\nlayout(location=2) in int  matrixIndex;\nuniform samplerBuffer     bboxesTex;\n\nvec4 bboxMin = texelFetch(bboxesTex, bboxIndex*2+0);\nvec4 bboxMax = texelFetch(bboxesTex, bboxIndex*2+1);\n#else\nlayout(location=0) in vec4 bboxMin;\nlayout(location=1) in vec4 bboxMax;\nlayout(location=2) in int  matrixIndex;\n#endif\n\nuniform vec3 viewPos;\n\nout VertexOut{\n  vec3 bboxCtr;\n  vec3 bboxDim;\n  flat int matrixIndex;\n  flat int objid;\n} OUT;\n\nvoid main()\n{\n  int objid = gl_VertexID;\n  vec3 ctr =((bboxMin + bboxMax)*0.5).xyz;\n  vec3 dim =((bboxMax - bboxMin)*0.5).xyz;\n  OUT.bboxCtr = ctr;\n  OUT.bboxDim = dim;\n  OUT.matrixIndex = matrixIndex;\n  OUT.objid = objid;\n  \n  {\n    // if camera is inside the bbox then none of our\n    // side faces will be visible, must treat object as \n    // visible\n    int matindex = 
(matrixIndex * MATRICES + MATRIX_WORLD_IT)*4;\n    mat4 worldInvTransTM = mat4(\n      texelFetch(matricesTex,matindex + 0),\n      texelFetch(matricesTex,matindex + 1),\n      texelFetch(matricesTex,matindex + 2),\n      texelFetch(matricesTex,matindex + 3));\n      \n    vec3 objPos = (vec4(viewPos,1) * worldInvTransTM).xyz;\n    objPos -= ctr;\n    if (all(lessThan(abs(objPos),dim))){\n      // inside bbox\n      visibles[objid] = 1;\n    }\n  }\n}\n"
  },
  {
    "path": "cull-tokencmds.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 440\n/**/\n\n#define SCAN_BATCHSIZE 2048\n\nlayout(location=0) in uint  cmdOffset;\nlayout(location=1) in uint  cmdCullSize;\nlayout(location=2) in uint  cmdCullScan;\n\nuniform uint startOffset;\nuniform int  startID;\nuniform uint endOffset;\nuniform int  endID;\nuniform uint terminateCmd;\n\nlayout(std430,binding=0)  writeonly buffer outputBuffer {\n  uint outcmds[];\n};\n\nlayout(std430,binding=1)  readonly buffer commandBuffer {\n  uint incmds[];\n};\n\nlayout(std430,binding=2)  readonly buffer cullSizesBuffer {\n  uint cullSizes[];\n};\n\nlayout(std430,binding=3)  readonly buffer cullScanBuffer {\n  uint cullScan[];\n};\n\nlayout(std430,binding=4)  readonly buffer cullScanOffsetBuffer {\n  uint cullScanOffsets[];\n};\n\nuint getOffset( int id, uint scan, uint size, bool exclusive)\n{\n  int scanBatch = id / SCAN_BATCHSIZE;\n  uint  scanOffset  = scan;\n        scanOffset += scanBatch > 0 ? 
cullScanOffsets[ scanBatch-1] : 0;\n  \n  if (exclusive){\n    scanOffset -= size;\n  }\n  return scanOffset;\n}\n\nuint getOffset( int id, bool exclusive)\n{\n  return getOffset(id, cullScan[id], cullSizes[id], exclusive);\n}\n\nuint rebaseOffset(uint cullOffset)\n{\n  // where the current sequence starts\n  uint startCullOffset = getOffset(startID, true);\n\n  // rebase from where it should start\n  uint outOffset    = startOffset + (cullOffset - startCullOffset);\n  \n  return outOffset;\n}\n\n#define DEBUG 0\n\nvoid main ()\n{\n  if (cmdCullSize > 0)\n  {\n    // cullOffset goes across \"stateobject\" sequences\n    uint cullOffset = getOffset(gl_VertexID,cmdCullScan,cmdCullSize,true);\n  \n    uint outOffset  = rebaseOffset(cullOffset);\n    \n  #if DEBUG\n    outcmds[(gl_VertexID)*2+0] = outOffset;\n    outcmds[(gl_VertexID)*2+1] = cmdOffset;\n  #else\n    for (uint i = 0; i < cmdCullSize; i++){\n      outcmds[outOffset+i] = incmds[cmdOffset+i];\n    }\n  #endif\n  }\n#if DEBUG\n  else {\n    outcmds[(gl_VertexID)*2+0] = ~0;\n    outcmds[(gl_VertexID)*2+1] = cmdOffset;\n  }\n#endif\n\n  if (gl_VertexID == startID)\n  {\n    // add terminator if sequence not original\n    uint lastOffset = rebaseOffset( getOffset(endID, false) );\n    if (lastOffset != endOffset) {\n#if !DEBUG\n      outcmds[lastOffset] = terminateCmd;\n#endif\n    }\n    \n#if DEBUG && 0\n    outcmds[(startID)*2+0] = lastOffset;\n    outcmds[(startID)*2+1] = endOffset;\n#endif\n  }\n}\n"
  },
  {
    "path": "cull-tokensizes.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 440\n/**/\n\nlayout(location=0) in uint  cmdSize;\nlayout(location=1) in int   cmdObject;\n\nlayout(std430,binding=0)  writeonly buffer outputBuffer {\n  uint outsizes[];\n};\n\nlayout(std430,binding=1)  readonly buffer visibleBuffer {\n  int visibles[];\n};\n\n#define DEBUG false\n\nvoid main ()\n{\n  if (cmdObject >= 0 && !DEBUG){\n    outsizes[gl_VertexID] = (visibles[cmdObject/32] & (1<<(cmdObject%32))) != 0 ? cmdSize : 0;\n  }\n  else{\n    outsizes[gl_VertexID] = cmdSize;\n  }\n}\n"
  },
  {
    "path": "cull-xfb.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 330\n/**/\n\n#ifndef MATRIX_WORLD\n#define MATRIX_WORLD    0\n#endif\n\n#ifndef MATRIX_WORLD_IT\n#define MATRIX_WORLD_IT 1\n#endif\n\n#ifndef MATRICES\n#define MATRICES        2\n#endif\n\n#extension GL_ARB_explicit_attrib_location : require\n#extension GL_ARB_shader_storage_buffer_object : enable\n\n\n//#define OCCLUSION\n\n#ifdef DUALINDEX\nlayout(location=0) in int  bboxIndex;\nlayout(location=2) in int  matrixIndex;\n\nuniform samplerBuffer     bboxesTex;\nvec4 bboxMin = texelFetch(bboxesTex, bboxIndex*2+0);\nvec4 bboxMax = texelFetch(bboxesTex, bboxIndex*2+1);\n#else\nlayout(location=0) in vec4 bboxMin;\nlayout(location=1) in vec4 bboxMax;\nlayout(location=2) in int  matrixIndex;\n#endif\n\n#if GL_ARB_shader_storage_buffer_object\nlayout(std430,binding=0)  writeonly buffer outputBuffer {\n  int outstream[];\n};\n\nvoid storeOutput(int value)\n{\n  outstream[gl_VertexID] = value;\n}\n\n#else\nflat out int outstream;\n\nvoid storeOutput(int value)\n{\n  outstream = value;\n}\n#endif\n\nuniform mat4              viewProjTM;\nuniform samplerBuffer     matricesTex;\n\n#ifdef OCCLUSION\nuniform sampler2D         depthTex;\n#endif\n\nvec4 getBoxCorner(int n)\n{\n#if 1\n  bvec3 
useMax = bvec3((n & 1) != 0, (n & 2) != 0, (n & 4) != 0);\n  return vec4(mix(bboxMin.xyz, bboxMax.xyz, useMax),1);\n#else\n  switch(n){\n  case 0:\n    return vec4(bboxMin.x,bboxMin.y,bboxMin.z,1);\n  case 1:\n    return vec4(bboxMax.x,bboxMin.y,bboxMin.z,1);\n  case 2:\n    return vec4(bboxMin.x,bboxMax.y,bboxMin.z,1);\n  case 3:\n    return vec4(bboxMax.x,bboxMax.y,bboxMin.z,1);\n  case 4:\n    return vec4(bboxMin.x,bboxMin.y,bboxMax.z,1);\n  case 5:\n    return vec4(bboxMax.x,bboxMin.y,bboxMax.z,1);\n  case 6:\n    return vec4(bboxMin.x,bboxMax.y,bboxMax.z,1);\n  case 7:\n    return vec4(bboxMax.x,bboxMax.y,bboxMax.z,1);\n  }\n#endif\n}\n\nvec3 projected(mat4 a, vec4 pos) {\n  vec4 hpos = (a * pos);\n  return hpos.xyz/hpos.w;\n}\n\nvoid main (){\n  int isvisible = 0;\n  int matindex = (matrixIndex*MATRICES + MATRIX_WORLD)*4;\n  mat4 worldTM = mat4(\n    texelFetch(matricesTex,matindex + 0),\n    texelFetch(matricesTex,matindex + 1),\n    texelFetch(matricesTex,matindex + 2),\n    texelFetch(matricesTex,matindex + 3));\n    \n  mat4 worldViewProjTM = (viewProjTM * worldTM);\n  \n  // clipspace bbox\n  vec3 clipmin  = projected(worldViewProjTM, getBoxCorner(0));\n  vec3 clipmax  = clipmin;\n\n  for (int n = 1; n < 8; n++){\n    vec3 ab = projected(worldViewProjTM, getBoxCorner(n));\n    clipmin = min(clipmin,ab);\n    clipmax = max(clipmax,ab);\n  }\n\n  isvisible = (\n    clipmin.x <= 1 &&\n    clipmin.y <= 1 &&\n    clipmin.z <= 1 &&\n    clipmax.x >= -1 &&\n    clipmax.y >= -1 &&\n    clipmax.z >= -1) ? 
1 : 0;\n\n#ifdef OCCLUSION\n  if (isvisible != 0){\n    clipmin = clipmin * 0.5 + 0.5;\n    clipmax = clipmax * 0.5 + 0.5;\n    vec2 size = (clipmax.xy - clipmin.xy);\n    ivec2 texsize = textureSize(depthTex,0);\n    float maxsize = max(size.x, size.y) * float(max(texsize.x,texsize.y));\n    float miplevel = ceil(log2(maxsize));\n    \n    float depth = 0;\n    float a = textureLod(depthTex,clipmin.xy,miplevel).r;\n    float b = textureLod(depthTex,vec2(clipmax.x,clipmin.y),miplevel).r;\n    float c = textureLod(depthTex,clipmax.xy,miplevel).r;\n    float d = textureLod(depthTex,vec2(clipmin.x,clipmax.y),miplevel).r;\n    depth = max(depth,max(max(max(a,b),c),d));\n\n    isvisible =  clipmin.z <= depth ? 1 : 0;\n  }\n#endif\n\n  storeOutput(isvisible);\n}\n"
  },
  {
    "path": "cullingsystem.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"cullingsystem.hpp\"\n#include <assert.h>\n#include <string.h>\n\n#define DEBUG_VISIBLEBOXES  0\n\ninline unsigned int minDivide(unsigned int val, unsigned int alignment)\n{\n  return (val+alignment-1)/alignment;\n}\n\nvoid CullingSystem::init( const Programs &programs, bool dualindex )\n{\n  update(programs,dualindex);\n  glGenFramebuffers(1,&m_fbo);\n  glCreateTextures(GL_TEXTURE_BUFFER,2,m_tbo);\n}\n\nvoid CullingSystem::update( const Programs &programs, bool dualindex )\n{\n  m_programs = programs;\n  m_dualindex = dualindex;\n  m_useSSBO = has_GL_VERSION_4_2 != 0;\n  m_useRepesentativeTest = !!has_GL_NV_representative_fragment_test;\n\n  if (!m_useSSBO)\n  {\n    const char* xfbstreams[] = {\"outstream\"};\n    glTransformFeedbackVaryings(programs.bit_regular,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);\n    glLinkProgram(programs.bit_regular);\n\n    glTransformFeedbackVaryings(programs.bit_temporallast,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);\n    glLinkProgram(programs.bit_temporallast);\n\n    glTransformFeedbackVaryings(programs.bit_temporalnew,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);\n    
glLinkProgram(programs.bit_temporalnew);\n\n    glTransformFeedbackVaryings(programs.object_frustum,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);\n    glLinkProgram(programs.object_frustum);\n\n    glTransformFeedbackVaryings(programs.object_hiz,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);\n    glLinkProgram(programs.object_hiz);\n  }\n\n  glUseProgram(programs.depth_mips);\n  glUniform1i(glGetUniformLocation(programs.depth_mips,\"depthTex\"),0);\n  m_uniforms.depth_lod = glGetUniformLocation(programs.depth_mips,\"depthLod\");\n  m_uniforms.depth_even = glGetUniformLocation(programs.depth_mips,\"evenLod\");\n\n  glUseProgram(programs.object_frustum);\n  glUniform1i(glGetUniformLocation(programs.object_frustum,\"matricesTex\"),0);\n  if (dualindex){\n    glUniform1i(glGetUniformLocation(programs.object_frustum,\"bboxesTex\"),1);\n  }\n  m_uniforms.frustum_viewProj = glGetUniformLocation(programs.object_frustum, \"viewProjTM\");\n\n  glUseProgram(programs.object_hiz);\n  glUniform1i(glGetUniformLocation(programs.object_hiz,\"matricesTex\"),0);\n  if (dualindex){\n    glUniform1i(glGetUniformLocation(programs.object_hiz,\"bboxesTex\"),1);\n  }\n  glUniform1i(glGetUniformLocation(programs.object_hiz,\"depthTex\"),2);\n  m_uniforms.hiz_viewProj = glGetUniformLocation(programs.object_hiz, \"viewProjTM\");\n  \n  glUseProgram(programs.object_raster);\n  glUniform1i(glGetUniformLocation(programs.object_raster,\"matricesTex\"),0);\n  if (dualindex){\n    glUniform1i(glGetUniformLocation(programs.object_raster,\"bboxesTex\"),1);\n  }\n  m_uniforms.raster_viewProj = glGetUniformLocation(programs.object_raster, \"viewProjTM\");\n  m_uniforms.raster_viewPos  = glGetUniformLocation(programs.object_raster, \"viewPos\");\n  m_uniforms.raster_viewDir  = glGetUniformLocation(programs.object_raster, \"viewDir\");\n\n  glUseProgram(0);\n}\n\nvoid CullingSystem::deinit()\n{\n  glDeleteFramebuffers(1,&m_fbo);\n  glDeleteTextures(2,m_tbo);\n}\n\nvoid CullingSystem::buildDepthMipmaps( GLuint 
textureDepth, int width, int height )\n{\n  int level = 0;\n  int dim = width > height ? width : height;\n  int twidth  = width;\n  int theight = height;\n  int wasEven = 0;\n\n  glBindFramebuffer(GL_FRAMEBUFFER,m_fbo);\n  glDepthFunc(GL_ALWAYS);\n  glUseProgram(m_programs.depth_mips);\n  glActiveTexture(GL_TEXTURE0);\n  glBindTexture(GL_TEXTURE_2D, textureDepth);\n\n\n  while (dim){\n    if (level){\n      twidth  = twidth < 1 ? 1 : twidth;\n      theight = theight < 1 ? 1 : theight;\n      glViewport(0,0,twidth,theight);\n      glFramebufferTexture2D(GL_FRAMEBUFFER,GL_DEPTH_STENCIL_ATTACHMENT,GL_TEXTURE_2D, textureDepth, level);\n      glUniform1i(m_uniforms.depth_lod, level-1);\n      glUniform1i(m_uniforms.depth_even, wasEven);\n\n      glDrawArrays(GL_TRIANGLES,0,3);\n    }\n\n    wasEven = (twidth % 2 == 0) && (theight % 2 == 0);\n    \n    dim       /=  2;\n    twidth    /=  2;\n    theight   /=  2;\n    level++;\n  }\n\n  glUseProgram(0);\n  glViewport(0,0,width,height);\n  glBindFramebuffer(GL_FRAMEBUFFER,0);\n  glBindTexture(GL_TEXTURE_2D, 0);\n  glDepthFunc(GL_LEQUAL);\n  glViewport(0,0,width,height);\n}\n\n\n\nvoid CullingSystem::testBboxes( Job &job, bool raster )\n{\n  // send the scene's bboxes as points stream\n\n  glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferObjectBbox.buffer);\n  if (m_dualindex){\n    glVertexAttribIPointer(0, 1, GL_INT, job.m_bufferObjectBbox.stride, (const void*) job.m_bufferObjectBbox.offset);\n    glVertexAttribDivisor(0, 0);\n    glEnableVertexAttribArray(0);\n  }\n  else{\n    GLsizei stride = job.m_bufferObjectBbox.stride ? 
job.m_bufferObjectBbox.stride : sizeof(float)*4*2;\n    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, stride, (const void*)job.m_bufferObjectBbox.offset);\n    glVertexAttribDivisor(0, 0);\n    glEnableVertexAttribArray(0);\n    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, stride, (const void*)(sizeof(float)*4 + job.m_bufferObjectBbox.offset));\n    glVertexAttribDivisor(1, 0);\n    glEnableVertexAttribArray(1);\n  }\n  \n  glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferObjectMatrix.buffer);\n  glVertexAttribIPointer(2, 1, GL_INT, job.m_bufferObjectMatrix.stride, (const void*) job.m_bufferObjectMatrix.offset);\n  glVertexAttribDivisor(2, 0);\n  glEnableVertexAttribArray(2);\n  glBindBuffer(GL_ARRAY_BUFFER, 0);\n  \n  glActiveTexture(GL_TEXTURE0);\n  glBindTexture(GL_TEXTURE_BUFFER, m_tbo[0]);\n  job.m_bufferMatrices.TexBuffer(GL_TEXTURE_BUFFER,GL_RGBA32F);\n\n  if (m_dualindex){\n    glActiveTexture(GL_TEXTURE1);\n    glBindTexture(GL_TEXTURE_BUFFER, m_tbo[1]);\n    job.m_bufferBboxes.TexBuffer(GL_TEXTURE_BUFFER,GL_RGBA32F);\n  }\n\n  if (raster){\n    if (m_useRepesentativeTest) {\n      glEnable( GL_REPRESENTATIVE_FRAGMENT_TEST_NV );\n    }\n#if !DEBUG_VISIBLEBOXES\n    glDepthMask(GL_FALSE);\n    glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE);\n#endif\n  }\n  else if (m_useSSBO){\n    glEnable(GL_RASTERIZER_DISCARD);\n    job.m_bufferVisOutput.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n  }\n  else{\n    glEnable(GL_RASTERIZER_DISCARD);\n    // setup transform feedback\n    job.m_bufferVisOutput.BindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,0);\n    glBeginTransformFeedback(GL_POINTS);\n  }\n\n  glDrawArrays(GL_POINTS,0,job.m_numObjects);\n\n  if (raster){\n    if (m_useRepesentativeTest) {\n      glDisable( GL_REPRESENTATIVE_FRAGMENT_TEST_NV );\n    }\n#if !DEBUG_VISIBLEBOXES\n    glDepthMask(GL_TRUE);\n    glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);\n#endif\n  }\n  else if (m_useSSBO){\n    glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0);\n    
glDisable(GL_RASTERIZER_DISCARD);\n  }\n  else{\n    glEndTransformFeedback();\n    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER,0,0);\n    glDisable(GL_RASTERIZER_DISCARD);\n  }\n\n  if (m_dualindex){\n    glBindTexture(GL_TEXTURE_BUFFER, 0);\n    glActiveTexture(GL_TEXTURE0);\n  }\n  glBindTexture(GL_TEXTURE_BUFFER, 0);\n  \n  glDisableVertexAttribArray(0);\n  glDisableVertexAttribArray(1);\n  glDisableVertexAttribArray(2);\n  \n}\n\nvoid CullingSystem::bitsFromOutput( Job &job, BitType type)\n{\n  // for GL 3.3 compatibility we use xfb\n  // in GL 4.3 SSBO is used\n  // \n  // using compute instead of \"invisible\" point drawing\n  // would be better if we had really huge thread counts\n\n  glEnable(GL_RASTERIZER_DISCARD);\n\n  glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferVisOutput.buffer);\n  for (int i = 0; i < 8; i++){\n    glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(int)*32, (const void*)(i*sizeof(int)*4 + job.m_bufferVisOutput.offset));\n    glVertexAttribDivisor(i, 0);\n    glEnableVertexAttribArray(i);\n  }\n  \n  if (type == BITS_CURRENT){\n    glUseProgram(m_programs.bit_regular);\n  }\n  else{\n    glUseProgram(type == BITS_CURRENT_AND_LAST ? 
m_programs.bit_temporallast : m_programs.bit_temporalnew);\n\n    glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferVisBitsLast.buffer);\n    glVertexAttribIPointer(9, 1, GL_UNSIGNED_INT, sizeof(int), (const void*)job.m_bufferVisBitsLast.offset);\n    glEnableVertexAttribArray(9);\n  }\n\n  if (m_useSSBO){\n    job.m_bufferVisBitsCurrent.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);\n  }\n  else{\n    job.m_bufferVisBitsCurrent.BindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,0);\n    glBeginTransformFeedback(GL_POINTS);\n  }\n\n  glDrawArrays(GL_POINTS,0, minDivide(job.m_numObjects,32));\n\n  if (m_useSSBO){\n    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);\n    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);\n  }\n  else{\n    glEndTransformFeedback();\n    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0);\n  }\n  \n  glDisableVertexAttribArray(9);\n  for (int i = 0; i < 8; i++){\n    glDisableVertexAttribArray(i);\n  }\n\n  glDisable(GL_RASTERIZER_DISCARD);\n  glBindBuffer(GL_ARRAY_BUFFER, 0);\n}\n\nvoid CullingSystem::resultFromBits( Job &job )\n{\n  job.resultFromBits(job.m_bufferVisBitsCurrent);\n}\n\nvoid CullingSystem::resultClient(Job &job)\n{\n  job.resultClient();\n}\n\nvoid CullingSystem::buildOutput( MethodType method, Job &job, const View& view )\n{\n  switch(method){\n  case METHOD_FRUSTUM:\n    {\n      glUseProgram(m_programs.object_frustum);\n      glUniformMatrix4fv(m_uniforms.frustum_viewProj, 1 ,GL_FALSE, view.viewProjMatrix);\n      \n      testBboxes(job,false);\n    }\n    break;\n  case METHOD_HIZ:\n    {\n      glUseProgram(m_programs.object_hiz);\n      glUniformMatrix4fv(m_uniforms.hiz_viewProj, 1, GL_FALSE, view.viewProjMatrix);\n      glActiveTexture(GL_TEXTURE2);\n      glBindTexture(GL_TEXTURE_2D,job.m_textureDepthWithMipmaps);\n      \n      testBboxes(job,false);\n      \n      glActiveTexture(GL_TEXTURE2);\n      glBindTexture(GL_TEXTURE_2D,0);\n      
glActiveTexture(GL_TEXTURE0);\n    }\n    break;\n  case METHOD_RASTER:\n    {\n      // clear visibles\n      job.m_bufferVisOutput.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n      glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,0);\n\n      glUseProgram(m_programs.object_raster);\n      glUniformMatrix4fv(m_uniforms.raster_viewProj, 1, GL_FALSE, view.viewProjMatrix);\n      glUniform3fv(m_uniforms.raster_viewPos, 1, view.viewPos);\n      glUniform3fv(m_uniforms.raster_viewDir, 1, view.viewDir);\n      \n      glEnable( GL_POLYGON_OFFSET_FILL );\n      glPolygonOffset(-1,-1);\n      testBboxes(job,true);\n      glPolygonOffset(0,0);\n      glDisable( GL_POLYGON_OFFSET_FILL );\n\n      glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n\n      glBindBufferBase (GL_SHADER_STORAGE_BUFFER,0,0);\n    }\n    break;\n  }\n}\n\n\nvoid CullingSystem::swapBits( Job &job )\n{\n  Buffer temp = job.m_bufferVisBitsCurrent;\n  job.m_bufferVisBitsCurrent = job.m_bufferVisBitsLast;\n  job.m_bufferVisBitsLast = temp;\n}\n\n\nvoid CullingSystem::JobIndirectUnordered::resultFromBits( const Buffer& bufferVisBitsCurrent )\n{\n  glEnable(GL_RASTERIZER_DISCARD);\n\n  glUseProgram(m_program_indirect_compact);\n\n  m_bufferIndirectCounter.BindBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0);\n  m_bufferIndirectCounter.ClearBufferSubData (GL_ATOMIC_COUNTER_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);\n\n  bufferVisBitsCurrent.   BindBufferRange(GL_SHADER_STORAGE_BUFFER, 2);\n  m_bufferObjectIndirects.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 1);\n  m_bufferIndirectResult. BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0);\n  m_bufferIndirectResult. 
ClearBufferSubData(GL_SHADER_STORAGE_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);\n\n  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n  glDrawArrays(GL_POINTS,0,m_numObjects);\n\n  glDisable(GL_RASTERIZER_DISCARD);\n  glBindBufferBase  (GL_ATOMIC_COUNTER_BUFFER, 0, 0);\n  glBindBufferBase  (GL_SHADER_STORAGE_BUFFER, 2, 0);\n  glBindBufferBase  (GL_SHADER_STORAGE_BUFFER, 1, 0);\n  glBindBufferBase  (GL_SHADER_STORAGE_BUFFER, 0, 0);\n}\n\nvoid CullingSystem::JobReadback::resultFromBits( const Buffer& bufferVisBitsCurrent )\n{\n  GLsizeiptr size = sizeof(int) * minDivide(m_numObjects,32);\n  glBindBuffer(GL_COPY_READ_BUFFER, bufferVisBitsCurrent.buffer );\n  glBindBuffer(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.buffer );\n  glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, bufferVisBitsCurrent.offset, m_bufferVisBitsReadback.offset, size);\n  glBindBuffer( GL_COPY_READ_BUFFER, 0 );\n  glBindBuffer( GL_COPY_WRITE_BUFFER, 0 );\n}\n\nvoid CullingSystem::JobReadback::resultClient()\n{\n  glBindBuffer(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.buffer);\n  glGetBufferSubData(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.offset, m_bufferVisBitsReadback.size, m_hostVisBits);\n  glBindBuffer( GL_COPY_WRITE_BUFFER, 0);\n}\n\nvoid CullingSystem::JobReadbackPersistent::resultFromBits(const Buffer& bufferVisBitsCurrent)\n{\n  GLsizeiptr size = sizeof( int ) * minDivide( m_numObjects, 32 );\n  glCopyNamedBufferSubData( bufferVisBitsCurrent.buffer, m_bufferVisBitsReadback.buffer, bufferVisBitsCurrent.offset, m_bufferVisBitsReadback.offset, size);\n  if (m_fence) {\n    glDeleteSync( m_fence );\n  }\n  m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);\n}\n\nvoid CullingSystem::JobReadbackPersistent::resultClient()\n{\n  if (m_fence) {\n    GLsizeiptr size = sizeof( int ) * minDivide( m_numObjects, 32 );\n    // as some samples read-back within same frame (not recommended) we use the flush here, normally one wouldnt use it\n    
glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);\n    glDeleteSync(m_fence);\n    m_fence = NULL;\n    memcpy( m_hostVisBits, ((uint8_t*)m_bufferVisBitsMapping) + m_bufferVisBitsReadback.offset, size );\n  }\n}\n"
  },
  {
    "path": "cullingsystem.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#ifndef CULLINGSYSTEM_H__\n#define CULLINGSYSTEM_H__\n\n#include <cstddef>\n#include <cstdint>\n#include <nvgl/extensions_gl.hpp>\n\n\nclass CullingSystem {\npublic:\n  struct Programs {\n    GLuint  object_frustum;\n    GLuint  object_hiz;\n    GLuint  object_raster;\n\n    GLuint  bit_temporallast;\n    GLuint  bit_temporalnew;\n    GLuint  bit_regular;\n    GLuint  depth_mips;\n  };\n\n  enum MethodType {\n    METHOD_FRUSTUM,\n    METHOD_HIZ,\n    METHOD_RASTER,\n    NUM_METHODS,\n  };\n\n  enum BitType {\n    BITS_CURRENT,\n    BITS_CURRENT_AND_LAST,\n    BITS_CURRENT_AND_NOT_LAST,\n    NUM_BITS,\n  };\n\n  struct Buffer {\n    GLuint      buffer;\n    GLsizei     stride;\n    GLintptr    offset;\n    GLsizeiptr  size;\n\n    void create( size_t sizei, const void* data, GLbitfield flags )\n    {\n      size = sizei;\n      offset = 0;\n      stride = 0;\n      glCreateBuffers( 1, &buffer );\n      glNamedBufferStorage( buffer, size, data, flags );\n    }\n\n    Buffer( GLuint buffer, size_t sizei = 0 )\n      : buffer( buffer )\n      , offset( 0 )\n      , stride( 0 )\n    {\n      if (!sizei) {\n        if (sizeof( GLsizeiptr ) > 4)\n          glGetNamedBufferParameteri64v( buffer, 
GL_BUFFER_SIZE, (GLint64*)&size );\n        else\n          glGetNamedBufferParameteriv( buffer, GL_BUFFER_SIZE, (GLint*)&size );\n      }\n      else {\n        size = sizei;\n      }\n    }\n\n    Buffer()\n      : buffer(0)\n      , stride(0)\n      , offset(0)\n      , size(0)\n    {\n\n    }\n\n    inline void BindBufferRange(GLenum target, GLuint index) const {\n      glBindBufferRange(target, index, buffer, offset, size);\n    }\n    inline void TexBuffer(GLenum target, GLenum internalformat) const {\n      glTexBufferRange(target, internalformat, buffer, offset, size);\n    }\n    inline void ClearBufferSubData(GLenum target,GLenum internalformat,GLenum format,GLenum type,const GLvoid* data) const {\n      glClearBufferSubData(target,internalformat,offset,size,format,type,data);\n    }\n\n  };\n  \n  class Job {\n  public:\n    int     m_numObjects;\n      // world-space matrices {mat4 world, mat4 worldInverseTranspose}\n    Buffer  m_bufferMatrices;\n    Buffer  m_bufferBboxes; // only used in dualindex mode (2 x vec4)\n      // 1 32-bit integer per object (index)\n    Buffer  m_bufferObjectMatrix;\n      // object-space bounding box (2 x vec4)\n      // or 1 32-bit integer per object (dualindex mode)\n    Buffer  m_bufferObjectBbox;\n    \n      // 1 32-bit integer per object\n    Buffer  m_bufferVisOutput;\n    \n      // 1 32-bit integer per 32 objects (1 bit per object)\n    Buffer  m_bufferVisBitsCurrent;\n    Buffer  m_bufferVisBitsLast;\n    \n      // for HiZ\n    GLuint  m_textureDepthWithMipmaps;\n\n    // derive from this class and implement this function how you want to\n    // deal with the results that are provided in the buffer\n    virtual void resultFromBits( const Buffer& bufferVisBitsCurrent ) = 0;\n    // for readback methods we need to wait for a result\n    virtual void resultClient() {};\n\n  };\n\n  class JobReadback : public Job {\n  public:\n    // 1 32-bit integer per 32 objects (1 bit per object)\n    Buffer      
m_bufferVisBitsReadback;\n    uint32_t*   m_hostVisBits;\n\n    // Do not use this Job class unless you have to. Persistent \n    // mapped buffers are preferred.\n\n    // Copies result into readback buffer\n    void resultFromBits( const Buffer& bufferVisBitsCurrent );\n\n    // getBufferData into hostVisBits (blocking!)\n    void resultClient();\n  };\n\n  class JobReadbackPersistent : public Job {\n  public:\n    // 1 32-bit integer per 32 objects (1 bit per object)\n    Buffer      m_bufferVisBitsReadback;\n    void*       m_bufferVisBitsMapping;\n    uint32_t*   m_hostVisBits;\n    GLsync      m_fence;\n\n    // Copies result into readback buffer and records\n    // a fence.\n    void resultFromBits(const Buffer& bufferVisBitsCurrent);\n\n    // waits on fence and copies mapping into hostVisBits\n    void resultClient();\n  };\n\n  // multidrawindirect based\n  class JobIndirectUnordered : public Job {\n  public:\n    GLuint  m_program_indirect_compact;\n    // 1 indirectSize per object, \n    Buffer  m_bufferObjectIndirects;\n    Buffer  m_bufferIndirectResult;\n    // 1 integer\n    Buffer  m_bufferIndirectCounter;\n\n    void resultFromBits( const Buffer& bufferVisBitsCurrent );\n  };\n  \n  struct View {\n    const float*  viewProjMatrix;\n    const float*  viewDir;\n    const float*  viewPos;\n  };\n  \n  void init( const Programs &programs, bool dualindex );\n  void deinit();\n  void update( const Programs &programs, bool dualindex );\n  \n  // helper function for HiZ method, leaves fbo bound to 0\n  void buildDepthMipmaps(GLuint textureDepth, int width, int height);\n  \n  // assumes relevant fbo bound for raster method\n  void buildOutput( MethodType  method, Job &job, const View& view );\n\n  void bitsFromOutput ( Job &job, BitType type );\n  void resultFromBits ( Job &job );\n  void resultClient   ( Job &job );\n\n  // swaps the Current/Last bit array (for temporal coherent techniques)\n  void swapBits       ( Job &job );\n\nprivate:\n\n  struct 
Uniforms {\n    GLint   depth_lod;\n    GLint   depth_even;\n    GLint   frustum_viewProj;\n    GLint   hiz_viewProj;\n    GLint   raster_viewProj;\n    GLint   raster_viewDir;\n    GLint   raster_viewPos;\n  };\n\n  void testBboxes( Job &job, bool raster);\n  \n  Programs  m_programs;\n  Uniforms  m_uniforms;\n  GLuint    m_fbo;\n  GLuint    m_tbo[2];\n  bool      m_dualindex;\n  bool      m_useSSBO;\n  bool      m_useRepesentativeTest;\n};\n\n#endif\n"
  },
  {
    "path": "nodetree.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"nodetree.hpp\"\n#include <assert.h>\n\n//////////////////////////////////////////////////////////////////////////\n\n\nstatic inline void clearNode(NodeTree::Node &node)\n{\n  node.level      = -1;\n  node.leafidx    = NodeTree::INVALID;\n  node.levelidx   = NodeTree::INVALID;\n  node.parentidx  = NodeTree::INVALID;\n  node.childidx   = NodeTree::INVALID;\n  node.siblingidx = NodeTree::INVALID;\n}\n\nNodeTree::NodeTree()\n{\n  m_levelsUsed = 0;\n  m_treeCompactChangeID = 0;\n  m_nodesActive = 0;\n\n  clearNode(m_root);\n  m_root.levelidx =  0;\n  m_root.level    = -1;\n}\n\nconst NodeTree::Level* NodeTree::getUsedLevel( int level ) const\n{\n  if (0 <= level && level < m_levelsUsed){\n    return &m_levels[level];\n  }\n  return nullptr;\n}\n\nunsigned int NodeTree::getTreeParentChangeID() const\n{\n  return m_treeCompactChangeID;\n}\n\nconst std::vector<NodeTree::compactID>& NodeTree::getTreeCompactNodes() const\n{\n  return m_treeCompactNodes;\n}\n\nNodeTree::nodeID NodeTree::createNode()\n{\n  nodeID id;\n\n  if (!m_unusedNodes.empty()){\n    id = m_unusedNodes[m_unusedNodes.size()-1];\n    
m_unusedNodes.pop_back();\n  }\n  else{\n    Node node;\n    m_nodes.push_back(node);\n    m_treeCompactNodes.push_back(compactID());\n    id = (nodeID)(m_nodes.size()-1);\n  }\n\n  Node&  node = getNode(id);\n  clearNode(node);\n\n  return id;\n}\n\nvoid NodeTree::deleteNode( nodeID nodeidx )\n{\n  assert (isValid(nodeidx) && nodeidx != ROOT);\n\n  const Node &node = getNode(nodeidx);\n\n  // make children unlinked\n  while (isValid(node.childidx)){\n    setNodeParent(node.childidx,INVALID);\n  }\n\n  // remove self from parent list\n  setNodeParent(nodeidx,INVALID);\n\n  m_unusedNodes.push_back(nodeidx);\n}\n\nvoid NodeTree::setNodeParent( nodeID nodeidx, nodeID parentidx )\n{\n  assert (isValid(nodeidx) && nodeidx != ROOT);\n\n  Node &node = getNode(nodeidx);\n  if (node.parentidx == parentidx)\n    return;\n\n  if (isValid(node.parentidx)){\n    // unlink from old\n    Node& parent = getNode(node.parentidx);\n    bool found = false;\n    \n    if (parent.childidx == nodeidx){\n      parent.childidx = node.siblingidx;\n      found = true;\n    }\n    else if (isValid(parent.childidx)){\n      nodeID child = parent.childidx;\n      while(isValid(getNode(child).siblingidx)){\n        if (getNode(child).siblingidx == nodeidx){\n          getNode(child).siblingidx = node.siblingidx;\n          found = true;\n          break;\n        }\n        child = getNode(child).siblingidx;\n      }\n    }\n\n    assert(found && \"node was not a child of parent\");\n    node.siblingidx = INVALID;\n    updateLeafNode(node.parentidx);\n  }\n\n  if (isValid(parentidx)){\n    // link to new\n    Node& parent = getNode(parentidx);\n    node.siblingidx = parent.childidx;\n    parent.childidx = nodeidx;\n    updateLeafNode(node.parentidx);\n  }\n\n  if (isNodeInTree(nodeidx)){\n    updateLevelNode(nodeidx, isNodeInTree(parentidx) ? 
parentidx : INVALID);\n  }\n\n  node.parentidx = parentidx;\n}\n\nvoid NodeTree::addToTree( nodeID nodeidx )\n{\n  assert (isValid(nodeidx) && nodeidx != ROOT);\n\n  const Node &node = getNode(nodeidx);\n  assert (!isNodeInTree(nodeidx)        && \"must not be already added to tree\");\n  assert ( isNodeInTree(node.parentidx) && \"parent must be already added to tree\");\n\n  updateLevelNode(nodeidx,node.parentidx);\n}\n\nvoid NodeTree::removeFromTree( nodeID nodeidx )\n{\n  assert (isValid(nodeidx) && nodeidx != ROOT);\n  const Node &node = getNode(nodeidx);\n  assert (isNodeInTree(nodeidx) && \"must be already added to tree\");\n\n  updateLevelNode(nodeidx,INVALID);\n}\n\nvoid NodeTree::addToLevel( nodeID nodeidx, nodeID parentidx )\n{\n  Node&   node        = getNode(nodeidx);\n  const Node& parent  = getNode(parentidx);\n  Level&  level       = getLevel(parent.level+1);\n\n  level.changeID++;\n\n  node.levelidx = (lvlID)level.nodes.size();\n  node.level    = parent.level+1;\n  level.nodes.push_back(nodeidx);\n\n  if (!isValid(node.childidx)){\n    addLeafNode(nodeidx);\n  }\n\n  m_levelsUsed = node.level+1 > m_levelsUsed ? 
node.level+1 : m_levelsUsed;\n\n  m_nodesActive++;\n}\n\nvoid NodeTree::removeFromLevel( nodeID nodeidx )\n{\n  Node&   node  = getNode(nodeidx);\n  Level&  level = getLevel(node.level);\n\n  level.changeID++;\n\n  level.nodes[node.levelidx] = level.nodes[level.nodes.size()-1];\n  getNode(level.nodes[node.levelidx]).levelidx = node.levelidx;\n  level.nodes.pop_back();\n\n  if (isValid(node.leafidx)){\n    removeLeafNode(nodeidx);\n  }\n\n  if (node.level+1 == m_levelsUsed && level.nodes.empty()){\n    m_levelsUsed--;\n  }\n\n  node.level    = -1;\n  node.levelidx = INVALID;\n  node.leafidx  = INVALID;\n\n  m_nodesActive--;\n}\n\nvoid NodeTree::removeLeafNode( nodeID nodeidx )\n{\n  assert(isNodeInTree(nodeidx));\n  Node& node    = getNode(nodeidx);\n  Level& level  = getLevel(node.level);\n  // remove\n  level.leaves[node.leafidx] = level.leaves[level.leaves.size()-1];\n  getNode(level.leaves[node.leafidx]).leafidx = node.leafidx;\n  level.leaves.pop_back();\n}\n\nvoid NodeTree::addLeafNode( nodeID nodeidx )\n{\n  assert(isNodeInTree(nodeidx));\n  Node& node    = getNode(nodeidx);\n  Level& level  = getLevel(node.level);\n  // add\n  node.leafidx = (lvlID)level.leaves.size();\n  level.leaves.push_back(nodeidx);\n}\n\nvoid NodeTree::updateLeafNode( nodeID nodeidx )\n{\n  if (!isNodeInTree(nodeidx))\n    return;\n\n  Node& node    = getNode(nodeidx);\n  if (!isValid(node.childidx) && isValid(node.leafidx)){\n    removeLeafNode(nodeidx);\n  }\n  else if (isValid(node.childidx) && !isValid(node.leafidx)){\n    addLeafNode(nodeidx);\n  }\n}\n\nvoid NodeTree::updateLevelNode( nodeID nodeidx, nodeID parentidx )\n{\n  // at this point node.parentidx is still the old value\n  Node &node = getNode(nodeidx);\n\n  // update level parent buffer to reflect last state always\n  m_treeCompactNodes[nodeidx].parent = parentidx;\n  m_treeCompactChangeID++;\n\n  if (isValid(node.levelidx)){\n    // already active\n    if (isValid(parentidx)){\n      const Node& parent = 
getNode(parentidx);\n      int oldlevel = node.level;\n      int newlevel = parent.level + 1;\n\n      // we remain in the same level and only our parent has changed\n      if (oldlevel == newlevel){\n        return;\n      }\n\n      removeFromLevel(nodeidx);\n      addToLevel(nodeidx,parentidx);\n    }\n    else{\n      removeFromLevel(nodeidx);\n    }\n  }\n  else if (isValid(parentidx)){\n    // was inactive \n    // add to level\n    addToLevel(nodeidx,parentidx);\n  }\n\n  m_treeCompactNodes[nodeidx].level  = node.level;\n\n  nodeID child = node.childidx;\n  while (isValid(child)){\n    updateLevelNode(child, isValid(parentidx) ? nodeidx : INVALID );\n    child = getNode(child).siblingidx;\n  }\n}\n\nvoid NodeTree::reserve( int numNodes )\n{\n  m_nodes.reserve( numNodes );\n  m_treeCompactNodes.reserve( numNodes );\n}\n\nvoid NodeTree::create( int numNodes )\n{\n  Node node;\n  clearNode(node);\n\n  m_nodes.resize( numNodes, node );\n  m_treeCompactNodes.resize( numNodes, compactID() );\n}\n\nvoid NodeTree::clear()\n{\n  m_nodesActive = 0;\n  m_levelsUsed  = 0;\n  m_treeCompactChangeID = 0;\n  m_levels.clear();\n  m_nodes.clear();\n  m_treeCompactNodes.clear();\n}\n\n"
  },
  {
    "path": "nodetree.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#pragma once\n\n#include <vector>\n\nclass NodeTree {\npublic:\n  enum Flags {\n    INVALID = 0xFFFFFFFF,\n    ROOT = 0x7FFFFFFF,\n    LEVELBITS = 8,\n    PARENTBITS = 32 - LEVELBITS\n  };\n\n  static constexpr unsigned INVALID_LEVEL = (1 << LEVELBITS) - 1;\n  static constexpr unsigned INVALID_PARENT = (1 << PARENTBITS) - 1;\n\n  struct compactID {\n    unsigned level : LEVELBITS;\n    unsigned parent : PARENTBITS;\n\n    compactID(){\n      level = INVALID_LEVEL;\n      parent = INVALID_PARENT;\n    }\n  };\n  typedef unsigned int nodeID;\n  typedef unsigned int lvlID;\n\n\n  struct Level {\n    unsigned int          changeID;\n    std::vector<nodeID>   nodes;\n    std::vector<nodeID>   leaves;\n\n    Level(){\n      changeID = 0;\n    }\n  };\n\n  struct Node {\n    nodeID                parentidx;\n    lvlID                 levelidx;\n    lvlID                 leafidx;\n    int                   level;\n    nodeID                childidx;\n    nodeID                siblingidx;\n  };\n\nprivate:\n\n  Node                              m_root;\n\n  // general nodes\n  std::vector<Node>                 m_nodes;\n  
std::vector<nodeID>               m_unusedNodes;\n\n  // actual nodes added to tree\n  std::vector<compactID>            m_treeCompactNodes;\n  std::vector<Level>                m_levels;\n  unsigned int                      m_treeCompactChangeID;\n  int                               m_nodesActive;\n  int                               m_levelsUsed;\n\npublic:\n  NodeTree();\n\n  const Level*  getUsedLevel(int level) const;\n  inline int getNumUsedLevel() const \n  {\n    return m_levelsUsed;\n  }\n\n  unsigned int getTreeParentChangeID() const;\n  const std::vector<compactID>& getTreeCompactNodes() const;\n\n  inline nodeID getTreeRoot()\n  {\n    return ROOT;\n  }\n\n  inline const Node& getNode(nodeID nodeidx) const\n  {\n    if (nodeidx == ROOT) return m_root;\n    else                 return m_nodes[nodeidx];\n  }\n\n  inline bool  isValid(unsigned int id)\n  {\n    return id != INVALID;\n  }\n\n  inline bool  isNodeInTree(nodeID nodeidx)\n  {\n    return isValid(nodeidx) && isValid(getNode(nodeidx).levelidx);\n  }\n\n  inline nodeID  getParentNode(nodeID nodeidx) const\n  {\n    return getNode(nodeidx).parentidx;\n  }\n\n  nodeID  createNode();\n\n  void    deleteNode(nodeID nodeidx);\n\n  void    setNodeParent(nodeID nodeidx, nodeID parentidx);\n\n  void    addToTree(nodeID nodeidx);\n\n  void    removeFromTree(nodeID nodeidx);\n\n  void    reserve(int numNodes);\n\n  void    create(int numNodes);\n\n  void    clear();\n\n  int     getNumActiveNodes() const {\n    return m_nodesActive;\n  }\n\nprivate:\n\n  inline Level& getLevel(int level)\n  {\n    if ((int)m_levels.size() < level+1){\n      m_levels.resize(level+1);\n    }\n    return m_levels[level];\n  }\n\n  inline Node& getNode(nodeID nodeidx)\n  {\n    if (nodeidx == ROOT) return m_root;\n    else                 return m_nodes[nodeidx];\n  }\n\n  void addToLevel(nodeID nodeidx, nodeID parentidx);\n\n  void removeFromLevel(nodeID nodeidx);\n\n  void removeLeafNode(nodeID nodeidx);\n\n  void 
addLeafNode(nodeID nodeidx);\n\n  void updateLeafNode(nodeID nodeidx);\n\n  void updateLevelNode(nodeID nodeidx, nodeID parentidx);\n\n};\n\n\n\n\n"
  },
  {
    "path": "nvtoken.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"nvtoken.hpp\"\n\nnamespace nvtoken\n{\n\n  //////////////////////////////////////////////////////////////////////////\n  // generic\n\n  GLuint   s_nvcmdlist_header[NVTOKEN_TYPES] = {0};\n  GLuint   s_nvcmdlist_headerSizes[NVTOKEN_TYPES] = {0};\n  GLushort s_nvcmdlist_stages[NVTOKEN_STAGES] = {0};\n  bool     s_nvcmdlist_bindless  = false;\n  \n  static inline GLuint nvtokenHeaderSW(GLuint type, GLuint size){\n    return type | (size<<16);\n  }\n  \n  static inline GLenum nvtokenHeaderCommandSW(GLuint header)\n  {\n    return header & 0xFFFF;\n  }\n\n  static inline GLuint nvtokenHeaderSizeSW(GLuint header)\n  {\n    return header>>16;\n  }\n\n  static inline GLenum nvtokenHeaderCommand(GLuint header)\n  {\n    for (int i = 0; i < NVTOKEN_TYPES; i++){\n      if (header == s_nvcmdlist_header[i]) return i;\n    }\n\n    assert(0 && \"can't find header\");\n    return -1;\n  }\n\n  template <class T>\n  static void nvtokenRegisterSize()\n  {\n    s_nvcmdlist_headerSizes[T::ID] = sizeof(T);\n  }\n\n  void nvtokenInitInternals( bool hwsupport, bool bindlessSupport)\n  {\n    assert( !hwsupport || (hwsupport && 
bindlessSupport) );\n\n    nvtokenRegisterSize<NVTokenTerminate>();\n    nvtokenRegisterSize<NVTokenNop>();\n    nvtokenRegisterSize<NVTokenDrawElems>();\n    nvtokenRegisterSize<NVTokenDrawArrays>();\n    nvtokenRegisterSize<NVTokenDrawElemsStrip>();\n    nvtokenRegisterSize<NVTokenDrawArraysStrip>();\n    nvtokenRegisterSize<NVTokenDrawElemsInstanced>();\n    nvtokenRegisterSize<NVTokenDrawArraysInstanced>();\n    nvtokenRegisterSize<NVTokenVbo>();\n    nvtokenRegisterSize<NVTokenIbo>();\n    nvtokenRegisterSize<NVTokenUbo>();\n    nvtokenRegisterSize<NVTokenLineWidth>();\n    nvtokenRegisterSize<NVTokenPolygonOffset>();\n    nvtokenRegisterSize<NVTokenScissor>();\n    nvtokenRegisterSize<NVTokenBlendColor>();\n    nvtokenRegisterSize<NVTokenViewport>();\n    nvtokenRegisterSize<NVTokenAlphaRef>();\n    nvtokenRegisterSize<NVTokenStencilRef>();\n    nvtokenRegisterSize<NVTokenFrontFace>();\n    \n    for (int i = 0; i < NVTOKEN_TYPES; i++){\n      GLuint sz = s_nvcmdlist_headerSizes[i];\n      assert(sz);\n    }\n    \n    s_nvcmdlist_bindless  = bindlessSupport;\n    \n    if (hwsupport){\n      for (int i = 0; i < NVTOKEN_TYPES; i++){\n        s_nvcmdlist_header[i] = glGetCommandHeaderNV(i,s_nvcmdlist_headerSizes[i]);\n      }\n      s_nvcmdlist_stages[NVTOKEN_STAGE_VERTEX] = glGetStageIndexNV(GL_VERTEX_SHADER);\n      s_nvcmdlist_stages[NVTOKEN_STAGE_TESS_CONTROL] = glGetStageIndexNV(GL_TESS_CONTROL_SHADER);\n      s_nvcmdlist_stages[NVTOKEN_STAGE_TESS_EVALUATION] = glGetStageIndexNV(GL_TESS_EVALUATION_SHADER);\n      s_nvcmdlist_stages[NVTOKEN_STAGE_GEOMETRY] = glGetStageIndexNV(GL_GEOMETRY_SHADER);\n      s_nvcmdlist_stages[NVTOKEN_STAGE_FRAGMENT] = glGetStageIndexNV(GL_FRAGMENT_SHADER);\n    }\n    else{\n      for (int i = 0; i < NVTOKEN_TYPES; i++){\n        s_nvcmdlist_header[i] = nvtokenHeaderSW(i,s_nvcmdlist_headerSizes[i]);\n      }\n      for (int i = 0; i < NVTOKEN_STAGES; i++){\n        s_nvcmdlist_stages[i] = i;\n      }\n    }\n  }\n\n#define 
TOSTRING(a)  case a: return #a;\n  const char* nvtokenCommandToString(GLenum type){\n    switch  (type){\n      TOSTRING(GL_NOP_COMMAND_NV                   );\n      TOSTRING(GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV);\n      TOSTRING(GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV  );\n      TOSTRING(GL_ELEMENT_ADDRESS_COMMAND_NV       );\n      TOSTRING(GL_ATTRIBUTE_ADDRESS_COMMAND_NV     );\n      TOSTRING(GL_UNIFORM_ADDRESS_COMMAND_NV       );\n      TOSTRING(GL_BLEND_COLOR_COMMAND_NV           );\n      TOSTRING(GL_STENCIL_REF_COMMAND_NV           );\n      TOSTRING(GL_TERMINATE_SEQUENCE_COMMAND_NV    );\n      TOSTRING(GL_LINE_WIDTH_COMMAND_NV            );\n      TOSTRING(GL_POLYGON_OFFSET_COMMAND_NV        );\n      TOSTRING(GL_ALPHA_REF_COMMAND_NV             );\n      TOSTRING(GL_VIEWPORT_COMMAND_NV              );\n      TOSTRING(GL_SCISSOR_COMMAND_NV               );\n      TOSTRING(GL_DRAW_ELEMENTS_COMMAND_NV         );\n      TOSTRING(GL_DRAW_ARRAYS_COMMAND_NV           );\n      TOSTRING(GL_DRAW_ELEMENTS_STRIP_COMMAND_NV   );\n      TOSTRING(GL_DRAW_ARRAYS_STRIP_COMMAND_NV     );\n    }\n    return NULL;\n  }\n\n  //////////////////////////////////////////////////////////////////////////\n\n\n  void nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSize, int stats[NVTOKEN_TYPES] )\n  {\n    const GLubyte* NV_RESTRICT current = (GLubyte*)stream;\n    const GLubyte* streamEnd = current + streamSize;\n\n    while (current < streamEnd){\n      const GLuint*             header  = (const GLuint*)current;\n\n      GLenum type = nvtokenHeaderCommand(*header);\n      stats[type]++;\n\n      current += s_nvcmdlist_headerSizes[type];\n    }\n  }\n\n\n  // Emulation related\n\n  static inline GLenum nvtokenDrawCommandSequenceSW( const void* NV_RESTRICT stream, size_t streamSize, GLenum mode, GLenum type, const StateSystem::State& state )\n  {\n    const GLubyte* NV_RESTRICT current = (GLubyte*)stream;\n    const GLubyte* streamEnd = current + streamSize;\n\n    
GLenum modeStrip;\n    if      (mode == GL_LINES)                modeStrip = GL_LINE_STRIP;\n    else if (mode == GL_TRIANGLES)            modeStrip = GL_TRIANGLE_STRIP;\n    /*else if (mode == GL_QUADS)                modeStrip = GL_QUAD_STRIP;*/\n    else if (mode == GL_LINES_ADJACENCY)      modeStrip = GL_LINE_STRIP_ADJACENCY;\n    else if (mode == GL_TRIANGLES_ADJACENCY)  modeStrip = GL_TRIANGLE_STRIP_ADJACENCY;\n    else    modeStrip = mode;\n\n    GLenum modeSpecial;\n    if      (mode == GL_LINES)      modeSpecial = GL_LINE_LOOP;\n    else if (mode == GL_TRIANGLES)  modeSpecial = GL_TRIANGLE_FAN;\n    else    modeSpecial = mode;\n\n    while (current < streamEnd){\n      const GLuint*             header  = (const GLuint*)current;\n\n      GLenum cmdtype = nvtokenHeaderCommand(*header);\n      // if you always use emulation on non-native tokens you can use \n      // cmdtype = nvtokenHeaderCommandSW(header->encoded)\n      switch(cmdtype){\n      case GL_TERMINATE_SEQUENCE_COMMAND_NV:\n        {\n          return type;\n        }\n        break;\n      case GL_NOP_COMMAND_NV:\n        {\n        }\n        break;\n      case GL_DRAW_ELEMENTS_COMMAND_NV:\n        {\n          const DrawElementsCommandNV* cmd = (const DrawElementsCommandNV*)current;\n          glDrawElementsBaseVertex(mode, cmd->count, type, (const GLvoid*)(cmd->firstIndex * sizeof(GLuint)), cmd->baseVertex);\n        }\n        break;\n      case GL_DRAW_ARRAYS_COMMAND_NV:\n        {\n          const DrawArraysCommandNV* cmd = (const DrawArraysCommandNV*)current;\n          glDrawArrays(mode, cmd->first, cmd->count);\n        }\n        break;\n      case GL_DRAW_ELEMENTS_STRIP_COMMAND_NV:\n        {\n          const DrawElementsCommandNV* cmd = (const DrawElementsCommandNV*)current;\n          glDrawElementsBaseVertex(modeStrip, cmd->count, type, (const GLvoid*)(cmd->firstIndex * sizeof(GLuint)), cmd->baseVertex);\n        }\n        break;\n      case GL_DRAW_ARRAYS_STRIP_COMMAND_NV:\n       
 {\n          const DrawArraysCommandNV* cmd = (const DrawArraysCommandNV*)current;\n          glDrawArrays(modeStrip, cmd->first, cmd->count);\n        }\n        break;\n      case GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV:\n        {\n          const DrawElementsInstancedCommandNV* cmd = (const DrawElementsInstancedCommandNV*)current;\n\n          assert (cmd->mode == mode || cmd->mode == modeStrip || cmd->mode == modeSpecial);\n\n          glDrawElementsIndirect(cmd->mode, type, &cmd->count);\n        }\n        break;\n      case GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV:\n        {\n          const DrawArraysInstancedCommandNV* cmd = (const DrawArraysInstancedCommandNV*)current;\n\n          assert (cmd->mode == mode || cmd->mode == modeStrip || cmd->mode == modeSpecial);\n\n          glDrawArraysIndirect(cmd->mode, &cmd->count);\n        }\n        break;\n      case GL_ELEMENT_ADDRESS_COMMAND_NV:\n        {\n          const ElementAddressCommandNV* cmd = (const ElementAddressCommandNV*)current;\n          type = cmd->typeSizeInByte == 4 ? 
GL_UNSIGNED_INT : GL_UNSIGNED_SHORT;\n          if (s_nvcmdlist_bindless){\n            glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x7FFFFFFF);\n          }\n          else{\n            const ElementAddressCommandEMU* cmd = (const ElementAddressCommandEMU*)current;\n            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, cmd->buffer);\n          }\n        }\n        break;\n      case GL_ATTRIBUTE_ADDRESS_COMMAND_NV:\n        {\n          if (s_nvcmdlist_bindless){\n            const AttributeAddressCommandNV* cmd = (const AttributeAddressCommandNV*)current;\n            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, cmd->index, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x7FFFFFFF);\n          }\n          else{\n            const AttributeAddressCommandEMU* cmd = (const AttributeAddressCommandEMU*)current;\n            glBindVertexBuffer(cmd->index, cmd->buffer, cmd->offset, state.vertexformat.bindings[cmd->index].stride);\n          }\n        }\n        break;\n      case GL_UNIFORM_ADDRESS_COMMAND_NV:\n        {\n           if (s_nvcmdlist_bindless){\n            const UniformAddressCommandNV* cmd = (const UniformAddressCommandNV*)current;\n            glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, cmd->index, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x10000);\n          }\n          else{\n            const UniformAddressCommandEMU* cmd = (const UniformAddressCommandEMU*)current;\n            glBindBufferRange(GL_UNIFORM_BUFFER,cmd->index, cmd->buffer, cmd->offset256 * 256, cmd->size4*4);\n          }\n        }\n        break;\n      case GL_BLEND_COLOR_COMMAND_NV:\n        {\n          const BlendColorCommandNV* cmd = (const BlendColorCommandNV*)current;\n          glBlendColor(cmd->red,cmd->green,cmd->blue,cmd->alpha);\n        }\n        break;\n      case GL_STENCIL_REF_COMMAND_NV:\n        {\n          const StencilRefCommandNV* cmd = 
(const StencilRefCommandNV*)current;\n          glStencilFuncSeparate(GL_FRONT, state.stencil.funcs[StateSystem::FACE_FRONT].func, cmd->frontStencilRef, state.stencil.funcs[StateSystem::FACE_FRONT].mask);\n          glStencilFuncSeparate(GL_BACK,  state.stencil.funcs[StateSystem::FACE_BACK ].func, cmd->backStencilRef,  state.stencil.funcs[StateSystem::FACE_BACK ].mask);\n        }\n        break;\n\n      case GL_LINE_WIDTH_COMMAND_NV:\n        {\n          const LineWidthCommandNV* cmd = (const LineWidthCommandNV*)current;\n          glLineWidth(cmd->lineWidth);\n        }\n        break;\n      case GL_POLYGON_OFFSET_COMMAND_NV:\n        {\n          const PolygonOffsetCommandNV* cmd = (const PolygonOffsetCommandNV*)current;\n          glPolygonOffset(cmd->scale,cmd->bias);\n        }\n        break;\n      case GL_ALPHA_REF_COMMAND_NV:\n        {/*\n          const AlphaRefCommandNV* cmd = (const AlphaRefCommandNV*)current;\n          glAlphaFunc(state.alpha.mode, cmd->alphaRef);\n          */\n        }\n        break;\n      case GL_VIEWPORT_COMMAND_NV:\n        {\n          const ViewportCommandNV* cmd = (const ViewportCommandNV*)current;\n          glViewport(cmd->x, cmd->y, cmd->width, cmd->height);\n        }\n        break;\n      case GL_SCISSOR_COMMAND_NV:\n        {\n          const ScissorCommandNV* cmd = (const ScissorCommandNV*)current;\n          glScissor(cmd->x,cmd->y,cmd->width,cmd->height);\n        }\n        break;\n      case GL_FRONT_FACE_COMMAND_NV:\n        {\n          const FrontFaceCommandNV* cmd = (const FrontFaceCommandNV*)current;\n          // the token stores frontFace as 0 = CW, 1 = CCW (NVTokenFrontFace::setFrontFace\n          // writes winding == GL_CCW), so a non-zero value must select GL_CCW\n          glFrontFace(cmd->frontFace ? GL_CCW : GL_CW);\n        }\n        break;\n      }\n\n\n      GLuint tokenSize = s_nvcmdlist_headerSizes[cmdtype];\n      assert(tokenSize);\n\n      current += tokenSize;\n\n    }\n    return type;\n  }\n\n  void nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream, size_t streamSize, \n    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, \n    
GLuint count, \n    StateSystem::State &state)\n  {\n    const char* NV_RESTRICT tokens = (const char*)stream;\n    GLenum type = GL_UNSIGNED_SHORT;\n    for (GLuint i = 0; i < count; i++)\n    {\n      size_t offset = offsets[i];\n      size_t size   = sizes[i];\n\n      assert(size + offset <= streamSize);\n\n      type = nvtokenDrawCommandSequenceSW(&tokens[offset], size, mode, type, state);\n    }\n\n  }\n\n#if NVTOKEN_STATESYSTEM\n  void nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_t streamSize, \n    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, \n    const GLuint* NV_RESTRICT states, const GLuint* NV_RESTRICT fbos, GLuint count, \n    StateSystem &stateSystem)\n  {\n    int lastFbo = ~0;\n    const char* NV_RESTRICT tokens = (const char*)stream;\n\n    StateSystem::StateID lastID;\n\n    GLenum type = GL_UNSIGNED_SHORT;\n    for (GLuint i = 0; i < count; i++)\n    {\n      GLuint fbo;\n\n      StateSystem::StateID curID = states[i];\n      const StateSystem::State&  state = stateSystem.get(curID);\n\n      if (fbos[i]){\n        fbo = fbos[i];\n      }\n      else{\n        fbo = state.fbo.fboDraw;\n      }\n\n      if (fbo != lastFbo){\n        glBindFramebuffer(GL_FRAMEBUFFER, fbo);\n        lastFbo = fbo;\n      }\n\n      if (i == 0){\n        stateSystem.applyGL( curID, true ); // quite costly\n      }\n      else {\n        stateSystem.applyGL( curID, lastID, true );\n      }\n      lastID = curID;\n\n      size_t offset = offsets[i];\n      size_t size   = sizes[i];\n\n      GLenum mode = state.basePrimitiveMode;\n\n      assert(size + offset <= streamSize);\n\n      type = nvtokenDrawCommandSequenceSW(&tokens[offset], size, mode, type, state);\n    }\n  }\n#endif\n}\n"
  },
  {
    "path": "nvtoken.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n\n#include <assert.h>\n#include <string>\n#include <vector>\n\n#define NVTOKEN_STATESYSTEM 1\n\n#include \"platform.h\"\n#include <nvgl/extensions_gl.hpp>\n#if NVTOKEN_STATESYSTEM\n// not needed if emulation is not used, or implemented differently\n#include \"statesystem.hpp\"\n#else\nnamespace StateSystem {\n  // Minimal emulation layer\n  enum Faces {\n    FACE_FRONT,\n    FACE_BACK,\n    MAX_FACES,\n  };\n  struct State {\n    struct {\n      struct {\n        GLsizei stride;\n      }bindings[16];\n    }vertexformat;\n\n    struct {\n      GLenum mode;\n    }alpha;\n\n    struct {\n      struct {\n        GLenum func;\n        GLuint mask;\n      }funcs[MAX_FACES];\n    }stencil;\n  };\n}\n#endif\n\n\nnamespace nvtoken\n{\n\n  //////////////////////////////////////////////////////////////////////////\n  // generic\n\n  // not the cleanest way\n  #define NVTOKEN_TYPES (GL_FRONT_FACE_COMMAND_NV+1)\n\n  enum NVTokenShaderStage {\n    NVTOKEN_STAGE_VERTEX,\n    NVTOKEN_STAGE_TESS_CONTROL,\n    NVTOKEN_STAGE_TESS_EVALUATION,\n    NVTOKEN_STAGE_GEOMETRY,\n    NVTOKEN_STAGE_FRAGMENT,\n    NVTOKEN_STAGES,\n  };\n\n  extern bool 
    s_nvcmdlist_bindless;\n  extern GLuint   s_nvcmdlist_header[NVTOKEN_TYPES];\n  extern GLuint   s_nvcmdlist_headerSizes[NVTOKEN_TYPES];\n  extern GLushort s_nvcmdlist_stages[NVTOKEN_STAGES];\n  \n  class NVPointerStream {\n  public:\n    size_t          m_max;\n    unsigned char*  m_begin;\n    unsigned char*  m_end;\n    unsigned char* NV_RESTRICT m_cur;\n\n    void init(void* data, size_t size)\n    {\n      m_begin = (unsigned char*)data;\n      m_end   = m_begin + size;\n      m_cur   = m_begin;\n      m_max   = size;\n    }\n\n    size_t size() const\n    {\n      return m_cur - m_begin;\n    }\n\n    size_t  capacity() const\n    {\n      return m_max;\n    }\n  };\n\n  struct NVTokenSequence {\n    std::vector<GLintptr>  offsets;\n    std::vector<GLsizei>   sizes;\n    std::vector<GLuint>    states;\n    std::vector<GLuint>    fbos;\n  };\n\n#pragma pack(push,1)\n\n  typedef struct {\n    GLuint   header;\n    GLuint   buffer;\n    GLuint   _pad;\n    GLuint   typeSizeInByte;\n  } ElementAddressCommandEMU;\n\n  typedef struct {\n    GLuint   header;\n    GLuint   index;\n    GLuint   buffer;\n    GLuint   offset;\n  } AttributeAddressCommandEMU;\n\n  typedef struct {\n    GLuint      header;\n    GLushort    index;\n    GLushort    stage;\n    GLuint      buffer;\n    GLushort    offset256;\n    GLushort    size4;\n  } UniformAddressCommandEMU;\n\n\n  struct NVTokenNop {\n    static const GLenum   ID = GL_NOP_COMMAND_NV;\n\n    NOPCommandNV      cmd;\n\n    NVTokenNop() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenTerminate {\n    static const GLenum   ID = GL_TERMINATE_SEQUENCE_COMMAND_NV;\n\n    TerminateSequenceCommandNV      cmd;\n\n    NVTokenTerminate() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenDrawElemsInstanced {\n    static const GLenum   ID = GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV;\n\n    DrawElementsInstancedCommandNV   cmd;\n\n    NVTokenDrawElemsInstanced() {\n      
cmd.mode = GL_TRIANGLES;\n      cmd.baseInstance = 0;\n      cmd.baseVertex = 0;\n      cmd.firstIndex = 0;\n      cmd.count = 0;\n      cmd.instanceCount = 1;\n\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n    \n    void setMode(GLenum primmode) {\n      cmd.mode = primmode;\n    }\n\n    void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)\n    {\n      cmd.count = count;\n      cmd.firstIndex = firstIndex;\n      cmd.baseVertex = baseVertex;\n    }\n\n    void setInstances(GLuint count, GLuint baseInstance=0){\n      cmd.baseInstance  = baseInstance;\n      cmd.instanceCount = count;\n    }\n  };\n\n  struct NVTokenDrawArraysInstanced {\n    static const GLenum   ID = GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV;\n\n    DrawArraysInstancedCommandNV          cmd;\n\n    NVTokenDrawArraysInstanced() {\n      cmd.mode = GL_TRIANGLES;\n      cmd.baseInstance = 0;\n      cmd.first = 0;\n      cmd.count = 0;\n      cmd.instanceCount = 1;\n\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n    \n    void setMode(GLenum primmode) {\n      cmd.mode = primmode;\n    }\n\n    void setParams(GLuint count, GLuint first=0)\n    {\n      cmd.count = count;\n      cmd.first = first;\n    }\n\n    void setInstances(GLuint count, GLuint baseInstance=0){\n      cmd.baseInstance  = baseInstance;\n      cmd.instanceCount = count;\n    }\n  };\n\n  struct NVTokenDrawElems {\n    static const GLenum   ID = GL_DRAW_ELEMENTS_COMMAND_NV;\n\n    DrawElementsCommandNV   cmd;\n\n    NVTokenDrawElems() {\n      cmd.baseVertex = 0;\n      cmd.firstIndex = 0;\n      cmd.count = 0;\n\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n\n    void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)\n    {\n      cmd.count = count;\n      cmd.firstIndex = firstIndex;\n      cmd.baseVertex = baseVertex;\n    }\n    \n    void setMode(GLenum primmode) {\n      assert(primmode != GL_TRIANGLE_FAN && /* primmode != GL_POLYGON && */ primmode != GL_LINE_LOOP);\n      \n 
     if (primmode == GL_LINE_STRIP || primmode == GL_TRIANGLE_STRIP || /* primmode == GL_QUAD_STRIP || */\n          primmode == GL_LINE_STRIP_ADJACENCY || primmode == GL_TRIANGLE_STRIP_ADJACENCY)\n      {\n        cmd.header = s_nvcmdlist_header[GL_DRAW_ELEMENTS_STRIP_COMMAND_NV];\n      }\n      else\n      {\n        cmd.header = s_nvcmdlist_header[GL_DRAW_ELEMENTS_COMMAND_NV];\n      }\n    }\n  };\n\n  struct NVTokenDrawArrays {\n    static const GLenum   ID = GL_DRAW_ARRAYS_COMMAND_NV;\n\n    DrawArraysCommandNV   cmd;\n\n    NVTokenDrawArrays() {\n      cmd.first = 0;\n      cmd.count = 0;\n\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n\n    void setParams(GLuint count, GLuint first=0)\n    {\n      cmd.count = count;\n      cmd.first = first;\n    }\n    \n    void setMode(GLenum primmode) {\n      assert(primmode != GL_TRIANGLE_FAN && /* primmode != GL_POLYGON && */ primmode != GL_LINE_LOOP);\n      \n      if (primmode == GL_LINE_STRIP || primmode == GL_TRIANGLE_STRIP || /* primmode == GL_QUAD_STRIP || */\n          primmode == GL_LINE_STRIP_ADJACENCY || primmode == GL_TRIANGLE_STRIP_ADJACENCY)\n      {\n        cmd.header = s_nvcmdlist_header[GL_DRAW_ARRAYS_STRIP_COMMAND_NV];\n      }\n      else\n      {\n        cmd.header = s_nvcmdlist_header[GL_DRAW_ARRAYS_COMMAND_NV];\n      }\n    }\n  };\n\n  struct NVTokenDrawElemsStrip {\n    static const GLenum   ID = GL_DRAW_ELEMENTS_STRIP_COMMAND_NV;\n\n    DrawElementsCommandNV   cmd;\n\n    NVTokenDrawElemsStrip() {\n      cmd.baseVertex = 0;\n      cmd.firstIndex = 0;\n      cmd.count = 0;\n\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n\n    void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)\n    {\n      cmd.count = count;\n      cmd.firstIndex = firstIndex;\n      cmd.baseVertex = baseVertex;\n    }\n  };\n\n  struct NVTokenDrawArraysStrip {\n    static const GLenum   ID = GL_DRAW_ARRAYS_STRIP_COMMAND_NV;\n\n    DrawArraysCommandNV   cmd;\n\n    
NVTokenDrawArraysStrip() {\n      cmd.first = 0;\n      cmd.count = 0;\n\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n\n    void setParams(GLuint count, GLuint first=0)\n    {\n      cmd.count = count;\n      cmd.first = first;\n    }\n  };\n\n  struct NVTokenVbo {\n    static const GLenum   ID = GL_ATTRIBUTE_ADDRESS_COMMAND_NV;\n\n    union {\n      AttributeAddressCommandNV   cmd;\n      AttributeAddressCommandEMU  cmdEMU;\n    };\n\n    void setBinding(GLuint idx){\n      cmd.index = idx;\n    }\n\n    void setBuffer(GLuint buffer, GLuint64 address, GLuint offset)\n    {\n      if (s_nvcmdlist_bindless){\n        address += offset;\n        cmd.addressLo = GLuint(address & 0xFFFFFFFF);\n        cmd.addressHi = GLuint(address >> 32);\n      }\n      else{\n        cmdEMU.buffer = buffer;\n        cmdEMU.offset = offset;\n      }\n    }\n\n    NVTokenVbo() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenIbo {\n    static const GLenum   ID = GL_ELEMENT_ADDRESS_COMMAND_NV;\n\n    union{\n      ElementAddressCommandNV     cmd;\n      ElementAddressCommandEMU    cmdEMU;\n    };\n\n    void setType(GLenum type){\n      if (type == GL_UNSIGNED_BYTE){\n        cmd.typeSizeInByte = 1;\n      }\n      else if (type == GL_UNSIGNED_SHORT){\n        cmd.typeSizeInByte = 2;\n      }\n      else if (type == GL_UNSIGNED_INT){\n        cmd.typeSizeInByte = 4;\n      }\n      else{\n        assert(0 && \"illegal type\");\n      }\n    }\n\n    void setBuffer(GLuint buffer, GLuint64 address)\n    {\n      if (s_nvcmdlist_bindless){\n        cmd.addressLo = GLuint(address & 0xFFFFFFFF);\n        cmd.addressHi = GLuint(address >> 32);\n      }\n      else{\n        cmdEMU.buffer = buffer;\n        cmdEMU._pad   = 0;\n      }\n    }\n    \n    NVTokenIbo() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenUbo {\n    static const GLenum   ID = GL_UNIFORM_ADDRESS_COMMAND_NV;\n\n    union{\n      UniformAddressCommandNV  
 cmd;\n      UniformAddressCommandEMU  cmdEMU;\n    };\n\n    void setBuffer(GLuint buffer, GLuint64 address, GLuint offset, GLuint size)\n    {\n      assert(size % 4 == 0 && offset % 256 == 0);\n      if (s_nvcmdlist_bindless){\n        address += offset;\n        cmd.addressLo = GLuint(address & 0xFFFFFFFF);\n        cmd.addressHi = GLuint(address >> 32);\n      }\n      else{\n        cmdEMU.buffer = buffer;\n        cmdEMU.offset256 = offset / 256;\n        cmdEMU.size4     = size / 4;\n      }\n    }\n\n    void setBinding(GLuint idx, NVTokenShaderStage stage){\n      cmd.index = idx;\n      cmd.stage = s_nvcmdlist_stages[stage];\n    }\n    \n    NVTokenUbo() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenBlendColor{\n    static const GLenum   ID = GL_BLEND_COLOR_COMMAND_NV;\n\n    BlendColorCommandNV     cmd;\n\n    NVTokenBlendColor() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenStencilRef{\n    static const GLenum   ID = GL_STENCIL_REF_COMMAND_NV;\n\n    StencilRefCommandNV cmd;\n\n    NVTokenStencilRef() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  } ;\n\n  struct NVTokenLineWidth{\n    static const GLenum   ID = GL_LINE_WIDTH_COMMAND_NV;\n\n    LineWidthCommandNV  cmd;\n\n    NVTokenLineWidth() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenPolygonOffset{\n    static const GLenum   ID = GL_POLYGON_OFFSET_COMMAND_NV;\n\n    PolygonOffsetCommandNV  cmd;\n\n    NVTokenPolygonOffset() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenAlphaRef{\n    static const GLenum   ID = GL_ALPHA_REF_COMMAND_NV;\n\n    AlphaRefCommandNV cmd;\n\n    NVTokenAlphaRef() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenViewport{\n    static const GLenum   ID = GL_VIEWPORT_COMMAND_NV;\n\n    ViewportCommandNV cmd;\n\n    NVTokenViewport() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct 
NVTokenScissor {\n    static const GLenum   ID = GL_SCISSOR_COMMAND_NV;\n\n    ScissorCommandNV  cmd;\n\n    NVTokenScissor() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n  };\n\n  struct NVTokenFrontFace {\n    static const GLenum   ID = GL_FRONT_FACE_COMMAND_NV;\n\n    FrontFaceCommandNV  cmd;\n\n    NVTokenFrontFace() {\n      cmd.header  = s_nvcmdlist_header[ID];\n    }\n\n    void setFrontFace(GLenum winding){\n      cmd.frontFace = winding == GL_CCW;\n    }\n  };\n\n#pragma pack(pop)\n\n  template <class T>\n  void nvtokenMakeNop(T & token){\n    NVTokenNop *nop = (NVTokenNop*)&token;\n    for (size_t i = 0; i < (sizeof(T))/4; i++){\n      nop[i] = NVTokenNop();\n    }\n  }\n\n  template <class T>\n  size_t nvtokenEnqueue(std::string& queue, T& data)\n  {\n    size_t offset = queue.size();\n    std::string cmd = std::string((const char*)&data,sizeof(T));\n\n    queue += cmd;\n\n    return offset;\n  }\n\n  template <class T>\n  size_t nvtokenEnqueue(NVPointerStream& queue, T& data)\n  {\n    assert(queue.m_cur + sizeof(T) <= queue.m_end);\n    size_t offset = queue.m_cur - queue.m_begin;\n\n    memcpy(queue.m_cur,&data,sizeof(T));\n    queue.m_cur += sizeof(T);\n\n    return offset;\n  }\n  \n  //////////////////////////////////////////////////////////\n  \n  void        nvtokenInitInternals( bool hwsupport, bool bindlessSupport);\n  const char* nvtokenCommandToString( GLenum type );\n  void        nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSize, int stats[NVTOKEN_TYPES]);\n\n  void nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream, size_t streamSize, \n    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, \n    GLuint count, \n    StateSystem::State &state);\n\n#if NVTOKEN_STATESYSTEM\n  void nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_t streamSize, \n    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, \n    const GLuint* NV_RESTRICT states, const 
GLuint* NV_RESTRICT fbos, GLuint count, \n    StateSystem &stateSystem);\n#endif\n}\n"
  },
  {
    "path": "renderer.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include <assert.h>\n#include <algorithm>\n#include \"renderer.hpp\"\n\n#include \"common.h\"\n\n#pragma pack(1)\n\n\nnamespace csfviewer\n{\n\n  //////////////////////////////////////////////////////////////////////////\n\n  bool Renderer::s_bindless_ubo = false;\n\n  CullingSystem   Renderer::s_cullsys;\n  ScanSystem      Renderer::s_scansys;\n\n  const char* toString( enum ShadeType st )\n  {\n    switch(st){\n    case SHADE_SOLID: return \"solid\";\n    case SHADE_SOLIDWIRE: return \"solid w edges\";\n    case SHADE_SOLIDWIRE_SPLIT: return \"solid w edges (split)\";\n    }\n\n    return NULL;\n  }\n\n\n  static void FillCache( std::vector<Renderer::DrawItem>& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo,  bool solid, int objectIndex ) \n  {\n    int begin = 0;\n    const CadScene::DrawRangeCache &cache = solid ? 
obj.cacheSolid : obj.cacheWire;\n\n    for (size_t s = 0; s < cache.state.size(); s++)\n    {\n      const CadScene::DrawStateInfo &state = cache.state[s];\n      for (int d = 0; d < cache.stateCount[s]; d++){\n        // evict\n        Renderer::DrawItem di;\n        di.geometryIndex = obj.geometryIndex;\n        di.matrixIndex   = state.matrixIndex;\n        di.materialIndex = state.materialIndex;\n        di.objectIndex   = objectIndex;\n\n        di.solid = solid;\n        di.range.offset = cache.offsets[begin + d];\n        di.range.count  = cache.counts [begin + d];\n\n        drawItems.push_back(di);\n      }\n      begin += cache.stateCount[s];\n    }\n  }\n\n  static void FillJoin( std::vector<Renderer::DrawItem>& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo,  bool solid, int objectIndex ) \n  {\n    CadScene::DrawRange range;\n\n    int lastMaterial = -1;\n    int lastMatrix   = -1;\n\n    for (size_t p = 0; p < obj.parts.size(); p++){\n      const CadScene::ObjectPart&   part = obj.parts[p];\n      const CadScene::GeometryPart& mesh = geo.parts[p];\n\n      if (!part.active) continue;\n\n      if (part.materialIndex != lastMaterial || part.matrixIndex != lastMatrix){\n\n        if (range.count){\n          // evict\n          Renderer::DrawItem di;\n          di.geometryIndex = obj.geometryIndex;\n          di.matrixIndex   = lastMatrix;\n          di.materialIndex = lastMaterial;\n          di.objectIndex   = objectIndex;\n\n          di.solid = solid;\n          di.range = range;\n\n          drawItems.push_back(di);\n        }\n\n        range = CadScene::DrawRange();\n\n        lastMaterial = part.materialIndex;\n        lastMatrix   = part.matrixIndex;\n      }\n\n      if (!range.count){\n        range.offset = solid ? mesh.indexSolid.offset : mesh.indexWire.offset;\n      }\n\n      range.count += solid ? 
mesh.indexSolid.count : mesh.indexWire.count;\n    }\n\n    // evict the trailing range; like the in-loop eviction this is skipped when the range is\n    // empty (e.g. no active part), otherwise an item with count 0 and matrix/material\n    // index -1 would be handed to the renderers\n    if (range.count){\n      Renderer::DrawItem di;\n      di.geometryIndex = obj.geometryIndex;\n      di.matrixIndex   = lastMatrix;\n      di.materialIndex = lastMaterial;\n      di.objectIndex   = objectIndex;\n\n      di.solid = solid;\n      di.range = range;\n\n      drawItems.push_back(di);\n    }\n  }\n\n  static void FillIndividual( std::vector<Renderer::DrawItem>& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo, bool solid, int objectIndex ) \n  {\n    for (size_t p = 0; p < obj.parts.size(); p++){\n      const CadScene::ObjectPart&   part = obj.parts[p];\n      const CadScene::GeometryPart& mesh = geo.parts[p];\n\n      if (!part.active) continue;\n\n      Renderer::DrawItem di;\n      di.geometryIndex = obj.geometryIndex;\n      di.matrixIndex   = part.matrixIndex;\n      di.materialIndex = part.materialIndex;\n      di.objectIndex   = objectIndex;\n\n      di.solid = solid;\n      di.range = solid ? mesh.indexSolid : mesh.indexWire;\n\n      drawItems.push_back(di);\n    }\n  }\n\n\n  void Renderer::fillDrawItems( std::vector<DrawItem>& drawItems, size_t from, size_t to, bool solid, bool wire )\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n    for (size_t i = from; i < scene->m_objects.size() && i < to; i++){\n      const CadScene::Object& obj = scene->m_objects[i];\n      const CadScene::Geometry& geo = scene->m_geometry[obj.geometryIndex];\n\n      if (m_strategy == STRATEGY_GROUPS){\n        if (solid)  FillCache(drawItems, obj, geo, true,  int(i));\n        if (wire)   FillCache(drawItems, obj, geo, false, int(i));\n      }\n      else if (m_strategy == STRATEGY_JOIN) {\n        if (solid)  FillJoin(drawItems, obj, geo, true,  int(i));\n        if (wire)   FillJoin(drawItems, obj, geo, false, int(i));\n      }\n      else if (m_strategy == STRATEGY_INDIVIDUAL){\n        if (solid)  FillIndividual(drawItems, obj, geo, true,  int(i));\n        if (wire)   FillIndividual(drawItems, obj, geo, 
false, int(i));\n      }\n    }\n  }\n\n}\n\n\n\n"
  },
  {
    "path": "renderer.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#ifndef RENDERER_H__\n#define RENDERER_H__\n\n// bindless UBO\n#ifndef GL_UNIFORM_BUFFER_UNIFIED_NV\n#define GL_UNIFORM_BUFFER_UNIFIED_NV                        0x936E\n#endif\n#ifndef GL_UNIFORM_BUFFER_ADDRESS_NV\n#define GL_UNIFORM_BUFFER_ADDRESS_NV                        0x936F\n#endif\n#ifndef GL_UNIFORM_BUFFER_LENGTH_NV\n#define GL_UNIFORM_BUFFER_LENGTH_NV                         0x9370\n#endif\n\n#include \"cadscene.hpp\"\n#include <NvFoundation.h>\n#include <nvgl/programmanager_gl.hpp>\n#include <nvgl/base_gl.hpp>\n#include <nvh/profiler.hpp>\n#include \"cullingsystem.hpp\"\n#include \"scansystem.hpp\"\n\nnamespace csfviewer {\n  #define USE_NOFILTER           0  // some renderers support turning off redundancy filter\n\n  #define USE_WIRE_SHADERSWITCH  0  // If set we use two different shaders for tris and lines,\n                                    // otherwise we use an immediate mode vertexattrib as pseudo uniform toggle.\n                                    // Enable this to stress shader switching in app (becomes primary bottleneck)\n  enum Strategy {\n    STRATEGY_GROUPS,\n    STRATEGY_JOIN,\n    STRATEGY_INDIVIDUAL,\n  };\n\n  enum ShadeType {\n    SHADE_SOLID,\n    
SHADE_SOLIDWIRE,\n    SHADE_SOLIDWIRE_SPLIT, // this mode is not \"sane\" it is only meant for performance testing of fbo toggles\n    NUM_SHADES,\n  };\n\n  const char* toString(enum ShadeType st);\n\n  struct Resources {\n    GLuint    sceneUbo;\n    GLuint64  sceneAddr;\n\n    GLuint    programUbo;\n    GLuint    programUboTris;\n    GLuint    programUboLine;\n\n    GLuint    programIdx;\n    GLuint    programIdxTris;\n    GLuint    programIdxLine;\n\n    GLuint    fbo;\n    GLuint    fbo2;\n\n    size_t    stateChangeID;\n    size_t    fboTextureChangeID;\n\n    CullingSystem::View cullView;\n\n    // ugly hack\n    mutable GLuint programUsed;\n    mutable GLuint programUsedTris;\n    mutable GLuint programUsedLine;\n\n    void usingUboProgram(bool ubo=true) const\n    {\n      programUsed     = ubo ? programUbo     : programIdx;\n      programUsedTris = ubo ? programUboTris : programIdxTris;\n      programUsedLine = ubo ? programUboLine : programIdxLine;\n    }\n\n    Resources() {\n      stateChangeID = 0;\n      fboTextureChangeID = 0;\n    }\n  };\n\n#if USE_WIRE_SHADERSWITCH\n  #define SetWireMode(state) glUseProgram((state) ? resources.programUsedLine : resources.programUsedTris )\n#else\n  #define SetWireMode(state) glVertexAttribI1i(VERTEX_WIREMODE,(state))\n#endif\n\n  class Renderer {\n  public:\n\n    struct DrawItem {\n      bool                solid;\n      int                 materialIndex;\n      int                 geometryIndex;\n      int                 matrixIndex;\n      int                 objectIndex;\n      CadScene::DrawRange range;\n    };\n\n    static bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b)\n    {\n      int diff = 0;\n      diff = diff != 0 ? diff : (a.solid == b.solid ? 0 : ( a.solid ? -1 : 1 ));\n      diff = diff != 0 ? diff : (a.materialIndex - b.materialIndex);\n      diff = diff != 0 ? diff : (a.geometryIndex - b.geometryIndex);\n      diff = diff != 0 ? 
diff : (a.matrixIndex - b.matrixIndex);\n\n      return diff < 0;\n    }\n\n    class Type {\n    public:\n      Type() {\n        getRegistry().push_back(this);\n      }\n\n    public:\n      virtual bool loadPrograms( nvgl::ProgramManager &mgr ) { return true; }\n      virtual void updatedPrograms( nvgl::ProgramManager &mgr ) { }\n      virtual bool isAvailable() const = 0;\n      virtual const char* name() const = 0;\n      virtual Renderer* create() const = 0;\n      virtual unsigned int priority() const { return 0xFF; } \n    };\n\n    typedef std::vector<Type*> Registry;\n\n    static bool s_bindless_ubo;\n    static Registry& getRegistry()\n    {\n      static Registry s_registry;\n      return s_registry;\n    }\n\n    static CullingSystem   s_cullsys;\n    static ScanSystem      s_scansys;\n\n  public:\n    virtual void init(const CadScene* NV_RESTRICT scene, const Resources& resources) {}\n    virtual void deinit() {}\n    virtual void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager ) {}\n    virtual ~Renderer() {}\n\n\n    void fillDrawItems( std::vector<DrawItem>& drawItems, size_t from, size_t to, bool solid, bool wire);\n\n    Strategy                    m_strategy;\n    const CadScene* NV_RESTRICT  m_scene;\n  };\n}\n\n#endif\n"
  },
  {
    "path": "rendererindexedmdi.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include <assert.h>\n#include <algorithm>\n#include \"renderer.hpp\"\n\n#include \"common.h\"\n\n#define USE_VERTEX_ASSIGNS  (!USE_BASEINSTANCE)\n#define USE_GPU_INDIRECT    1\n#define USE_CPU_INDIRECT    (!USE_GPU_INDIRECT)\n\nnamespace csfviewer\n{\n  //////////////////////////////////////////////////////////////////////////\n\n  class RendererIndexedMDI: public Renderer {\n  public:\n    class Type : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"indexedmdi\";\n      }\n      Renderer* create() const\n      {\n        RendererIndexedMDI* renderer = new RendererIndexedMDI();\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 3;\n      }\n    };\n    class TypeVbum : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"indexedmdi_bindless\";\n      }\n      Renderer* create() const\n      {\n        RendererIndexedMDI* 
renderer = new RendererIndexedMDI();\n        renderer->m_vbum = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 3;\n      }\n    };\n    class TypeSort : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"indexedmdi_sorted\";\n      }\n      Renderer* create() const\n      {\n        RendererIndexedMDI* renderer = new RendererIndexedMDI();\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 3;\n      }\n    };\n    class TypeSortVbum : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"indexedmdi_sorted_bindless\";\n      }\n      Renderer* create() const\n      {\n        RendererIndexedMDI* renderer = new RendererIndexedMDI();\n        renderer->m_vbum = true;\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 3;\n      }\n    };\n\n  private:\n    struct DrawIndirectGL {\n      GLuint count;\n      GLuint instanceCount;\n      GLuint firstIndex;\n      GLint  baseVertex;\n      GLuint baseInstance;\n\n      DrawIndirectGL ()\n        : count(0)\n        , instanceCount(1)\n        , firstIndex(0)\n        , baseVertex(0)\n        , baseInstance(0) {}\n    };\n\n    struct IndexedCommand {\n      DrawIndirectGL  cmd;\n    };\n\n    struct ShadeCommand {\n      std::vector<IndexedCommand> indirects;\n      std::vector<int>      assigns;\n\n      std::vector<size_t>   sizes;\n      std::vector<size_t>   offsets;\n      std::vector<int>      geometries;\n      std::vector<bool>     solids;\n\n#if USE_GPU_INDIRECT\n      GLuint    indirectGL;\n      GLuint64  indirectADDR;\n#endif\n\n#if 
USE_VERTEX_ASSIGNS\n      GLuint    assignGL;\n      GLuint64  assignADDR;\n#endif\n\n      ShadeCommand() {\n#if USE_GPU_INDIRECT\n        indirectGL = 0;\n#endif\n#if USE_VERTEX_ASSIGNS\n        assignGL = 0;\n#endif\n      }\n    };\n\n  public:\n    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);\n    void deinit();\n    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);\n\n    bool                        m_vbum;\n    bool                        m_sort;\n\n\n    RendererIndexedMDI()\n      : m_vbum(false) \n      , m_sort(false)\n    {\n\n    }\n\n  private:\n\n    ShadeCommand    m_shades[NUM_SHADES];\n    \n    GLuint packBaseInstance( int matrixIndex, int materialIndex )\n    {\n      assert( materialIndex <= 0xFFF );\n      assert( matrixIndex   <= 0xFFFFF );\n      return (GLuint(matrixIndex) | (GLuint(materialIndex) << 20));\n    }\n\n    void GenerateIndirects(std::vector<DrawItem>& drawItems, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources )\n    {\n      int lastMaterial = -1;\n      int lastGeometry = -1;\n      int lastMatrix   = -1;\n      bool lastSolid   = true;\n\n      ShadeCommand& sc = m_shades[shade];\n      sc.assigns.clear();\n      sc.indirects.clear();\n\n      sc.sizes.clear();\n      sc.offsets.clear();\n      sc.solids.clear();\n      sc.geometries.clear();\n\n      std::vector<int>& assigns = sc.assigns;\n      std::vector<IndexedCommand>& indirectStream = sc.indirects;\n\n      size_t begin = 0;\n\n      int numAssigns = 0;\n\n      for (int i = 0; i < drawItems.size(); i++){\n        const DrawItem& di = drawItems[i];\n\n        if (shade == SHADE_SOLID && !di.solid){\n          if (m_sort) break;\n          continue;\n        }\n\n        if (lastGeometry != di.geometryIndex || (shade == SHADE_SOLIDWIRE && di.solid != lastSolid)){\n          sc.offsets.push_back( begin );\n          sc.sizes.  
push_back( GLsizei((indirectStream.size()-begin)) );\n          sc.solids. push_back( lastSolid );\n          sc.geometries.push_back( lastGeometry );\n\n          begin = indirectStream.size();\n        }\n\n#if USE_VERTEX_ASSIGNS\n        if (lastMatrix != di.matrixIndex || lastMaterial != di.materialIndex)\n        {\n          // push indices\n          assigns.push_back(di.matrixIndex);\n          assigns.push_back(di.materialIndex);\n          numAssigns++;\n\n          lastMatrix    = di.matrixIndex;\n          lastMaterial  = di.materialIndex;\n        }\n#endif\n\n        IndexedCommand drawelems;\n        drawelems.cmd.count = di.range.count;\n        drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint));\n#if USE_VERTEX_ASSIGNS\n        drawelems.cmd.baseInstance = numAssigns - 1;\n#else\n        drawelems.cmd.baseInstance = packBaseInstance(di.matrixIndex, di.materialIndex);\n#endif\n        indirectStream.push_back(drawelems);\n\n        lastGeometry = di.geometryIndex;\n        lastSolid = di.solid;\n      }\n\n      sc.offsets.push_back( begin );\n      sc.sizes.  push_back( GLsizei((indirectStream.size()-begin)) );\n      sc.solids. 
push_back( lastSolid );\n      sc.geometries.push_back( lastGeometry );\n    }\n\n  };\n\n  static RendererIndexedMDI::Type s_indexed;\n  static RendererIndexedMDI::TypeVbum s_indexed_vbum;\n  static RendererIndexedMDI::TypeSort s_indexedsort;\n  static RendererIndexedMDI::TypeSortVbum s_indexedsort_vbum;\n\n  void RendererIndexedMDI::init( const CadScene* NV_RESTRICT scene, const Resources& resources )\n  {\n    m_scene = scene;\n    resources.usingUboProgram(false);\n\n    std::vector<DrawItem> drawItems;\n\n    fillDrawItems(drawItems,0,scene->m_objects.size(), true, true);\n\n    if (m_sort){\n      std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups);\n    }\n\n    // build SC\n\n    GenerateIndirects(drawItems, SHADE_SOLID, scene, resources);\n    GenerateIndirects(drawItems, SHADE_SOLIDWIRE, scene, resources);\n\n    for (size_t i = 0; i <= SHADE_SOLIDWIRE; i++){\n      ShadeCommand& sc = m_shades[i];\n#if USE_GPU_INDIRECT\n      glCreateBuffers(1,&sc.indirectGL);\n      glNamedBufferStorage( sc.indirectGL, sizeof(IndexedCommand) * sc.indirects.size(), &sc.indirects[0], 0 );\n      if (m_vbum){\n        glGetNamedBufferParameterui64vNV(sc.indirectGL, GL_BUFFER_GPU_ADDRESS_NV, &sc.indirectADDR);\n        glMakeNamedBufferResidentNV(sc.indirectGL, GL_READ_ONLY);\n      }\n#endif\n#if USE_VERTEX_ASSIGNS\n      glCreateBuffers(1,&sc.assignGL);\n      glNamedBufferStorage( sc.assignGL, sizeof(int) * sc.assigns.size(), &sc.assigns[0], 0 );\n      if (m_vbum){\n        glGetNamedBufferParameterui64vNV(sc.assignGL, GL_BUFFER_GPU_ADDRESS_NV, &sc.assignADDR);\n        glMakeNamedBufferResidentNV(sc.assignGL, GL_READ_ONLY);\n      }\n#endif\n    }\n\n    m_shades[SHADE_SOLIDWIRE_SPLIT] = m_shades[SHADE_SOLIDWIRE];\n\n  }\n\n  void RendererIndexedMDI::deinit()\n  {\n    for (size_t i = 0; i < SHADE_SOLIDWIRE; i++){\n      ShadeCommand& sc = m_shades[i];\n      if (m_vbum){\n#if USE_GPU_INDIRECT\n        
glMakeNamedBufferNonResidentNV(sc.indirectGL);\n#endif\n#if USE_VERTEX_ASSIGNS\n        glMakeNamedBufferNonResidentNV(sc.assignGL);\n#endif\n      }\n#if USE_GPU_INDIRECT\n      glDeleteBuffers(1,&sc.indirectGL);\n#endif\n#if USE_VERTEX_ASSIGNS\n      glDeleteBuffers(1,&sc.assignGL);\n#endif\n    }\n  }\n\n  void RendererIndexedMDI::draw( ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager )\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n    bool vbum = m_vbum;\n\n    scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n    glUseProgram(resources.programIdx);\n\n    if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){\n      glEnable(GL_POLYGON_OFFSET_FILL);\n      glPolygonOffset(1,1);\n    }\n\n    SetWireMode(GL_FALSE);\n\n#if USE_VERTEX_ASSIGNS\n    glVertexAttribIFormat(VERTEX_ASSIGNS,2,GL_INT,0);\n    glVertexAttribBinding(VERTEX_ASSIGNS,1);\n    glEnableVertexAttribArray(VERTEX_ASSIGNS);\n    glBindVertexBuffer(1,0,0,sizeof(GLint)*2);\n    glVertexBindingDivisor(1,1);\n#endif\n    if (vbum){\n      glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n#if USE_GPU_INDIRECT\n      glEnableClientState(GL_DRAW_INDIRECT_UNIFIED_NV);\n#endif\n    }\n    if (vbum && s_bindless_ubo){\n      glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n      glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, UBO_MATERIAL, scene->m_materialsADDR, sizeof(CadScene::Material) * scene->m_materials.size() );\n      glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, UBO_SCENE,resources.sceneAddr,sizeof(SceneData));\n    }\n    else{\n      glBindBufferBase(GL_UNIFORM_BUFFER, UBO_SCENE, resources.sceneUbo);\n      glBindBufferBase(GL_UNIFORM_BUFFER, UBO_MATERIAL, scene->m_materialsGL);\n    }\n\n    nvgl::bindMultiTexture(GL_TEXTURE0 + TEX_MATRICES, GL_TEXTURE_BUFFER, scene->m_matricesTexGL);\n    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 
0);\n\n    {\n      ShadeCommand& sc = m_shades[shadetype];\n      if (vbum){\n  #if USE_GPU_INDIRECT\n        glBufferAddressRangeNV(GL_DRAW_INDIRECT_ADDRESS_NV, 0,       sc.indirectADDR, sc.indirects.size() * sizeof(IndexedCommand) );\n  #endif\n  #if USE_VERTEX_ASSIGNS\n        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 1, sc.assignADDR, sc.assigns.size() * sizeof(GLint));\n  #endif\n      }\n      else{\n  #if USE_GPU_INDIRECT\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, sc.indirectGL);\n  #endif\n  #if USE_VERTEX_ASSIGNS\n        glBindVertexBuffer(1, sc.assignGL, 0, sizeof(GLint)*2);\n  #endif\n      }\n  #if USE_CPU_INDIRECT\n      size_t offset = (size_t)&sc.indirects[0];\n  #else\n      size_t offset = 0;\n  #endif\n\n      int lastGeometry = -1;\n      bool lastSolid  = true;\n      for (size_t i = 0; i < sc.geometries.size(); i++){\n        int geometryIndex = sc.geometries[i];\n\n        if (geometryIndex != lastGeometry){\n          const CadScene::Geometry& geo = m_scene->m_geometry[ geometryIndex ];\n          if (vbum){\n            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0,  geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex));\n            glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0,         geo.iboADDR, (geo.numIndexSolid+geo.numIndexWire) * sizeof(GLuint));\n          }\n          else{\n            glBindVertexBuffer(0, geo.vboGL, 0, sizeof(CadScene::Vertex));\n            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo.iboGL);\n          }\n          lastGeometry = geometryIndex;\n        }\n\n        bool solid = sc.solids[i];\n        if (solid != lastSolid){\n          SetWireMode((!solid));\n        }\n\n        glMultiDrawElementsIndirect(solid ? 
GL_TRIANGLES : GL_LINES,GL_UNSIGNED_INT, (const void*)(offset + sc.offsets[i] * sizeof(IndexedCommand)), GLsizei(sc.sizes[i]), 0);\n\n        lastSolid = solid;\n      }\n    }\n#if USE_VERTEX_ASSIGNS\n    glDisableVertexAttribArray(VERTEX_ASSIGNS);\n    glBindVertexBuffer(1,0,0,0);\n    glVertexBindingDivisor(1,0);\n#endif\n\n    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);\n    nvgl::bindMultiTexture(GL_TEXTURE0 + TEX_MATRICES, GL_TEXTURE_BUFFER, 0);\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0);\n\n    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);\n    glBindVertexBuffer(0,0,0,0);\n\n    if (vbum){\n      glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n#if USE_GPU_INDIRECT\n      glDisableClientState(GL_DRAW_INDIRECT_UNIFIED_NV);\n#endif\n      if (s_bindless_ubo){\n        glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n      }\n    }\n\n    if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){\n      glDisable(GL_POLYGON_OFFSET_FILL);\n      glPolygonOffset(0,0);\n    }\n\n    SetWireMode(GL_FALSE);\n\n    scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n  }\n\n}\n"
  },
  {
    "path": "renderertoken.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"tokenbase.hpp\"\n\n#include \"common.h\"\n\nnamespace csfviewer\n{\n  //////////////////////////////////////////////////////////////////////////\n\n  class RendererToken: public Renderer, public TokenRendererBase {\n  public:\n    class Type : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n    class TypeAddr : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_address\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_useaddress = true;\n        return renderer;\n      }\n      unsigned int 
priority() const \n      {\n        return 9;\n      }\n    };\n    class TypeList : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenlist\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_uselist = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 8;\n      }\n    };\n    class TypeEmu : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_emulated\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_emulate = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n\n    class TypeSort : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_sorted\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n    class TypeSortAddr : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_sorted_address\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_useaddress = true;\n        renderer->m_sort = true;\n        return renderer;\n      }\n    
  unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n    class TypeSortList : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenlist_sorted\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_uselist = true;\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 8;\n      }\n    };\n    class TypeSortEmu : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_sorted_emulated\";\n      }\n      Renderer* create() const\n      {\n        RendererToken* renderer = new RendererToken();\n        renderer->m_emulate = true;\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n\n  public:\n    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);\n    void deinit();\n    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);\n\n  private:\n\n    std::vector<DrawItem>       m_drawItems;\n\n    void GenerateTokens(std::vector<DrawItem>& drawItems, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources )\n    {\n      int lastMaterial = -1;\n      int lastGeometry = -1;\n      int lastMatrix   = -1;\n      bool lastSolid   = true;\n\n      ShadeCommand& sc = m_shades[shade];\n      sc.fbos.clear();\n      sc.offsets.clear();\n      sc.sizes.clear();\n      sc.states.clear();\n      \n      std::string& tokenStream = m_tokenStreams[shade];\n      tokenStream.clear();\n\n      size_t begin = 0;\n\n      {\n        NVTokenUbo 
ubo;\n        ubo.cmd.index   = UBO_SCENE;\n        ubo.cmd.stage   = UBOSTAGE_VERTEX;\n        ubo.setBuffer(resources.sceneUbo, resources.sceneAddr, 0, sizeof(SceneData));\n        nvtokenEnqueue(tokenStream, ubo);\n\n        ubo.cmd.stage   = UBOSTAGE_FRAGMENT;\n        nvtokenEnqueue(tokenStream, ubo);\n\n#if USE_POLYOFFSETTOKEN\n        NVTokenPolygonOffset offset;\n        offset.cmd.bias = 1;\n        offset.cmd.scale = 1;\n        nvtokenEnqueue(tokenStream, offset);\n#endif\n      }\n\n      for (int i = 0; i < drawItems.size(); i++){\n        const DrawItem& di = drawItems[i];\n\n        if (shade == SHADE_SOLID && !di.solid){\n          continue;\n        }\n\n        if (shade == SHADE_SOLIDWIRE && di.solid != lastSolid){\n          sc.offsets.push_back( begin );\n          sc.sizes.  push_back( GLsizei((tokenStream.size()-begin)) );\n          sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] );\n          sc.fbos.   push_back( 0 );\n\n          begin = tokenStream.size();\n        }\n\n        if (lastGeometry != di.geometryIndex){\n          const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex];\n          NVTokenVbo vbo;\n          vbo.cmd.index = 0;\n          vbo.setBuffer(geo.vboGL, geo.vboADDR, 0);\n          nvtokenEnqueue(tokenStream, vbo);\n\n          NVTokenIbo ibo;\n          ibo.setBuffer(geo.iboGL, geo.iboADDR);\n          ibo.cmd.typeSizeInByte = 4;\n          nvtokenEnqueue(tokenStream, ibo);\n\n          lastGeometry = di.geometryIndex;\n        }\n\n        if (lastMatrix != di.matrixIndex){\n\n          NVTokenUbo ubo;\n          ubo.cmd.index   = UBO_MATRIX;\n          ubo.cmd.stage   = UBOSTAGE_VERTEX;\n          ubo.setBuffer(scene->m_matricesGL, scene->m_matricesADDR, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode));\n          nvtokenEnqueue(tokenStream, ubo);\n\n          lastMatrix = di.matrixIndex;\n        }\n\n        if (lastMaterial != 
di.materialIndex){\n\n          NVTokenUbo ubo;\n          ubo.cmd.index   = UBO_MATERIAL;\n          ubo.cmd.stage   = UBOSTAGE_FRAGMENT;\n          ubo.setBuffer(scene->m_materialsGL, scene->m_materialsADDR, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material));\n          nvtokenEnqueue(tokenStream, ubo);\n\n          lastMaterial = di.materialIndex;\n        }\n\n\n        NVTokenDrawElemsUsed drawelems;\n        drawelems.setMode(di.solid ? GL_TRIANGLES : GL_LINES);\n        drawelems.cmd.count = di.range.count;\n        drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint));\n        nvtokenEnqueue(tokenStream, drawelems);\n\n        lastSolid = di.solid;\n      }\n\n      sc.offsets.push_back( begin );\n      sc.sizes.  push_back( GLsizei((tokenStream.size()-begin)) );\n      if (shade == SHADE_SOLID){\n        sc.states. push_back( m_stateObjects[ STATE_TRIS ] );\n      }\n      else{\n        sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] );\n      }\n      sc.fbos. 
push_back( 0 );\n\n    }\n\n  };\n  static RendererToken::Type      s_token;\n  static RendererToken::TypeAddr  s_token_addr;\n  static RendererToken::TypeList  s_token_list;\n  static RendererToken::TypeEmu   s_token_emu;\n\n  static RendererToken::TypeSort      s_sorttoken;\n  static RendererToken::TypeSortAddr  s_sorttoken_addr;\n  static RendererToken::TypeSortList  s_sorttoken_list;\n  static RendererToken::TypeSortEmu   s_sorttoken_emu;\n\n  void RendererToken::init(const CadScene* NV_RESTRICT scene, const Resources& resources)\n  {\n    TokenRendererBase::init(s_bindless_ubo, !!has_GL_NV_vertex_buffer_unified_memory);\n    resources.usingUboProgram(true);\n\n    m_scene = scene;\n\n    std::vector<DrawItem> drawItems;\n\n    fillDrawItems(drawItems,0,scene->m_objects.size(), true, true);\n\n    if (USE_PERFRAMEBUILD){\n      m_drawItems = drawItems;\n    }\n\n    if (m_sort){\n      std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups);\n    }\n\n    GenerateTokens(drawItems, SHADE_SOLID, scene, resources);\n\n    TokenRendererBase::printStats(SHADE_SOLID);\n\n    GenerateTokens(drawItems, SHADE_SOLIDWIRE, scene, resources);\n\n    TokenRendererBase::printStats(SHADE_SOLIDWIRE);\n\n    TokenRendererBase::finalize(resources);\n  }\n\n  void RendererToken::deinit()\n  {\n    TokenRendererBase::deinit();\n    m_drawItems.clear();\n  }\n\n  void RendererToken::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager)\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n\n    // do state setup (primarily for sake of state capturing)\n    scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n    if (m_bindlessVboUbo){\n      glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n    }\n    else{\n      glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo);\n    
}\n\n    if (USE_PERFRAMEBUILD){\n\n#if 0\n      std::vector<DrawItem> drawItems;\n      {\n        nvh::Profiler::Section _tempTimer(profiler ,\"Copy\");\n        drawItems = m_drawItems;\n      }\n#else\n      std::vector<DrawItem>& drawItems = m_drawItems;\n#endif\n      {\n        nvh::Profiler::Section _tempTimer(profiler ,\"Sort\");\n        std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups);\n      }\n\n      {\n        nvh::Profiler::Section _tempTimer(profiler ,\"Token\");\n        GenerateTokens(drawItems, shadetype, scene, resources);\n      }\n\n      if (!m_emulate && !m_uselist){\n        nvh::Profiler::Section _tempTimer(profiler ,\"Build\");\n        ShadeCommand & shade =  m_shades[shadetype];\n        glInvalidateBufferData(m_tokenBuffers[shadetype]);\n        glNamedBufferSubData(m_tokenBuffers[shadetype],shade.offsets[0], m_tokenStreams[shadetype].size(), &m_tokenStreams[shadetype][0]);\n      }\n    }\n\n    if (USE_STATEOBJ_REBUILD){\n      nvh::Profiler::Section section(profiler,\"state\");\n      for (int i = 0; i < 25; i++){\n        m_stateChangeID = resources.stateChangeID + 1;\n        m_fboStateChangeID = resources.fboTextureChangeID + 1;\n        captureState(resources);\n      }\n    }\n    else{\n      captureState(resources);\n    }\n\n    if (!USE_POLYOFFSETTOKEN && (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT)){\n      glPolygonOffset(1,1);\n    }\n\n    if (m_hwsupport){\n      if (m_uselist){\n        glCallCommandListNV(m_commandLists[shadetype]);\n      }\n      else{\n        ShadeCommand & shade =  m_shades[shadetype];\n        if (m_useaddress){\n          glDrawCommandsStatesAddressNV(&shade.addresses[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) );\n        }\n        else{\n          glDrawCommandsStatesNV(m_tokenBuffers[shadetype], &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) );\n        }\n      
}\n    }\n    else{\n      ShadeCommand & shade =  m_shades[shadetype];\n      std::string& stream  =  m_tokenStreams[shadetype];\n      renderShadeCommandSW(&stream[0], stream.size(), shade);\n    }\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0);\n\n    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);\n    glBindVertexBuffer(0,0,0,0);\n\n    glDisable(GL_POLYGON_OFFSET_FILL);\n    glPolygonOffset(0,0);\n\n    if (m_bindlessVboUbo){\n      glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n    }\n\n    scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n  }\n\n}\n"
  },
  {
    "path": "renderertokensortcull.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"tokenbase.hpp\"\n#include \"cullingsystem.hpp\"\n\n#include \"common.h\"\n\nnamespace csfviewer\n{\n  //////////////////////////////////////////////////////////////////////////\n\n#define USE_TEMPORALRASTER      1\n#define USE_OBJECTSORT_CULLING  1\n\n\n  class RendererCullSortToken : public Renderer, public TokenRendererBase {\n  public:\n    class Shared {\n    public:\n      nvgl::ProgramID \n        token_sizes,\n        token_scan,\n        token_cmds;\n\n      static Shared& get()\n      {\n        static Shared res;\n        return res;\n      }\n\n      Shared() : loaded(false) {}\n\n      bool load(nvgl::ProgramManager &progManager)\n      {\n        if (loaded) return true;\n\n        loaded = true;\n\n        token_sizes = progManager.createProgram(\n          nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"cull-tokensizes.vert.glsl\"));\n        token_cmds = progManager.createProgram(\n          nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, \"cull-tokencmds.vert.glsl\"));\n\n        if (!progManager.areProgramsValid()) return false;\n\n        return true;\n      }\n\n    private:\n      
bool loaded;\n    };\n\n    class Type : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_cullsorted\";\n      }\n      Renderer* create() const\n      {\n        RendererCullSortToken* renderer = new RendererCullSortToken();\n        return renderer;\n      }\n      bool loadPrograms( nvgl::ProgramManager &mgr)\n      {\n        return Shared::get().load(mgr);\n      }\n      unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n    class TypeEmu : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"tokenbuffer_cullsorted_emulated\";\n      }\n      Renderer* create() const\n      {\n        RendererCullSortToken* renderer = new RendererCullSortToken();\n        renderer->m_emulate = true;\n        return renderer;\n      }\n      bool loadPrograms( nvgl::ProgramManager &mgr )\n      {\n        return Shared::get().load(mgr);\n      }\n      unsigned int priority() const \n      {\n        return 9;\n      }\n    };\n\n  public:\n    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);\n    void deinit();\n    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);\n    void drawScene(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager, const char*what);\n\n  private:\n\n    static bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b)\n    {\n      int diff = 0;\n      diff = diff != 0 ? diff : (a.solid == b.solid ? 0 : ( a.solid ? -1 : 1 ));\n#if USE_OBJECTSORT_CULLING\n      diff = diff != 0 ? diff : (a.objectIndex - b.objectIndex);\n#endif\n      diff = diff != 0 ? 
diff : (a.materialIndex - b.materialIndex);\n      diff = diff != 0 ? diff : (a.geometryIndex - b.geometryIndex);\n      diff = diff != 0 ? diff : (a.matrixIndex - b.matrixIndex);\n\n      return diff < 0;\n    }\n\n    struct CullSequence {\n      GLuint    offset;\n      GLint     endoffset;\n      int       first;\n      int       num;\n    };\n\n    struct CullShade {\n      GLuint                    numTokens;\n      std::vector<CullSequence> sequnces;\n\n      // static buffers\n      ScanSystem::Buffer   tokenOrig;\n\n      // for each command, #cmds rounded to multiple of 4\n      ScanSystem::Buffer   tokenSizes;   // in integers\n      ScanSystem::Buffer   tokenObjects; // -1 if no drawcall, otherwise object\n      ScanSystem::Buffer   tokenOffsets; // offsets for each command\n\n      ScanSystem::Buffer   tokenOutSizes;\n      ScanSystem::Buffer   tokenOutScan;\n      ScanSystem::Buffer   tokenOutScanOffset;\n    };\n\n    class CullJobToken : public CullingSystem::Job\n    {\n    public:\n      void resultFromBits( const CullingSystem::Buffer& bufferVisBitsCurrent );\n\n      GLuint      program_sizes;\n      GLuint      program_cmds;\n\n      // dynamic\n      ScanSystem::Buffer   tokenOut;\n\n      CullShade* NV_RESTRICT cullshade;\n    };\n\n    std::vector<DrawItem>       m_drawItems;\n\n    CullJobToken                m_culljob;\n    CullShade                   m_cullshades[NUM_SHADES];\n    GLuint                      m_maxGrps;\n\n    void PrepareCullJob(ShadeType shade);\n\n\n    template <class T>\n    static void handleToken(std::vector<GLuint> &tokenSizes, std::vector<GLuint> &tokenOffsets,std::vector<GLint>&  tokenObjects, T &token, size_t stream, int obj=-1)\n    {\n      tokenSizes.push_back(GLuint(sizeof(T) / sizeof(GLuint) ));\n      tokenOffsets.push_back(GLuint( (stream - sizeof(T))/ sizeof(GLuint) ));\n      tokenObjects.push_back(obj);\n    }\n\n    void GenerateTokens(std::vector<DrawItem>& drawItems, ShadeType shade, const CadScene* 
NV_RESTRICT scene, const Resources& resources )\n    {\n      int lastMaterial = -1;\n      int lastGeometry = -1;\n      int lastMatrix   = -1;\n      int lastObject   = -1;\n      bool lastSolid   = true;\n\n      ShadeCommand& sc = m_shades[shade];\n      CullShade& cull = m_cullshades[shade];\n\n      sc.fbos.clear();\n      sc.offsets.clear();\n      sc.sizes.clear();\n      sc.states.clear();\n\n      std::string& tokenStream = m_tokenStreams[shade];\n      tokenStream.clear();\n\n\n      cull.numTokens = 0;\n      GLuint beginToken = 0;\n\n      size_t begin = 0;\n      size_t start = begin;\n\n      std::vector<GLuint> tokenSizes;\n      std::vector<GLuint> tokenOffsets;\n      std::vector<GLint>  tokenObjects;\n\n      {\n        NVTokenUbo ubo;\n        ubo.cmd.index   = UBO_SCENE;\n        ubo.cmd.stage   = UBOSTAGE_VERTEX;\n        ubo.setBuffer(resources.sceneUbo, resources.sceneAddr, 0, sizeof(SceneData) );\n        nvtokenEnqueue(tokenStream, ubo);\n        handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, -1);\n        cull.numTokens++;\n\n        ubo.cmd.stage   = UBOSTAGE_FRAGMENT;\n        nvtokenEnqueue(tokenStream, ubo);\n        handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, -1);\n        cull.numTokens++;\n\n#if USE_POLYOFFSETTOKEN\n        NVTokenPolygonOffset offset;\n        offset.cmd.bias = 1;\n        offset.cmd.scale = 1;\n        nvtokenEnqueue(tokenStream, offset);\n        handleToken(tokenSizes,tokenOffsets,tokenObjects, offset, tokenStream.size()-start, -1);\n        cull.numTokens++;\n#endif\n      }\n\n      for (int i = 0; i < drawItems.size(); i++){\n        const DrawItem& di = drawItems[i];\n\n        if (shade == SHADE_SOLID && !di.solid){\n          continue;\n        }\n\n        int bufferObjIndex = -1;\n#if USE_OBJECTSORT_CULLING\n        bufferObjIndex = di.objectIndex;\n        if (di.objectIndex != lastObject || di.solid != lastSolid){\n          // 
whenever an object changes or we switches from solid to edges (happens only once in this sorted scenario)\n          // we have to ensure all buffers are reset as well\n          lastObject = di.objectIndex;\n          lastMaterial = -1;\n          lastGeometry = -1;\n          lastMatrix   = -1;\n        }\n#endif\n\n        if (shade == SHADE_SOLIDWIRE && di.solid != lastSolid){\n          sc.offsets.push_back( begin );\n          sc.sizes.  push_back( GLsizei((tokenStream.size()-begin)) );\n          sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] );\n          sc.fbos.   push_back( 0 );\n          CullSequence cullseq;\n          cullseq.num     = cull.numTokens - beginToken;\n          cullseq.first   = beginToken;\n          cullseq.offset  = GLuint((begin-start)/sizeof(GLuint));\n          cullseq.endoffset = GLuint((tokenStream.size()-start)/sizeof(GLuint));\n          cull.sequnces.push_back(cullseq);\n\n          beginToken = cull.numTokens;\n          begin = tokenStream.size();\n        }\n\n        if (lastGeometry != di.geometryIndex){\n          const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex];\n          NVTokenVbo vbo;\n          vbo.cmd.index = 0;\n          vbo.setBuffer(geo.vboGL, geo.vboADDR, 0);\n\n          nvtokenEnqueue(tokenStream, vbo);\n          handleToken(tokenSizes,tokenOffsets,tokenObjects, vbo, tokenStream.size()-start, bufferObjIndex);\n          cull.numTokens++;\n\n          NVTokenIbo ibo;\n          ibo.setBuffer(geo.iboGL, geo.iboADDR);\n          ibo.cmd.typeSizeInByte = 4;\n          nvtokenEnqueue(tokenStream, ibo);\n          handleToken(tokenSizes,tokenOffsets,tokenObjects, vbo, tokenStream.size()-start, bufferObjIndex);\n          cull.numTokens++;\n\n          lastGeometry = di.geometryIndex;\n        }\n\n        if (lastMatrix != di.matrixIndex){\n\n          NVTokenUbo ubo;\n          ubo.cmd.index   = UBO_MATRIX;\n          ubo.cmd.stage   = UBOSTAGE_VERTEX;\n   
       ubo.setBuffer(scene->m_matricesGL, scene->m_matricesADDR, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode) );\n          nvtokenEnqueue(tokenStream, ubo);\n          handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, bufferObjIndex);\n          cull.numTokens++;\n\n          lastMatrix = di.matrixIndex;\n        }\n\n        if (lastMaterial != di.materialIndex){\n\n          NVTokenUbo ubo;\n          ubo.cmd.index   = UBO_MATERIAL;\n          ubo.cmd.stage   = UBOSTAGE_FRAGMENT;\n          ubo.setBuffer(scene->m_materialsGL, scene->m_materialsADDR, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material) );\n          nvtokenEnqueue(tokenStream, ubo);\n          handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, bufferObjIndex);\n          cull.numTokens++;\n\n          lastMaterial = di.materialIndex;\n        }\n\n\n        NVTokenDrawElemsUsed drawelems;\n        drawelems.setMode(di.solid ? GL_TRIANGLES : GL_LINES);\n        drawelems.cmd.count = di.range.count;\n        drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint));\n        nvtokenEnqueue(tokenStream, drawelems);\n        handleToken(tokenSizes,tokenOffsets,tokenObjects, drawelems, tokenStream.size()-start, di.objectIndex);\n        cull.numTokens++;\n\n        lastSolid = di.solid;\n      }\n\n      sc.offsets.push_back( begin );\n      sc.sizes.  push_back( GLsizei((tokenStream.size()-begin)) );\n      if (shade == SHADE_SOLID){\n        sc.states. push_back( m_stateObjects[ STATE_TRIS ] );\n      }\n      else{\n        sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] );\n      }\n      sc.fbos. 
push_back( 0 );\n\n      CullSequence cullseq;\n      cullseq.num     = cull.numTokens - beginToken;\n      cullseq.first   = beginToken;\n      cullseq.offset  = GLuint((begin-start)/sizeof(GLuint));\n      cullseq.endoffset = GLuint((tokenStream.size()-start)/sizeof(GLuint));\n      cull.sequnces.push_back(cullseq);\n\n      // create buffers for culling\n      cull.tokenOrig.create(tokenStream.size() - start,&tokenStream[start], 0);\n\n      cull.tokenOffsets.create(sizeof(GLuint)*cull.numTokens,&tokenOffsets[0], 0);\n      cull.tokenSizes.  create(sizeof(GLuint)*cull.numTokens,&tokenSizes[0], 0);\n      cull.tokenObjects.create(sizeof(GLint)*cull.numTokens,&tokenObjects[0], 0);\n\n      int round4 = ((cull.numTokens+3)/4)*4;\n\n      cull.tokenOutScan.      create(sizeof(GLuint)*round4,NULL, 0);\n      cull.tokenOutScanOffset.create(std::max(ScanSystem::getOffsetSize(round4), size_t(16)),NULL, 0);\n      cull.tokenOutSizes.     create(sizeof(GLuint)*round4,NULL, 0);\n    }\n\n  };\n\n\n  // not yet fully implemented\n  static RendererCullSortToken::Type s_cullsorttoken;\n  static RendererCullSortToken::TypeEmu s_cullsorttoken_emu;\n\n\n  void RendererCullSortToken::init(const CadScene* NV_RESTRICT scene, const Resources& resources)\n  {\n    TokenRendererBase::init(s_bindless_ubo, !!has_GL_NV_vertex_buffer_unified_memory);\n    resources.usingUboProgram(true);\n\n    m_scene = scene;\n    glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,(GLint*)&m_maxGrps);\n\n    std::vector<DrawItem> drawItems;\n\n    fillDrawItems(drawItems,0,scene->m_objects.size(), true, true);\n\n    std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups);\n\n    GenerateTokens(drawItems, SHADE_SOLID, scene, resources);\n\n    TokenRendererBase::printStats(SHADE_SOLID);\n\n    GenerateTokens(drawItems, SHADE_SOLIDWIRE, scene, resources);\n\n    TokenRendererBase::printStats(SHADE_SOLIDWIRE);\n\n    TokenRendererBase::finalize(resources);\n\n    if (m_emulate){\n      for 
(int i = 0; i < NUM_SHADES; i++){\n        glNamedBufferStorage(m_tokenBuffers[i], m_tokenStreams[i].size(), &m_tokenStreams[i][0], GL_MAP_READ_BIT);\n      }\n    }\n\n    m_culljob.m_numObjects = int(m_scene->m_objects.size());\n\n    int roundedBits = (m_culljob.m_numObjects+31)/32;\n    int roundedInts = roundedBits*32;\n\n    m_culljob.m_bufferBboxes    = CullingSystem::Buffer(m_scene->m_geometryBboxesGL, sizeof(CadScene::BBox) * m_scene->m_geometryBboxes.size());\n    m_culljob.m_bufferMatrices  = CullingSystem::Buffer(m_scene->m_matricesGL, sizeof(CadScene::MatrixNode) * m_scene->m_matrices.size());\n    m_culljob.m_bufferObjectMatrix  = CullingSystem::Buffer(m_scene->m_objectAssignsGL, sizeof(GLint)*2* m_scene->m_objectAssigns.size());\n    m_culljob.m_bufferObjectMatrix.stride = sizeof(GLint)*2;\n    m_culljob.m_bufferObjectBbox    = m_culljob.m_bufferObjectMatrix;\n    m_culljob.m_bufferObjectBbox.offset = sizeof(GLint);\n    m_culljob.m_bufferObjectBbox.size  -= sizeof(GLint);\n    m_culljob.m_bufferObjectBbox.stride = sizeof(GLint)*2;\n\n    m_culljob.m_bufferVisBitsCurrent.create(sizeof(int)*roundedBits,NULL,0);\n    GLuint full = ~0;\n    glClearNamedBufferData(m_culljob.m_bufferVisBitsCurrent.buffer,GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,&full);\n    m_culljob.m_bufferVisBitsLast.create(sizeof(int)*roundedBits,NULL,0);\n    glClearNamedBufferData(m_culljob.m_bufferVisBitsLast.buffer,GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,0);\n\n    m_culljob.m_bufferVisOutput.create(sizeof(int)*roundedInts,NULL,0);\n    m_cullshades[SHADE_SOLIDWIRE_SPLIT] = m_cullshades[SHADE_SOLIDWIRE];\n  }\n\n  void RendererCullSortToken::deinit()\n  {\n    for (int i = 0; i < 2; i++){\n      CullShade &cs = m_cullshades[i];\n      glDeleteBuffers(1,&cs.tokenOrig.buffer);\n      glDeleteBuffers(1,&cs.tokenOffsets.buffer);\n      glDeleteBuffers(1,&cs.tokenSizes.buffer);\n      glDeleteBuffers(1,&cs.tokenObjects.buffer);\n\n      glDeleteBuffers(1,&cs.tokenOutScan.buffer);\n      
glDeleteBuffers(1,&cs.tokenOutScanOffset.buffer);\n      glDeleteBuffers(1,&cs.tokenOutSizes.buffer);\n    }\n\n    glDeleteBuffers(1,&m_culljob.m_bufferVisBitsCurrent.buffer);\n    glDeleteBuffers(1,&m_culljob.m_bufferVisBitsLast.buffer);\n    glDeleteBuffers(1,&m_culljob.m_bufferVisOutput.buffer);\n\n\n    TokenRendererBase::deinit();\n    m_drawItems.clear();\n  }\n\n  void RendererCullSortToken::PrepareCullJob(ShadeType shade)\n  {\n    ShadeCommand& sc = m_shades[shade];\n    RendererCullSortToken::CullJobToken& job = m_culljob;\n\n    job.cullshade = &m_cullshades[shade];\n\n    // setup buffer offsets\n    job.tokenOut.buffer = m_tokenBuffers[shade];\n    job.tokenOut.offset = sc.offsets[0];\n    job.tokenOut.size   = m_cullshades[shade].tokenOrig.size;\n  }\n\n  void RendererCullSortToken::CullJobToken::resultFromBits( const CullingSystem::Buffer& bufferVisBitsCurrent )\n  {\n    // first compute sizes based on culling result\n    glUseProgram(program_sizes);\n\n    glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenSizes.buffer);\n    glVertexAttribIPointer(0,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenSizes.offset);\n    glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenObjects.buffer);\n    glVertexAttribIPointer(1,1,GL_INT,0,(const void*)cullshade->tokenObjects.offset);\n\n    glEnableVertexAttribArray(0);\n    glEnableVertexAttribArray(1);\n\n    cullshade->tokenOutSizes.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n    bufferVisBitsCurrent.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1);\n\n    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);\n\n    GLuint numTokens = cullshade->numTokens;\n\n    glEnable(GL_RASTERIZER_DISCARD);\n    glDrawArrays(GL_POINTS,0, numTokens);\n\n    glDisableVertexAttribArray(0);\n    glDisableVertexAttribArray(1);\n\n    Renderer::s_scansys.scanData(((numTokens+3)/4)*4,cullshade->tokenOutSizes,cullshade->tokenOutScan,cullshade->tokenOutScanOffset);\n\n    glUseProgram(program_cmds);\n    
glUniform1ui(glGetUniformLocation(program_cmds,\"terminateCmd\"),s_nvcmdlist_header[GL_TERMINATE_SEQUENCE_COMMAND_NV]);\n\n    glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenOffsets.buffer);\n    glVertexAttribIPointer(0,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenOffsets.offset);\n    glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenOutSizes.buffer);\n    glVertexAttribIPointer(1,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenOutSizes.offset);\n    glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenOutScan.buffer);\n    glVertexAttribIPointer(2,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenOutScan.offset);\n\n    glEnableVertexAttribArray(0);\n    glEnableVertexAttribArray(1);\n    glEnableVertexAttribArray(2);\n\n    tokenOut.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n    cullshade->tokenOrig.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1);\n    cullshade->tokenOutSizes.BindBufferRange(GL_SHADER_STORAGE_BUFFER,2);\n    cullshade->tokenOutScan.BindBufferRange(GL_SHADER_STORAGE_BUFFER,3);\n    cullshade->tokenOutScanOffset.BindBufferRange(GL_SHADER_STORAGE_BUFFER,4);\n\n    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);\n\n    for (GLuint i = 0; i < cullshade->sequnces.size() ; i++){\n      glUniform1ui(glGetUniformLocation(program_cmds,\"startOffset\"),cullshade->sequnces[i].offset);\n      glUniform1i (glGetUniformLocation(program_cmds,\"startID\"),cullshade->sequnces[i].first);\n      glUniform1ui(glGetUniformLocation(program_cmds,\"endOffset\"),cullshade->sequnces[i].endoffset);\n      glUniform1i (glGetUniformLocation(program_cmds,\"endID\"),cullshade->sequnces[i].first + cullshade->sequnces[i].num - 1);\n      glDrawArrays(GL_POINTS,cullshade->sequnces[i].first,cullshade->sequnces[i].num);\n    }\n\n    glDisableVertexAttribArray(0);\n    glDisableVertexAttribArray(1);\n    glDisableVertexAttribArray(2);\n\n    glBindBuffer(GL_ARRAY_BUFFER,0);\n\n    for (GLuint i = 0; i < 5; i++){\n      
glBindBufferBase(GL_SHADER_STORAGE_BUFFER,i,0);\n    }\n\n    glDisable(GL_RASTERIZER_DISCARD);\n  }\n\n  void RendererCullSortToken::drawScene(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager, const char*what)\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n\n    nvh::Profiler::Section  section(profiler,what);\n\n    // do state setup (primarily for sake of state capturing)\n    m_scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n    if (m_bindlessVboUbo){\n      glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n    }\n    else{\n      glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo);\n    }\n\n    captureState(resources);\n\n    if (!USE_POLYOFFSETTOKEN && (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT)){\n      glPolygonOffset(1,1);\n    }\n\n    if (m_hwsupport){\n      if (m_uselist){\n        glCallCommandListNV(m_commandLists[shadetype]);\n      }\n      else{\n        ShadeCommand & shade =  m_shades[shadetype];\n        glDrawCommandsStatesNV(m_tokenBuffers[shadetype], &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) );\n      }\n    }\n    else{\n      ShadeCommand & shade =  m_shades[shadetype];\n      std::string& stream  =  m_tokenStreams[shadetype];\n      renderShadeCommandSW(&stream[0], stream.size(), shade);\n    }\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0);\n\n    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);\n    glBindVertexBuffer(0,0,0,0);\n\n    glDisable(GL_POLYGON_OFFSET_FILL);\n    glPolygonOffset(0,0);\n\n    if (m_bindlessVboUbo){\n      glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      
glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n    }\n\n    scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n  }\n\n\n#define CULL_TEMPORAL_NOFRUSTUM 1\n\n  void RendererCullSortToken::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager)\n  {\n    // broken in other types atm\n    //shadetype = SHADE_SOLID;\n\n    m_culljob.program_cmds  = progManager.get( Shared::get().token_cmds );\n    m_culljob.program_sizes = progManager.get( Shared::get().token_sizes );\n\n    PrepareCullJob(shadetype);\n\n    CullingSystem& cullSys = Renderer::s_cullsys;\n\n\n#if !USE_TEMPORALRASTER\n\n    {\n      nvh::Profiler::Section section(profiler,\"CullF\");\n      cullSys.buildOutput( CullingSystem::METHOD_FRUSTUM, m_culljob, resources.cullView );\n      cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT );\n      {\n        nvh::Profiler::Section section(profiler,\"ResF\");\n        cullSys.resultFromBits( m_culljob );\n      }\n\n      if (m_emulate){\n        nvh::Profiler::Section read(profiler,\"Read\");\n        m_culljob.tokenOut.GetNamedBufferSubData(&m_tokenStream[m_culljob.tokenOut.offset]);\n        GLuint* first = (GLuint*)&m_tokenStream[m_culljob.tokenOut.offset];\n        first[0] = first[0];\n      }\n      else {\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_culljob.tokenOut.buffer);\n        glMemoryBarrier(GL_COMMAND_BARRIER_BIT);\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);\n        //glFinish();\n      }\n    }\n\n    drawScene(shadetype,resources,profiler,progManager, \"Last\");\n\n#else\n\n    {\n      nvh::Profiler::Section section(profiler,\"CullF\");\n#if CULL_TEMPORAL_NOFRUSTUM\n      {\n        nvh::Profiler::Section section(profiler,\"ResF\");\n        cullSys.resultFromBits( m_culljob );\n      }\n      cullSys.swapBits( m_culljob );  // last/output\n#else\n      cullSys.buildOutput( 
CullingSystem::METHOD_FRUSTUM, m_culljob, resources.cullView );\n      cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT_AND_LAST );\n      {\n        nvh::Profiler::Section section(profiler,\"ResF\");\n        cullSys.resultFromBits( m_culljob );\n      }\n#endif\n      if (m_emulate){\n        nvh::Profiler::Section read(profiler,\"Read\");\n        void* data = &m_tokenStreams[shadetype][m_culljob.tokenOut.offset];\n        m_culljob.tokenOut.GetNamedBufferSubData(data);\n      }\n      else {\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_culljob.tokenOut.buffer);\n        glMemoryBarrier(GL_COMMAND_BARRIER_BIT);\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);\n        //glFinish();\n      }\n    }\n\n    drawScene(shadetype,resources,profiler,progManager, \"Last\");\n\n    {\n      nvh::Profiler::Section section(profiler,\"CullR\");\n      cullSys.buildOutput( CullingSystem::METHOD_RASTER, m_culljob, resources.cullView );\n      cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT_AND_NOT_LAST );\n      {\n        nvh::Profiler::Section section(profiler,\"ResR\");\n        cullSys.resultFromBits( m_culljob );\n      }\n\n      // for next frame\n      cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT );\n#if !CULL_TEMPORAL_NOFRUSTUM\n      cullSys.swapBits( m_culljob );  // last/output\n#endif\n      if (m_emulate){\n        nvh::Profiler::Section read(profiler,\"Read\");\n        void* data = &m_tokenStreams[shadetype][m_culljob.tokenOut.offset];\n        m_culljob.tokenOut.GetNamedBufferSubData(data);\n      }\n      else {\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_culljob.tokenOut.buffer);\n        glMemoryBarrier(GL_COMMAND_BARRIER_BIT);\n        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);\n        //glFinish();\n      }\n    }\n\n    drawScene(shadetype,resources,profiler,progManager, \"New\");\n#endif\n  }\n\n}\n"
  },
  {
    "path": "renderertokenstream.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"tokenbase.hpp\"\n\n#include \"common.h\"\n\nnamespace csfviewer\n{\n  //////////////////////////////////////////////////////////////////////////\n\n  class RendererTokenStream: public Renderer, public TokenRendererBase {\n  public:\n    class Type : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return TokenRendererBase::hasNativeCommandList();\n      }\n      const char* name() const\n      {\n        return \"tokenstream\";\n      }\n      Renderer* create() const\n      {\n        RendererTokenStream* renderer = new RendererTokenStream();\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 10;\n      }\n    };\n    class TypeEmu : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"tokenstream_emulated\";\n      }\n      Renderer* create() const\n      {\n        RendererTokenStream* renderer = new RendererTokenStream();\n        renderer->m_emulate = true;\n        return renderer;\n      
}\n      unsigned int priority() const \n      {\n        return 10;\n      }\n    };\n\n  public:\n    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);\n    void deinit();\n    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);\n\n  private:\n\n    static const size_t bufferSize = 1024*16;\n\n    std::vector<DrawItem>       m_drawItems;\n\n    size_t GenerateTokens(NVPointerStream& tokenStream, std::vector<DrawItem>& drawItems, size_t from, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources )\n    {\n      int lastMaterial = -1;\n      int lastGeometry = -1;\n      int lastMatrix   = -1;\n      bool lastSolid   = true;\n\n      ShadeCommand& sc = m_shades[shade];\n      sc.fbos.clear();\n      sc.offsets.clear();\n      sc.sizes.clear();\n      sc.states.clear();\n\n      size_t begin = 0;\n\n      {\n        NVTokenUbo ubo;\n        ubo.cmd.index   = UBO_SCENE;\n        ubo.cmd.stage   = UBOSTAGE_VERTEX;\n        ubo.setBuffer(resources.sceneUbo, resources.sceneAddr, 0, sizeof(SceneData));\n        nvtokenEnqueue(tokenStream, ubo);\n\n        ubo.cmd.stage   = UBOSTAGE_FRAGMENT;\n        nvtokenEnqueue(tokenStream, ubo);\n\n#if USE_POLYOFFSETTOKEN\n        NVTokenPolygonOffset offset;\n        offset.cmd.bias = 1;\n        offset.cmd.scale = 1;\n        nvtokenEnqueue(tokenStream, offset);\n#endif\n      }\n\n      size_t i = from;\n      for (; i < drawItems.size(); i++){\n        const DrawItem& di = drawItems[i];\n\n        if (tokenStream.size() + sizeof(NVTokenIbo) + sizeof(NVTokenVbo) + sizeof(NVTokenUbo)*2 + sizeof(NVTokenDrawElemsUsed) > tokenStream.capacity()){\n          break;\n        }\n\n        if (shade == SHADE_SOLID && !di.solid){\n          continue;\n        }\n\n        if ((shade == SHADE_SOLIDWIRE || shade == SHADE_SOLIDWIRE_SPLIT) && di.solid != lastSolid){\n          sc.offsets.push_back( begin );\n          sc.sizes.  
push_back( GLsizei((tokenStream.size()-begin)) );\n          sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] );\n          if ( shade == SHADE_SOLIDWIRE_SPLIT ){\n            sc.fbos.   push_back( USE_STATEFBO_SPLIT ? 0 : ( di.solid ? resources.fbo : resources.fbo2  ) );\n          }\n          else{\n            sc.fbos.push_back(0);\n          }\n\n\n          begin = tokenStream.size();\n        }\n\n        if (lastGeometry != di.geometryIndex){\n          const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex];\n          NVTokenVbo vbo;\n          vbo.cmd.index = 0;\n          vbo.setBuffer(geo.vboGL, geo.vboADDR, 0);\n          nvtokenEnqueue(tokenStream, vbo);\n\n          NVTokenIbo ibo;\n          ibo.setBuffer(geo.iboGL, geo.iboADDR);\n          ibo.cmd.typeSizeInByte = 4;\n          nvtokenEnqueue(tokenStream, ibo);\n\n          lastGeometry = di.geometryIndex;\n        }\n\n        if (lastMatrix != di.matrixIndex){\n\n          NVTokenUbo ubo;\n          ubo.cmd.index   = UBO_MATRIX;\n          ubo.cmd.stage   = UBOSTAGE_VERTEX;\n          ubo.setBuffer(scene->m_matricesGL, scene->m_matricesADDR, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode));\n          nvtokenEnqueue(tokenStream, ubo);\n\n          lastMatrix = di.matrixIndex;\n        }\n\n        if (lastMaterial != di.materialIndex){\n\n          NVTokenUbo ubo;\n          ubo.cmd.index   = UBO_MATERIAL;\n          ubo.cmd.stage   = UBOSTAGE_FRAGMENT;\n          ubo.setBuffer(scene->m_materialsGL, scene->m_materialsADDR, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material));\n          nvtokenEnqueue(tokenStream, ubo);\n\n          lastMaterial = di.materialIndex;\n        }\n\n\n        NVTokenDrawElemsUsed drawelems;\n        drawelems.setMode(di.solid ? 
GL_TRIANGLES : GL_LINES);\n        drawelems.cmd.count = di.range.count;\n        drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint));\n        nvtokenEnqueue(tokenStream, drawelems);\n\n        lastSolid = di.solid;\n      }\n\n      sc.offsets.push_back( begin );\n      sc.sizes.  push_back( GLsizei((tokenStream.size()-begin)) );\n      if (shade == SHADE_SOLID){\n        sc.states. push_back( m_stateObjects[ STATE_TRIS ] );\n      }\n      else{\n        sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] );\n      }\n      if ( shade == SHADE_SOLIDWIRE_SPLIT ){\n        sc.fbos.   push_back( USE_STATEFBO_SPLIT ? 0 : ( lastSolid ? resources.fbo : resources.fbo2  ) );\n      }\n      else{\n        sc.fbos.push_back(0);\n      }\n\n      return i;\n    }\n\n  };\n\n  static RendererTokenStream::Type s_sorttoken;\n  static RendererTokenStream::TypeEmu s_sorttoken_emu;\n\n  void RendererTokenStream::init(const CadScene* NV_RESTRICT scene, const Resources& resources)\n  {\n    TokenRendererBase::init(s_bindless_ubo, !!has_GL_NV_vertex_buffer_unified_memory);\n    resources.usingUboProgram(true);\n\n    m_scene = scene;\n\n    fillDrawItems(m_drawItems,0,scene->m_objects.size(), true, true);\n\n    TokenRendererBase::finalize(resources,false);\n\n    for (int i = 0; i < NUM_SHADES; i++){\n      m_tokenStreams[i].resize(bufferSize);\n      glNamedBufferData(m_tokenBuffers[i], bufferSize, 0, GL_DYNAMIC_DRAW);\n    }\n  }\n\n  void RendererTokenStream::deinit()\n  {\n    TokenRendererBase::deinit();\n    m_drawItems.clear();\n  }\n\n  void RendererTokenStream::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager)\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n\n    // do state setup (primarily for sake of state capturing)\n    scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n    if (m_bindlessVboUbo){\n      
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n    }\n    else{\n      glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo);\n    }\n\n    captureState(resources);\n\n    if (!USE_POLYOFFSETTOKEN && (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT)){\n      glPolygonOffset(1,1);\n    }\n\n    bool useSub = true;\n    bool usePersistent = false;\n\n    size_t begin = 0;\n    while (begin < m_drawItems.size())\n    {\n      NVPointerStream stream;\n      GLuint buffer;\n\n      void* bufferPtr = NULL;\n      if (m_hwsupport && !useSub){\n        if (usePersistent){\n          // not ideal, best would be finding max frame usage and then keep * 4 the size to account for driver/gpu\n          // race\n          glCreateBuffers(1,&buffer);\n          glNamedBufferStorage(buffer, bufferSize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_CLIENT_STORAGE_BIT);\n          bufferPtr = glMapNamedBufferRange(buffer, 0, bufferSize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);\n        }\n        else{\n          buffer = m_tokenBuffers[shadetype];\n          bufferPtr = glMapNamedBufferRange(buffer, 0, bufferSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);\n        }\n      }\n      else{\n        bufferPtr = &m_tokenStreams[shadetype][0];\n      }\n\n      stream.init(bufferPtr,bufferSize);\n\n      {\n        nvh::Profiler::Section _tempTimer(profiler ,\"Token\");\n        begin = GenerateTokens(stream, m_drawItems, begin, shadetype, scene, resources);\n      }\n\n      if (useSub){\n        buffer = m_tokenBuffers[shadetype];\n\n        nvh::Profiler::Section _tempTimer(profiler ,\"Send\");\n        glInvalidateBufferData(buffer);\n        glNamedBufferSubData(buffer,0,stream.size(), stream.m_begin);\n      }\n\n      {\n        nvh::Profiler::Section _tempTimer(profiler ,\"Draw\");\n        if (m_hwsupport){\n   
       ShadeCommand & shade =  m_shades[shadetype];\n          glDrawCommandsStatesNV(buffer, &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) );\n        }\n        else{\n          ShadeCommand & shade =  m_shades[shadetype];\n          renderShadeCommandSW(stream.m_begin, stream.size(), shade);\n        }\n      }\n      \n      if (m_hwsupport && !useSub){\n        if (usePersistent){\n          glDeleteBuffers(1,&buffer);\n        }\n        else{\n          glUnmapNamedBuffer(buffer);\n        }\n      }\n    }\n\n    profiler.accumulationSplit();\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0);\n\n    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);\n    glBindVertexBuffer(0,0,0,0);\n\n    glDisable(GL_POLYGON_OFFSET_FILL);\n    glPolygonOffset(0,0);\n\n    if (m_bindlessVboUbo){\n      glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n    }\n\n    scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n  }\n\n}\n"
  },
  {
    "path": "rendereruborange.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include <assert.h>\n#include <algorithm>\n#include \"renderer.hpp\"\n\n#include \"common.h\"\n\nnamespace csfviewer\n{\n  //////////////////////////////////////////////////////////////////////////\n\n  class RendererUboRange: public Renderer {\n  public:\n    class Type : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"uborange\";\n      }\n      Renderer* create() const\n      {\n        RendererUboRange* renderer = new RendererUboRange();\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 0;\n      }\n    };\n    class TypeEmu : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"uborange_bindless\";\n      }\n      Renderer* create() const\n      {\n        RendererUboRange* renderer = new RendererUboRange();\n        renderer->m_vbum = true;\n        return renderer;\n      }\n      unsigned int priority() const \n     
 {\n        return 0;\n      }\n    };\n    class TypeSort : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"uborange_sorted\";\n      }\n      Renderer* create() const\n      {\n        RendererUboRange* renderer = new RendererUboRange();\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 1;\n      }\n    };\n    class TypeSortEmu : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"uborange_sorted_bindless\";\n      }\n      Renderer* create() const\n      {\n        RendererUboRange* renderer = new RendererUboRange();\n        renderer->m_vbum = true;\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 1;\n      }\n    };\n\n  public:\n    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);\n    void deinit();\n    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);\n\n    RendererUboRange()\n      : m_vbum(false)\n      , m_sort(false)\n    {\n\n    }\n\n    bool                        m_vbum;\n    bool                        m_sort;\n\n  private:\n\n    std::vector<DrawItem>       m_drawItems;\n\n  };\n  static RendererUboRange::Type         s_uborange;\n  static RendererUboRange::TypeEmu      s_uborange_emu;\n\n  static RendererUboRange::TypeSort     s_sortuborange;\n  static RendererUboRange::TypeSortEmu  s_sortuborange_emu;\n\n  void RendererUboRange::init(const CadScene* NV_RESTRICT scene, const Resources& resources)\n  {\n    m_scene = scene;\n\n    fillDrawItems(m_drawItems,0,scene->m_objects.size(), true, true);\n\n    if (m_sort){\n      
std::sort(m_drawItems.begin(),m_drawItems.end(),DrawItem_compare_groups);\n    }\n  }\n\n  void RendererUboRange::deinit()\n  {\n    m_drawItems.clear();\n  }\n\n  void RendererUboRange::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager)\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n\n    bool vbum = m_vbum;\n\n    scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n    if (vbum){\n      glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      if (s_bindless_ubo){\n        glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n        glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_SCENE,resources.sceneAddr,sizeof(SceneData));\n      }\n      else{\n        glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo);\n      }\n    }\n    else{\n      glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo);\n    }\n\n    glUseProgram(resources.programUbo);\n\n    SetWireMode(GL_FALSE);\n\n    if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){\n      glEnable(GL_POLYGON_OFFSET_FILL);\n      glPolygonOffset(1,1);\n    }\n\n    {\n      int lastMaterial = -1;\n      int lastGeometry = -1;\n      int lastMatrix   = -1;\n      bool lastSolid   = true;\n\n      GLenum mode = GL_TRIANGLES;\n\n      for (int i = 0; i < m_drawItems.size(); i++){\n        const DrawItem& di = m_drawItems[i];\n\n        if (shadetype == SHADE_SOLID && !di.solid){\n          if (m_sort) break;\n          continue;\n        }\n\n        if (lastSolid != di.solid){\n          SetWireMode( di.solid ? GL_FALSE : GL_TRUE );\n          if (shadetype == SHADE_SOLIDWIRE_SPLIT){\n            glBindFramebuffer(GL_FRAMEBUFFER, di.solid ? 
resources.fbo : resources.fbo2);\n          }\n        }\n\n        if (lastGeometry != di.geometryIndex){\n          const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex];\n\n          if (vbum){\n            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0,  geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex));\n            glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0,         geo.iboADDR, (geo.numIndexSolid+geo.numIndexWire) * sizeof(GLuint));\n          }\n          else{\n            glBindVertexBuffer(0, geo.vboGL, 0, sizeof(CadScene::Vertex));\n            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo.iboGL);\n          }\n\n          lastGeometry = di.geometryIndex;\n        }\n\n        if (lastMatrix != di.matrixIndex){\n\n          if (vbum && s_bindless_ubo){\n            glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATRIX, scene->m_matricesADDR + sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode));\n          }\n          else{\n            glBindBufferRange(GL_UNIFORM_BUFFER,UBO_MATRIX, scene->m_matricesGL, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode));\n          }\n\n          lastMatrix = di.matrixIndex;\n        }\n\n        if (lastMaterial != di.materialIndex){\n\n          if (m_vbum && s_bindless_ubo){\n            glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATERIAL, scene->m_materialsADDR +sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material));\n          }\n          else{\n            glBindBufferRange(GL_UNIFORM_BUFFER,UBO_MATERIAL, scene->m_materialsGL, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material));\n          }\n\n          lastMaterial = di.materialIndex;\n        }\n\n        glDrawElements( di.solid ? 
GL_TRIANGLES : GL_LINES, di.range.count, GL_UNSIGNED_INT, (void*) di.range.offset);\n\n        lastSolid = di.solid;\n      }\n    }\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0);\n\n    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);\n    glBindVertexBuffer(0,0,0,0);\n\n    glDisable(GL_POLYGON_OFFSET_FILL);\n    glPolygonOffset(0,0);\n\n    if (vbum){\n      glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n      if (s_bindless_ubo){\n        glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);\n      }\n    }\n\n    scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n  }\n\n}\n"
  },
  {
    "path": "rendererubosub.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include <assert.h>\n#include <algorithm>\n#include \"renderer.hpp\"\n\n#include \"common.h\"\n\nnamespace csfviewer\n{\n  //////////////////////////////////////////////////////////////////////////\n\n  class RendererUboSub: public Renderer {\n  public:\n    class Type : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"ubosub\";\n      }\n      Renderer* create() const\n      {\n        RendererUboSub* renderer = new RendererUboSub();\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 2;\n      }\n    };\n    class TypeVbum : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"ubosub_bindless\";\n      }\n      Renderer* create() const\n      {\n        RendererUboSub* renderer = new RendererUboSub();\n        renderer->m_vbum = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        
return 2;\n      }\n    };\n    class TypeSort : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return true;\n      }\n      const char* name() const\n      {\n        return \"ubosub_sorted\";\n      }\n      Renderer* create() const\n      {\n        RendererUboSub* renderer = new RendererUboSub();\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 2;\n      }\n    };\n    class TypeSortVbum : public Renderer::Type \n    {\n      bool isAvailable() const\n      {\n        return !!has_GL_NV_vertex_buffer_unified_memory;\n      }\n      const char* name() const\n      {\n        return \"ubosub_sorted_bindless\";\n      }\n      Renderer* create() const\n      {\n        RendererUboSub* renderer = new RendererUboSub();\n        renderer->m_vbum = true;\n        renderer->m_sort = true;\n        return renderer;\n      }\n      unsigned int priority() const \n      {\n        return 2;\n      }\n    };\n\n  public:\n    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);\n    void deinit();\n    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);\n\n    bool                        m_sort;\n    bool                        m_vbum;\n\n  private:\n\n    std::vector<DrawItem>       m_drawItems;\n\n    GLuint                      m_streamMatrix;\n    GLuint                      m_streamMaterial;\n\n    RendererUboSub()\n      : m_vbum(false)\n      , m_sort(false)\n    {\n\n    }\n\n  };\n\n  static RendererUboSub::Type s_ubosub;\n  static RendererUboSub::TypeVbum s_ubosub_vbum;\n  static RendererUboSub::TypeSort s_ubosub_sort;\n  static RendererUboSub::TypeSortVbum s_ubosub_vbum_sort;\n\n  void RendererUboSub::init(const CadScene* NV_RESTRICT scene, const Resources& resources)\n  {\n    resources.usingUboProgram(true);\n    m_scene = scene;\n\n    
fillDrawItems(m_drawItems,0,scene->m_objects.size(), true, true);\n\n    if (m_sort){\n      std::sort(m_drawItems.begin(),m_drawItems.end(),DrawItem_compare_groups);\n    }\n\n    m_scene = scene;\n    glCreateBuffers(1,&m_streamMatrix);\n    glCreateBuffers(1,&m_streamMaterial);\n    glNamedBufferData( m_streamMatrix, sizeof(CadScene::MatrixNode), NULL, GL_STREAM_DRAW);\n    glNamedBufferData( m_streamMaterial, sizeof(CadScene::Material), NULL, GL_STREAM_DRAW);\n  }\n\n  void RendererUboSub::deinit()\n  {\n    glDeleteBuffers(1,&m_streamMatrix);\n    glDeleteBuffers(1,&m_streamMaterial);\n  }\n\n  void RendererUboSub::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager)\n  {\n    const CadScene* NV_RESTRICT scene = m_scene;\n\n    bool vbum = m_vbum;\n\n    scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n\n    glUseProgram(resources.programUbo);\n\n    SetWireMode(GL_FALSE);\n\n    if (shadetype == SHADE_SOLIDWIRE){\n      glEnable(GL_POLYGON_OFFSET_FILL);\n      glPolygonOffset(1,1);\n    }\n\n    if (vbum){\n      glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n    }\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,     resources.sceneUbo);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX,    m_streamMatrix);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL,  m_streamMaterial);\n\n    {\n      int lastMaterial = -1;\n      int lastGeometry = -1;\n      int lastMatrix   = -1;\n      bool lastSolid   = true;\n\n      GLenum mode = GL_TRIANGLES;\n\n      for (int i = 0; i < m_drawItems.size(); i++){\n        const DrawItem& di = m_drawItems[i];\n\n        if (shadetype == SHADE_SOLID && !di.solid){\n          if (m_sort) break;\n          continue;\n        }\n\n        if (lastSolid != di.solid){\n          SetWireMode( di.solid ? 
GL_FALSE : GL_TRUE );\n          if (shadetype == SHADE_SOLIDWIRE_SPLIT){\n            glBindFramebuffer(GL_FRAMEBUFFER, di.solid ? resources.fbo : resources.fbo2);\n          }\n        }\n\n        if (lastGeometry != di.geometryIndex){\n          const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex];\n\n          if (vbum){\n            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0,  geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex));\n            glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0,         geo.iboADDR, (geo.numIndexSolid+geo.numIndexWire) * sizeof(GLuint));\n          }\n          else{\n            glBindVertexBuffer(0, geo.vboGL, 0, sizeof(CadScene::Vertex));\n            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo.iboGL);\n          }\n\n          lastGeometry = di.geometryIndex;\n        }\n\n        if (lastMatrix != di.matrixIndex){\n          glNamedBufferSubData(m_streamMatrix, 0, sizeof(CadScene::MatrixNode), &scene->m_matrices[di.matrixIndex]);\n          lastMatrix = di.matrixIndex;\n        }\n\n        if (lastMaterial != di.materialIndex){\n          glNamedBufferSubData(m_streamMaterial, 0, sizeof(CadScene::Material), &scene->m_materials[di.materialIndex]);\n          lastMaterial = di.materialIndex;\n        }\n\n        glDrawElements( di.solid ? 
GL_TRIANGLES : GL_LINES, di.range.count, GL_UNSIGNED_INT, (void*) di.range.offset);\n\n        lastSolid = di.solid;\n      }\n    }\n\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0);\n    glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0);\n\n    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);\n    glBindVertexBuffer(0,0,0,0);\n\n    if (m_vbum){\n      glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);\n      glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);\n    }\n\n    if (shadetype == SHADE_SOLIDWIRE){\n      glDisable(GL_POLYGON_OFFSET_FILL);\n      glPolygonOffset(0,0);\n    }\n\n    scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL);\n  }\n\n}\n"
  },
  {
    "path": "scan.comp.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#define TASK_SUM      0\n#define TASK_OFFSETS  1\n#define TASK_COMBINE  2\n\n#ifndef TASK\n#define TASK TASK_SUM\n#endif\n\n#define THREADBLOCK_SIZE  512\n#define BATCH_SIZE        (THREADBLOCK_SIZE*4)\n\n\nuniform uint numElements;\n\n///////////////////////////////////////////////////////\n// based on CUDA Sample \"scan.cu\" \n\nlayout (local_size_x = THREADBLOCK_SIZE) in;\n\n#if TASK != TASK_COMBINE\n\nuint threadIdx = gl_LocalInvocationID.x;\n\n#extension GL_NV_shader_thread_group : enable\n#extension GL_NV_shader_thread_shuffle : enable\n\n#if GL_NV_shader_thread_group\n\n#define USESHUFFLE\n#define LOG2_WARP_SIZE 5U\n#define      WARP_SIZE (1U << LOG2_WARP_SIZE)\n\n// Almost the same as naive scan1Inclusive but doesn't need barriers\n// nor shared memory\n// and works only for size <= WARP_SIZE\n\n#if GL_NV_shader_thread_shuffle\n\nshared uint s_Data[(THREADBLOCK_SIZE / WARP_SIZE)];\n\nuint warpScanInclusive(uint idata, uint size){\n  uint sum = idata;\n  \n  for (int STEP = 0; STEP < 5 && (1<<(STEP+1)) <= size; STEP++){\n    bool valid = false;\n    uint temp = shuffleUpNV(sum, 1 << STEP, 32, valid);\n    if (valid) {\n      sum += temp;\n    }\n  }\n\n  return 
sum;\n}\n\n#else\n\nshared uint s_Data[THREADBLOCK_SIZE * 2];\n\n// Almost the same as naive scan1Inclusive but doesn't need barriers\n// and works only for size <= WARP_SIZE\n\nuint warpScanInclusive(uint idata, uint size){\n  uint pos = 2 * threadIdx.x - (threadIdx.x & (size - 1));\n  s_Data[pos] = 0;\n  pos += size;\n  s_Data[pos] = idata;\n\n  if(size >=  2) s_Data[pos] += s_Data[pos -  1];\n  if(size >=  4) s_Data[pos] += s_Data[pos -  2];\n  if(size >=  8) s_Data[pos] += s_Data[pos -  4];\n  if(size >= 16) s_Data[pos] += s_Data[pos -  8];\n  if(size >= 32) s_Data[pos] += s_Data[pos - 16];\n\n  return s_Data[pos];\n}\n\n#endif\n\nuint warpScanExclusive(uint idata, uint size){\n    return warpScanInclusive(idata, size) - idata;\n}\n\nuint scan1Inclusive(uint idata, uint size){\n  if(size > WARP_SIZE){\n    //Bottom-level inclusive warp scan\n    uint warpResult = warpScanInclusive(idata, WARP_SIZE);\n\n    //Save top elements of each warp for exclusive warp scan\n  #if !GL_NV_shader_thread_shuffle\n    //sync to wait for warp scans to complete (because l_Data is being overwritten)\n    memoryBarrierShared();\n    barrier();\n  #endif\n    if( (threadIdx & (WARP_SIZE - 1)) == (WARP_SIZE - 1) )\n        s_Data[threadIdx >> LOG2_WARP_SIZE] = warpResult;\n\n    //wait for warp scans to complete\n    memoryBarrierShared();\n    barrier();\n    if( threadIdx < (THREADBLOCK_SIZE / WARP_SIZE) ){\n        //grab top warp elements\n        uint val = s_Data[threadIdx];\n        //calculate exclsive scan and write back to shared memory\n        s_Data[threadIdx] = warpScanExclusive(val, size >> LOG2_WARP_SIZE);\n    }\n\n    //return updated warp scans with exclusive scan results\n    memoryBarrierShared();\n    barrier();\n    return warpResult + s_Data[threadIdx >> LOG2_WARP_SIZE];\n  }else{\n    return warpScanInclusive(idata, size);\n  }\n}\n\n#else\n\nshared uint s_Data[THREADBLOCK_SIZE * 2];\n\nuint scan1Inclusive(uint idata, uint size)\n{\n    uint pos = 2 * 
threadIdx.x - (threadIdx.x & (size - 1));\n    s_Data[pos] = 0;\n    pos += size;\n    s_Data[pos] = idata;\n\n    for (uint offset = 1; offset < size; offset <<= 1)\n    {\n        memoryBarrierShared();\n        barrier();\n        uint t = s_Data[pos] + s_Data[pos - offset];\n        memoryBarrierShared();\n        barrier();\n        s_Data[pos] = t;\n    }\n\n    return s_Data[pos];\n}\n\n#endif\n\nuint scan1Exclusive(uint idata, uint size)\n{\n    return scan1Inclusive(idata, size) - idata;\n}\n\nuvec4 scan4Inclusive(uvec4 idata4, uint size)\n{\n    //Level-0 inclusive scan\n    idata4.y += idata4.x;\n    idata4.z += idata4.y;\n    idata4.w += idata4.z;\n\n    //Level-1 exclusive scan\n    uint oval = scan1Exclusive(idata4.w, size / 4);\n\n    idata4.x += oval;\n    idata4.y += oval;\n    idata4.z += oval;\n    idata4.w += oval;\n\n    return idata4;\n}\n\n//Exclusive vector scan: the array to be scanned is stored\n//in local thread memory scope as uint4\nuvec4 scan4Exclusive(uvec4 idata4, uint size)\n{\n    uvec4 odata4 = scan4Inclusive(idata4, size);\n    odata4.x -= idata4.x;\n    odata4.y -= idata4.y;\n    odata4.z -= idata4.z;\n    odata4.w -= idata4.w;\n    return odata4;\n}\n\n#endif\n\n\n#if TASK == TASK_SUM\n\nlayout (std430, binding=1) buffer inputBuffer {\n  uvec4 indata[];\n};\n\nlayout (std430, binding=0) buffer outputBuffer {\n  uvec4 outdata[];\n};\n\nvoid main()\n{\n  uint idx = gl_GlobalInvocationID.x;\n  uint maxidx = ((numElements + 3) / 4);\n  \n  bool valid = idx < maxidx;\n\n  //Load data\n  uvec4 idata4 = valid ? 
indata[idx] : uvec4(0);\n\n  // Calculate scan\n  //uvec4 odata4 = scan4Inclusive(idata4, min(BATCH_SIZE,  (maxidx-idx)*4));\n  uvec4 odata4 = scan4Inclusive(idata4, BATCH_SIZE);\n\n  //Write back\n  if (valid) outdata[idx] = odata4;\n}\n#endif\n\n#if TASK == TASK_OFFSETS\n\nlayout (std430, binding=1) buffer inputBuffer {\n  uint indata[];\n};\n\nlayout (std430, binding=0) buffer outputBuffer {\n  uvec4 outdata[];\n};\n\nvoid main()\n{\n  uint idx = gl_GlobalInvocationID.x;\n  uint startIdx = (idx * BATCH_SIZE * 4);\n  \n  bool valid = false;\n  \n  //Load data\n  uvec4 idata4 = uvec4(0);\n  for (uint i = 0; i < 4; i++){\n    uint readIdx = startIdx + (i+1)*BATCH_SIZE - 1u;\n    if ( readIdx < numElements ){\n      idata4[i] = indata[readIdx];\n      valid = true;\n    }\n  }\n\n  //Calculate scan\n  uvec4 odata4 = scan4Inclusive(idata4, BATCH_SIZE);\n\n  //Write back\n  if (valid) outdata[idx] = odata4;\n}\n#endif\n\n#if TASK == TASK_COMBINE\n\nlayout (std430, binding=1) buffer inputBuffer {\n  uint indata[];\n};\n\nlayout (std430, binding=0) buffer outputBuffer {\n  uint outdata[];\n};\n\nvoid main()\n{\n  uint idx = gl_GlobalInvocationID.x;\n  \n  bool valid = idx < numElements;\n  uint batch = idx / BATCH_SIZE;\n  \n  if (valid && batch > 0) {\n    outdata[idx] += indata[batch-1];\n  }\n}\n#endif\n"
  },
  {
    "path": "scansystem.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"scansystem.hpp\"\n#include <assert.h>\n\ninline static GLuint snapdiv(GLuint input, GLuint align)\n{\n  return (input + align - 1) / align;\n}\n\nsize_t ScanSystem::getOffsetSize(GLuint elements)\n{\n  GLuint groups = snapdiv(elements,BATCH_ELEMENTS);\n\n  if (groups == 1) return 0;\n\n  GLuint groupcombines = snapdiv(groups,BATCH_ELEMENTS);\n  size_t size = groupcombines*BATCH_ELEMENTS*sizeof(GLuint);\n  \n  if (groupcombines > 1){\n    // add another layer\n    GLuint combines = snapdiv(groupcombines,BATCH_ELEMENTS);\n    size += combines*BATCH_ELEMENTS*sizeof(GLuint);\n  }\n\n  return size;\n}\n\nbool ScanSystem::scanData( GLuint elements, const Buffer& input, const Buffer& output, const Buffer& offsets )\n{\n  assert( (elements % 4) == 0 );\n  assert( elements < (GLuint64)BATCH_ELEMENTS*BATCH_ELEMENTS*BATCH_ELEMENTS);\n  assert( elements * sizeof(GLuint) <= size_t(input.size) );\n  assert( input.size <= output.size );\n\n  glUseProgram(programs.prefixsum);\n  glUniform1ui(0,elements);\n\n  input.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1);\n  output.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n\n  
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n\n  GLuint groups = snapdiv(elements,BATCH_ELEMENTS);\n\n  assert(groups <= maxGrpsPrefix);\n  glDispatchCompute(groups,1,1);\n\n  if (groups > 1){\n\n    GLuint groupcombines = snapdiv(groups,BATCH_ELEMENTS);\n\n    assert( groupcombines <= BATCH_ELEMENTS );\n    assert( getOffsetSize(elements) <= size_t(offsets.size));\n        \n    glUseProgram(programs.offsets);\n    glUniform1ui(0,elements);\n\n    output.BindBufferRange(GL_SHADER_STORAGE_BUFFER,  1);\n    offsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0);\n\n    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n\n    assert(groupcombines <= maxGrpsOffsets);\n    glDispatchCompute(groupcombines,1,1);\n\n    if (groupcombines > 1){\n      glUniform1ui(0,groupcombines*BATCH_ELEMENTS);\n\n      Buffer additionaloffsets = offsets; // derive from offsets\n      GLintptr required = groupcombines*BATCH_ELEMENTS*sizeof(GLuint);;\n\n      additionaloffsets.offset += required;\n      additionaloffsets.size = offsets.size - required;\n\n      offsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1);\n      additionaloffsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n\n      glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n\n      glDispatchCompute(1,1,1);\n\n      combineWithOffsets(groupcombines*BATCH_ELEMENTS, offsets, additionaloffsets);\n    }\n  }\n\n  glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0);\n  glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,1);\n  \n  return groups > 1;\n}\n\nvoid ScanSystem::combineWithOffsets(GLuint elements, const Buffer& output, const Buffer& offsets )\n{\n  //assert((elements % 4) == 0);\n  assert(elements * sizeof(GLuint) <= size_t(output.size));\n\n  glUseProgram(programs.combine);\n  glUniform1ui(0,elements);\n\n  offsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 1);\n  output.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0);\n\n  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);\n\n  GLuint groups = snapdiv(elements,GROUPSIZE);\n  
assert(groups < maxGrpsCombine);\n  glDispatchCompute(groups,1,1);\n}\n\nvoid ScanSystem::init( const Programs& progs )\n{\n  update(progs);\n}\n\nvoid ScanSystem::update( const Programs& progs )\n{\n  GLuint    maxGroups[3];\n  glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,(GLint*)&maxGroups[0]);\n\n  //GLuint    groupSize[3];\n  //glGetProgramiv(progs.combine,    GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupSize);\n  maxGrpsCombine = maxGroups[0];\n  //glGetProgramiv(progs.offsets,    GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupSize);\n  maxGrpsOffsets = maxGroups[0];\n  //glGetProgramiv(progs.prefixsum,    GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupSize);\n  maxGrpsPrefix = maxGroups[0];\n\n  programs = progs;\n}\n\nvoid ScanSystem::test()\n{\n  GLuint scanbuffers[3];\n  glCreateBuffers(3,scanbuffers);\n\n  GLuint low  = ScanSystem::BATCH_ELEMENTS/2;\n  GLuint mid  = ScanSystem::BATCH_ELEMENTS*ScanSystem::BATCH_ELEMENTS;\n  GLuint high = ScanSystem::BATCH_ELEMENTS*ScanSystem::BATCH_ELEMENTS*2;\n  size_t offsize = ScanSystem::getOffsetSize(high);\n\n  GLuint* data = new GLuint[high];\n  for (GLuint i = 0; i < high; i++){\n    data[i] = 1;\n  }\n\n  glNamedBufferStorage(scanbuffers[0], high * sizeof(GLuint), &data[0], 0 );\n  glNamedBufferStorage(scanbuffers[1], high * sizeof(GLuint),0, GL_MAP_READ_BIT );\n  glNamedBufferStorage(scanbuffers[2], offsize,0,GL_MAP_READ_BIT);\n\n  delete [] data;\n\n  GLuint result;\n  bool needcombine;\n\n  // low\n  needcombine = scanData(low, scanbuffers[0], scanbuffers[1], scanbuffers[2]);\n  assert(needcombine == false);\n  result = 0;\n  glGetNamedBufferSubData(scanbuffers[1],sizeof(GLuint) * (low-1), sizeof(GLuint), &result);\n  assert(result == low);\n\n  // med\n  needcombine = scanData(mid, scanbuffers[0], scanbuffers[1], scanbuffers[2]);\n  assert(needcombine == true);\n  result = 0;\n  glGetNamedBufferSubData(scanbuffers[2],sizeof(GLuint) * (ScanSystem::BATCH_ELEMENTS-1), sizeof(GLuint), &result);\n  assert(result == mid);\n\n  
combineWithOffsets(mid, scanbuffers[1], scanbuffers[2]);\n  result = 0;\n  glGetNamedBufferSubData(scanbuffers[1],sizeof(GLuint) * (mid-1), sizeof(GLuint), &result);\n  assert(result == mid);\n\n  // high\n  needcombine = scanData(high, scanbuffers[0], scanbuffers[1], scanbuffers[2]);\n  assert(needcombine == true);\n  combineWithOffsets(high, scanbuffers[1], scanbuffers[2]);\n  result = 0;\n  glGetNamedBufferSubData(scanbuffers[1],sizeof(GLuint) * (high-1), sizeof(GLuint), &result);\n  assert(result == high);\n\n  glDeleteBuffers(3,scanbuffers);\n}\n\n"
  },
  {
    "path": "scansystem.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#ifndef SCANSYSTEM_H__\n#define SCANSYSTEM_H__\n\n#include <nvgl/extensions_gl.hpp>\n#include <cstddef>\n\nclass ScanSystem {\npublic:\n  const static size_t GROUPSIZE = 512;\n  const static size_t BATCH_ELEMENTS = GROUPSIZE*4;\n\n  struct Programs {\n    GLuint prefixsum;\n    GLuint offsets;\n    GLuint combine;\n  };\n\n  struct Buffer {\n    GLuint      buffer;\n    GLintptr    offset;\n    GLsizeiptr  size;\n\n    void create(size_t sizei, const void* data, GLbitfield flags)\n    {\n      size = sizei;\n      offset = 0;\n      glCreateBuffers(1,&buffer);\n      glNamedBufferStorage(buffer, size, data, flags);\n    }\n\n    Buffer(GLuint buffer)\n      : buffer(buffer)\n      , offset(0)\n    {\n      if (sizeof(GLsizeiptr) > 4)\n        glGetNamedBufferParameteri64v(buffer,GL_BUFFER_SIZE, (GLint64*)&size);\n      else\n        glGetNamedBufferParameteriv(buffer, GL_BUFFER_SIZE, (GLint*)&size);\n    }\n\n    Buffer()\n      : buffer(0)\n      , offset(0)\n      , size(0)\n    {\n\n    }\n\n    inline void BindBufferRange(GLenum target, GLuint index) const {\n      glBindBufferRange(target, index, buffer, offset, 
size);\n    }\n    inline void BindBufferRange(GLenum target, GLuint index, GLintptr offseta, GLsizeiptr sizea) const {\n      glBindBufferRange(target, index, buffer, offset+offseta, size+sizea);\n    }\n\n    inline void GetNamedBufferSubData(void* data){\n      glGetNamedBufferSubData(buffer,offset,size,data);\n    }\n\n  };\n\n  void init(const Programs& progs);\n  void update(const Programs& progs);\n\n  void test();\n\n  // returns true if offsets are needed\n  // the offset value needs to be added using the BATCH_ELEMENTS\n  bool scanData( GLuint elements, const Buffer& input, const Buffer& output, const Buffer& offsets);\n  void combineWithOffsets(GLuint elements, const Buffer& output, const Buffer& offsets);\n\n  static size_t getOffsetSize(GLuint elements);\n\npublic:\n  Programs    programs;\n\n  GLuint      maxGrpsPrefix;\n  GLuint      maxGrpsOffsets;\n  GLuint      maxGrpsCombine;\n };\n\n#endif\n \n"
  },
  {
    "path": "scene.frag.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#extension GL_ARB_shading_language_include : enable\n#include \"common.h\"\n\n// must match cadscene\nstruct Side {\n  vec4 ambient;\n  vec4 diffuse;\n  vec4 specular;\n  vec4 emissive;\n};\n\nstruct Material {\n  Side  sides[2];\n  Side  _pad[2];\n};\n\nlayout(std140,binding=UBO_MATERIAL) uniform materialBuffer {\n#if USE_INDEXING\n  Material  materials[256];\n#else\n  Material  materials[1];\n#endif\n};\n\n\nin Interpolants {\n  vec3 wPos;\n  vec3 wNormal;\n#if USE_INDEXING\n  flat ivec2 assigns;\n#endif\n#if !defined(WIREMODE)\n  flat int wireMode;\n#endif\n} IN;\n\n\n#if !defined(WIREMODE)\nint wireMode = IN.wireMode;\n#else\nint wireMode = WIREMODE;\n#endif\n\nlayout(location=0,index=0) out vec4 out_Color;\n\nvec4 shade(const Side side)\n{\n  vec4 color = side.ambient + side.emissive;\n  \n  vec3 eyePos = vec3(scene.viewMatrixIT[0].w,scene.viewMatrixIT[1].w,scene.viewMatrixIT[2].w);\n\n  vec3 lightDir = normalize( scene.wLightPos.xyz - IN.wPos);\n  vec3 viewDir  = normalize( eyePos - IN.wPos);\n  vec3 halfDir  = normalize(lightDir + viewDir);\n  vec3 normal   = normalize(IN.wNormal) * (gl_FrontFacing ? 
1 : -1);\n  \n  color += side.diffuse * max(dot(normal,lightDir),0);\n  color += side.specular * pow(max(0,dot(normal,halfDir)),16);\n  \n  return color;\n}\n\nvoid main()\n{\n  int mi = 0;\n#if USE_INDEXING\n  mi = IN.assigns.y;\n#endif\n\n  out_Color = shade(materials[mi].sides[gl_FrontFacing ? 1 : 0]);\n\n  if (wireMode != 0){\n    out_Color = materials[mi].sides[0].diffuse*1.5 + 0.3;\n  }\n}\n"
  },
  {
    "path": "scene.vert.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#extension GL_ARB_shading_language_include : enable\n#include \"common.h\"\n\n#if USE_INDEXING && USE_BASEINSTANCE\n#extension GL_ARB_shader_draw_parameters : require\n#endif\nin layout(location=VERTEX_POS)      vec3 pos;\nin layout(location=VERTEX_NORMAL)   vec3 normal;\n\n#if USE_INDEXING\n#if USE_BASEINSTANCE\nivec2 assigns = ivec2( gl_BaseInstanceARB & 0xFFFFF, gl_BaseInstanceARB >> 20);\n#else\nin layout(location=VERTEX_ASSIGNS)  ivec2 assigns;\n#endif\n#define matrixIndex assigns.x\n#endif\n\n#if !defined(WIREMODE)\nin layout(location=VERTEX_WIREMODE) int wireMode;\n#endif\n\nout Interpolants {\n  vec3 wPos;\n  vec3 wNormal;\n#if USE_INDEXING\n  flat ivec2 assigns;\n#endif\n#if !defined(WIREMODE)\n  flat int wireMode;\n#endif\n} OUT;\n\n\n\nvoid main()\n{\n#if USE_INDEXING || USE_MIX\n  vec3 wPos     = (getIndexedMatrix(matrixIndex, NODE_MATRIX_WORLD)   * vec4(pos,1)).xyz;\n  vec3 wNormal  = mat3(getIndexedMatrix(matrixIndex, NODE_MATRIX_WORLDIT)) * normal;\n#else\n  vec3 wPos     = (object.worldMatrix   * vec4(pos,1)).xyz;\n  vec3 wNormal  = mat3(object.worldMatrixIT) * normal;\n#endif\n  gl_Position   = scene.viewProjMatrix * vec4(wPos,1);\n  OUT.wPos = 
wPos;\n  OUT.wNormal = wNormal;\n#if USE_INDEXING\n  OUT.assigns = assigns;\n#endif\n#if !defined(WIREMODE)\n  OUT.wireMode = wireMode;\n#endif\n}\n"
  },
  {
    "path": "statesystem.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"statesystem.hpp\"\n#include <string.h> // memcmp\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::ClipDistanceState::applyGL() const\n{\n  for (GLuint i = 0; i < MAX_CLIPPLANES; i++){\n    if (isBitSet(enabled,i))  glEnable  (GL_CLIP_DISTANCE0 + i);\n    else                      glDisable (GL_CLIP_DISTANCE0 + i);\n  }\n}\n\nvoid StateSystem::ClipDistanceState::getGL()\n{\n  enabled = 0;\n  for (GLuint i = 0; i < MAX_CLIPPLANES; i++){\n    setBitState(enabled,i,glIsEnabled(GL_CLIP_DISTANCE0 + i));\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n\n#if STATESYSTEM_USE_DEPRECATED\nvoid StateSystem::AlphaStateDepr::applyGL() const\n{\n  glAlphaFunc(mode,refvalue);\n}\n\nvoid StateSystem::AlphaStateDepr::getGL()\n{\n  glGetIntegerv(GL_ALPHA_TEST_FUNC,(GLint*)&mode);\n  glGetFloatv(GL_ALPHA_TEST_REF, &refvalue);\n}\n#endif\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::StencilState::applyGL() const\n{\n  glStencilFuncSeparate(GL_FRONT, funcs[FACE_FRONT].func, 
funcs[FACE_FRONT].refvalue, funcs[FACE_FRONT].mask);\n  glStencilFuncSeparate(GL_BACK,  funcs[FACE_BACK ].func, funcs[FACE_BACK ].refvalue, funcs[FACE_BACK ].mask);\n  glStencilOpSeparate(GL_FRONT,   ops[FACE_FRONT].fail,   ops[FACE_FRONT].zfail,      ops[FACE_FRONT].zpass);\n  glStencilOpSeparate(GL_BACK,    ops[FACE_BACK ].fail,   ops[FACE_BACK ].zfail,      ops[FACE_BACK ].zpass);\n}\n\nvoid StateSystem::StencilState::getGL()\n{\n  glGetIntegerv(GL_STENCIL_FUNC,        (GLint*)&funcs[FACE_FRONT].func);\n  glGetIntegerv(GL_STENCIL_REF,         (GLint*)&funcs[FACE_FRONT].refvalue);\n  glGetIntegerv(GL_STENCIL_VALUE_MASK,  (GLint*)&funcs[FACE_FRONT].mask);\n\n  glGetIntegerv(GL_STENCIL_BACK_FUNC,         (GLint*)&funcs[FACE_BACK].func);\n  glGetIntegerv(GL_STENCIL_BACK_REF,          (GLint*)&funcs[FACE_BACK].refvalue);\n  glGetIntegerv(GL_STENCIL_BACK_VALUE_MASK,   (GLint*)&funcs[FACE_BACK].mask);\n\n  glGetIntegerv(GL_STENCIL_FAIL,              (GLint*)&ops[FACE_FRONT].fail);\n  glGetIntegerv(GL_STENCIL_PASS_DEPTH_FAIL,   (GLint*)&ops[FACE_FRONT].zfail);\n  glGetIntegerv(GL_STENCIL_PASS_DEPTH_PASS,   (GLint*)&ops[FACE_FRONT].zpass);\n\n  glGetIntegerv(GL_STENCIL_BACK_FAIL,             (GLint*)&ops[FACE_BACK].fail);\n  glGetIntegerv(GL_STENCIL_BACK_PASS_DEPTH_FAIL,  (GLint*)&ops[FACE_BACK].zfail);\n  glGetIntegerv(GL_STENCIL_BACK_PASS_DEPTH_PASS,  (GLint*)&ops[FACE_BACK].zpass);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::BlendState::applyGL() const\n{\n  if (separateEnable){\n    for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n      if (isBitSet(separateEnable,i)) glEnablei(GL_BLEND,i);\n      else                            glDisablei(GL_BLEND,i);\n    }\n  }\n\n  if (useSeparate){\n    for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n      glBlendFuncSeparatei(i,blends[i].rgb.srcw,blends[i].rgb.dstw,blends[i].alpha.srcw,blends[i].alpha.dstw);\n      
glBlendEquationSeparatei(i,blends[i].rgb.equ,blends[i].alpha.equ);\n    }\n  }\n  else{\n    glBlendFuncSeparate(blends[0].rgb.srcw,blends[0].rgb.dstw,blends[0].alpha.srcw,blends[0].alpha.dstw);\n    glBlendEquationSeparate(blends[0].rgb.equ,blends[0].alpha.equ);\n  }\n\n  //glBlendColor(color[0],color[1],color[2],color[3]);\n}\n\nvoid StateSystem::BlendState::getGL()\n{\n  GLuint stateSet = 0;\n  separateEnable = 0;\n  for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n    if (setBitState(separateEnable,i, glIsEnabledi( GL_BLEND, i))) stateSet++;\n  }\n  if (stateSet == MAX_DRAWBUFFERS){\n    separateEnable = 0;\n  }\n\n  GLuint numEqual = 1;\n  for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n    glGetIntegeri_v(GL_BLEND_SRC_RGB,i,(GLint*)&blends[i].rgb.srcw);\n    glGetIntegeri_v(GL_BLEND_DST_RGB,i,(GLint*)&blends[i].rgb.dstw);\n    glGetIntegeri_v(GL_BLEND_EQUATION_RGB,i,(GLint*)&blends[i].rgb.equ);\n\n    glGetIntegeri_v(GL_BLEND_SRC_ALPHA,i,(GLint*)&blends[i].alpha.srcw);\n    glGetIntegeri_v(GL_BLEND_DST_ALPHA,i,(GLint*)&blends[i].alpha.dstw);\n    glGetIntegeri_v(GL_BLEND_EQUATION_ALPHA,i,(GLint*)&blends[i].alpha.equ);\n\n    // compare every buffer from index 1 on with its predecessor, so numEqual\n    // can reach MAX_DRAWBUFFERS when all draw buffers share one blend setup\n    if (i > 0 && memcmp(&blends[i].rgb,&blends[i-1].rgb,sizeof(blends[i].rgb))==0 && memcmp(&blends[i].alpha,&blends[i-1].alpha,sizeof(blends[i].alpha))==0){\n      numEqual++;\n    }\n  }\n\n  useSeparate = numEqual != MAX_DRAWBUFFERS;\n\n  //glGetFloatv(GL_BLEND_COLOR,color);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::DepthState::applyGL() const\n{\n  glDepthFunc(func);\n}\n\nvoid StateSystem::DepthState::getGL()\n{\n  glGetIntegerv(GL_DEPTH_FUNC,(GLint*)&func);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::LogicState::applyGL() const\n{\n  glLogicOp(op);\n}\n\nvoid StateSystem::LogicState::getGL()\n{\n  
glGetIntegerv(GL_LOGIC_OP_MODE,(GLint*)&op);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::RasterState::applyGL() const\n{\n  //glFrontFace(frontFace);\n  glCullFace(cullFace);\n  //glPolygonOffset(polyOffsetFactor,polyOffsetUnits);\n  glPolygonMode(GL_FRONT_AND_BACK,polyMode);\n  //glLineWidth(lineWidth);\n  glPointSize(pointSize);\n  glPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE,pointFade);\n  glPointParameteri(GL_POINT_SPRITE_COORD_ORIGIN,pointSpriteOrigin);\n}\n\nvoid StateSystem::RasterState::getGL()\n{\n  //glGetIntegerv(GL_FRONT_FACE, (GLint*)&frontFace);\n  glGetIntegerv(GL_CULL_FACE_MODE, (GLint*)&cullFace);\n  //glGetFloatv(GL_POLYGON_OFFSET_FACTOR,&polyOffsetFactor);\n  //glGetFloatv(GL_POLYGON_OFFSET_UNITS,&polyOffsetUnits);\n  //glGetFloatv(GL_LINE_WIDTH,&lineWidth);\n  glGetFloatv(GL_POINT_SIZE,&pointSize);\n  glGetFloatv(GL_POINT_FADE_THRESHOLD_SIZE,&pointFade);\n  glGetIntegerv(GL_POINT_SPRITE_COORD_ORIGIN,(GLint*)&pointSpriteOrigin);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\n#if STATESYSTEM_USE_DEPRECATED\nvoid StateSystem::RasterStateDepr::applyGL() const\n{\n  glLineStipple(lineStippleFactor,lineStipplePattern);\n  glShadeModel(shadeModel);\n}\n\nvoid StateSystem::RasterStateDepr::getGL()\n{\n  GLint pattern;\n  glGetIntegerv(GL_LINE_STIPPLE_PATTERN,&pattern);\n  lineStipplePattern = pattern;\n  glGetIntegerv(GL_LINE_STIPPLE_REPEAT,(GLint*)&lineStippleFactor);\n  glGetIntegerv(GL_SHADE_MODEL,(GLint*)&shadeModel);\n}\n#endif\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::PrimitiveState::applyGL() const\n{\n  glPrimitiveRestartIndex(restartIndex);\n  glProvokingVertex(provokingVertex);\n  glPatchParameteri(GL_PATCH_VERTICES,patchVertices);\n}\n\nvoid StateSystem::PrimitiveState::getGL()\n{\n  glGetIntegerv(GL_PRIMITIVE_RESTART_INDEX, (GLint*)&restartIndex);\n  glGetIntegerv(GL_PROVOKING_VERTEX, 
(GLint*)&provokingVertex);\n  glGetIntegerv(GL_PATCH_VERTICES, (GLint*)&patchVertices);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::SampleState::applyGL() const\n{\n  glSampleCoverage(coverage,invert);\n  glSampleMaski(0,mask);\n}\n\nvoid StateSystem::SampleState::getGL()\n{\n  glGetIntegerv(GL_SAMPLE_COVERAGE_VALUE,(GLint*)&coverage);\n  glGetIntegerv(GL_SAMPLE_COVERAGE_INVERT,(GLint*)&invert);\n  glGetIntegeri_v(GL_SAMPLE_MASK_VALUE,0,(GLint*)&mask);\n}\n\n//////////////////////////////////////////////////////////////////////////\n/*\nvoid StateSystem::ViewportState::applyGL() const\n{\n  if (useSeparate){\n    glViewportArrayv(0,MAX_VIEWPORTS, &viewports[0].x);\n  }\n  else{\n    glViewport(GLint(viewports[0].x),GLint(viewports[0].y),GLsizei(viewports[0].width),GLsizei(viewports[0].height));\n  }\n}\n\nvoid StateSystem::ViewportState::getGL()\n{\n  int numEqual = 1;\n  for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n    glGetFloati_v(GL_VIEWPORT,i,&viewports[i].x);\n    if (i > 0 && memcmp(&viewports[i],&viewports[i-1],sizeof(viewports[i]))==0){\n      numEqual++;\n    }\n  }\n  \n  useSeparate = (numEqual != MAX_VIEWPORTS);\n}\n*/\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::DepthRangeState::applyGL() const\n{\n  if (useSeparate){\n    glDepthRangeArrayv(0,MAX_VIEWPORTS, &depths[0].nearPlane);\n  }\n  else{\n    glDepthRange(depths[0].nearPlane,depths[0].farPlane);\n  }\n}\n\nvoid StateSystem::DepthRangeState::getGL()\n{\n  GLuint numEqual = 1;\n  for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n    glGetDoublei_v(GL_DEPTH_RANGE,i,&depths[i].nearPlane);\n    if (i > 0 && memcmp(&depths[i],&depths[i-1],sizeof(depths[i]))==0){\n      numEqual++;\n    }\n  }\n\n  useSeparate = (numEqual != MAX_VIEWPORTS);\n}\n\n//////////////////////////////////////////////////////////////////////////\n/*\nvoid StateSystem::ScissorState::applyGL() const\n{\n  if (useSeparate){\n  
  glScissorArrayv(0,MAX_VIEWPORTS, &scissor[0].x);\n  }\n  else{\n    glScissor(scissor[0].x,scissor[0].y,scissor[0].width,scissor[0].height);\n  }\n}\n\nvoid StateSystem::ScissorState::getGL()\n{\n  GLuint numEqual = 1;\n  for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n    glGetIntegeri_v(GL_SCISSOR_BOX,i,&scissor[i].x);\n    if (i > 0 && memcmp(&scissor[i],&scissor[i-1],sizeof(scissor[i]))==0){\n      numEqual++;\n    }\n  }\n\n  useSeparate = (numEqual != MAX_VIEWPORTS);\n}\n*/\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::ScissorEnableState::applyGL() const\n{\n  if (separateEnable){\n    for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n      if (isBitSet(separateEnable,i))  glEnablei (GL_SCISSOR_TEST,i);\n      else                                    glDisablei(GL_SCISSOR_TEST,i);\n    }\n  }\n\n}\n\nvoid StateSystem::ScissorEnableState::getGL()\n{\n  // query the per-viewport scissor test, matching what applyGL sets\n  GLuint stateSet = 0;\n  separateEnable = 0;\n  for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n    if (setBitState(separateEnable,i, glIsEnabledi( GL_SCISSOR_TEST, i))) stateSet++;\n  }\n  if (stateSet == MAX_VIEWPORTS){\n    separateEnable = 0;\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::MaskState::applyGL() const\n{\n  if (colormaskUseSeparate){\n    for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n      glColorMaski(i, colormask[i][0],colormask[i][1],colormask[i][2],colormask[i][3]);\n    }\n  }\n  else{\n    glColorMask( colormask[0][0],colormask[0][1],colormask[0][2],colormask[0][3] );\n  }\n  glDepthMask(depth);\n  glStencilMaskSeparate(GL_FRONT, stencil[FACE_FRONT]);\n  glStencilMaskSeparate(GL_BACK,  stencil[FACE_BACK]);\n}\n\nvoid StateSystem::MaskState::getGL()\n{\n  glGetBooleanv(GL_DEPTH_WRITEMASK,&depth);\n  glGetIntegerv(GL_STENCIL_WRITEMASK, (GLint*)&stencil[FACE_FRONT]);\n  glGetIntegerv(GL_STENCIL_BACK_WRITEMASK, (GLint*)&stencil[FACE_BACK]);\n\n  int numEqual = 1;\n  for (GLuint i = 0; i < 
MAX_DRAWBUFFERS; i++){\n    glGetBooleani_v(GL_COLOR_WRITEMASK, i, colormask[i]);\n\n    if ( i > 0 && memcmp(colormask[i],colormask[i-1],sizeof(colormask[i]))==0){\n      numEqual++;\n    }\n  }\n\n  colormaskUseSeparate = numEqual != MAX_DRAWBUFFERS;\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::FBOState::applyGL(bool skipFboBinding) const\n{\n  if (!skipFboBinding){\n    glBindFramebuffer(GL_DRAW_FRAMEBUFFER,fboDraw);\n    glBindFramebuffer(GL_READ_FRAMEBUFFER,fboRead);\n  }\n  glDrawBuffers(numBuffers,drawBuffers);\n  glReadBuffer(readBuffer);\n}\n\nvoid StateSystem::FBOState::getGL()\n{\n  glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING,(GLint*)&fboDraw);\n  glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING,(GLint*)&fboRead);\n\n  glGetIntegerv(GL_READ_BUFFER,(GLint*)&readBuffer);\n\n  for (int i = 0; i < MAX_DRAWBUFFERS; i++){\n    glGetIntegerv(GL_DRAW_BUFFER0 + i,(GLint*)&drawBuffers[i]);\n    if (drawBuffers[i] != GL_NONE){\n      numBuffers = i+1;\n    }\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::VertexEnableState::applyGL(GLbitfield changed) const\n{\n  for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    if (isBitSet(changed,i)){\n      if (isBitSet(enabled,i))  glEnableVertexAttribArray(i);\n      else                      glDisableVertexAttribArray(i);\n    }\n  }\n}\n\nvoid StateSystem::VertexEnableState::getGL()\n{\n  enabled = 0;\n  for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    GLint status;\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_ENABLED, (GLint*)&status);\n    setBitState(enabled,i, status);\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::VertexFormatState::applyGL(GLbitfield changedFormat, GLbitfield changedBinding) const\n{\n  for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    if (!isBitSet(changedFormat,i)) continue;\n\n    switch(formats[i].mode){\n    case 
VERTEXMODE_FLOAT:\n      glVertexAttribFormat(i, formats[i].size, formats[i].type, formats[i].normalized, formats[i].relativeoffset);\n      break;\n    case VERTEXMODE_INT:\n    case VERTEXMODE_UINT:\n      glVertexAttribIFormat(i, formats[i].size, formats[i].type, formats[i].relativeoffset);\n      break;\n    }\n    glVertexAttribBinding(i,formats[i].binding);\n  }\n\n  for (GLuint i = 0; i < MAX_VERTEXBINDINGS; i++){\n    if (!isBitSet(changedBinding,i)) continue;\n\n    glVertexBindingDivisor(i,bindings[i].divisor);\n    glBindVertexBuffer(i,0,0,bindings[i].stride);\n  }\n}\n\nvoid StateSystem::VertexFormatState::getGL()\n{\n  for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    GLint status = 0;\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_RELATIVE_OFFSET, (GLint*)&formats[i].relativeoffset);\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_SIZE, (GLint*)&formats[i].size);\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_TYPE, (GLint*)&formats[i].type);\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_NORMALIZED, (GLint*)&status);\n    formats[i].normalized = status;\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_INTEGER, (GLint*)&status);\n    if (status){\n      formats[i].mode = VERTEXMODE_INT;\n    }\n    else{\n      formats[i].mode = VERTEXMODE_FLOAT;\n    }\n    glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_BINDING, (GLint*)&formats[i].binding);\n  }\n\n  for (GLuint i = 0; i < MAX_VERTEXBINDINGS; i++){\n    glGetIntegeri_v(GL_VERTEX_BINDING_DIVISOR,i,(GLint*)&bindings[i].divisor);\n    glGetIntegeri_v(GL_VERTEX_BINDING_STRIDE, i,(GLint*)&bindings[i].stride);\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::VertexImmediateState::applyGL(GLbitfield changed) const\n{\n  for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    if (!isBitSet(changed,i)) continue;\n\n    switch(data[i].mode){\n    case VERTEXMODE_FLOAT:\n      glVertexAttrib4fv(i,data[i].floats);\n      break;\n    case VERTEXMODE_INT:\n   
   glVertexAttribI4iv(i,data[i].ints);\n      break;\n    case VERTEXMODE_UINT:\n      glVertexAttribI4uiv(i,data[i].uints);\n      break;\n    }\n  }\n}\n\nvoid StateSystem::VertexImmediateState::getGL()\n{\n  for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    switch(data[i].mode){\n    case VERTEXMODE_FLOAT:\n      glGetVertexAttribfv(i,GL_CURRENT_VERTEX_ATTRIB,data[i].floats);\n      break;\n    case VERTEXMODE_INT:\n      glGetVertexAttribIiv(i,GL_CURRENT_VERTEX_ATTRIB,data[i].ints);\n      break;\n    case VERTEXMODE_UINT:\n      glGetVertexAttribIuiv(i,GL_CURRENT_VERTEX_ATTRIB,data[i].uints);\n      break;\n    }\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::ProgramState::applyGL() const\n{\n  glUseProgram(program);\n}\n\nvoid StateSystem::ProgramState::getGL()\n{\n  glGetIntegerv(GL_CURRENT_PROGRAM, (GLint*)&program);\n}\n\n//////////////////////////////////////////////////////////////////////////\n\n// keep in sync!\nstatic GLenum s_stateEnums[StateSystem::NUM_STATEBITS] = {\n  GL_BLEND,\n  GL_COLOR_LOGIC_OP,\n  GL_CULL_FACE,\n  GL_DEPTH_CLAMP,\n  GL_DEPTH_TEST,\n  GL_DITHER,\n  GL_FRAMEBUFFER_SRGB,\n  GL_LINE_SMOOTH,\n  GL_MULTISAMPLE,\n  GL_POLYGON_OFFSET_FILL,\n  GL_POLYGON_OFFSET_LINE,\n  GL_POLYGON_OFFSET_POINT,\n  GL_POLYGON_SMOOTH,\n  GL_PRIMITIVE_RESTART,\n  GL_PRIMITIVE_RESTART_FIXED_INDEX,\n  GL_RASTERIZER_DISCARD,\n  GL_SAMPLE_ALPHA_TO_COVERAGE,\n  GL_SAMPLE_ALPHA_TO_ONE,\n  GL_SAMPLE_COVERAGE,\n  GL_SAMPLE_SHADING,\n  GL_SAMPLE_MASK,\n  GL_STENCIL_TEST,\n  GL_SCISSOR_TEST,\n  GL_TEXTURE_CUBE_MAP_SEAMLESS,\n  GL_PROGRAM_POINT_SIZE,\n};\n\nvoid StateSystem::EnableState::applyGL(GLbitfield changedBits) const\n{\n  for (GLuint i = 0; i < NUM_STATEBITS; i++){\n    if (isBitSet(changedBits,i)){\n      if (isBitSet(stateBits,i))  glEnable  (s_stateEnums[i]);\n      else                        glDisable (s_stateEnums[i]);\n    }\n  }\n}\n\nvoid StateSystem::EnableState::getGL()\n{\n  for 
(GLuint i = 0; i < NUM_STATEBITS; i++){\n    setBitState(stateBits,i, glIsEnabled(s_stateEnums[i]));\n  }\n}\n\n//////////////////////////////////////////////////////////////////////////\n#if STATESYSTEM_USE_DEPRECATED\nstatic GLenum s_stateEnumsDepr[StateSystem::NUM_STATEBITSDEPR] = {\n  GL_ALPHA_TEST,\n  GL_LINE_STIPPLE,\n  GL_POINT_SMOOTH,\n  GL_POINT_SPRITE,\n  GL_POLYGON_STIPPLE,\n};\n\nvoid StateSystem::EnableStateDepr::applyGL(GLbitfield changedBits) const\n{\n  for (GLuint i = 0; i < NUM_STATEBITSDEPR; i++){\n    if (isBitSet(changedBits,i)){\n      if (isBitSet(stateBitsDepr,i))  glEnable  (s_stateEnumsDepr[i]);\n      else                            glDisable (s_stateEnumsDepr[i]);\n    }\n  }\n}\n\nvoid StateSystem::EnableStateDepr::getGL()\n{\n  for (GLuint i = 0; i < NUM_STATEBITSDEPR; i++){\n    setBitState(stateBitsDepr,i, glIsEnabled(s_stateEnumsDepr[i]));\n  }\n}\n#endif\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::State::applyGL(bool coreonly, bool skipFboBinding) const\n{\n  enable.applyGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!coreonly) enableDepr.applyGL();\n#endif\n  program.applyGL();\n  clip.applyGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!coreonly) alpha.applyGL();\n#endif\n  blend.applyGL();\n  depth.applyGL();\n  stencil.applyGL();\n  logic.applyGL();\n  primitive.applyGL();\n  sample.applyGL();\n  raster.applyGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!coreonly) rasterDepr.applyGL();\n#endif\n  /*if (!isBitSet(dynamicState,DYNAMIC_VIEWPORT)){\n    viewport.applyGL();\n  }*/\n  depthrange.applyGL();\n  /*if (!isBitSet(dynamicState,DYNAMIC_SCISSOR)){\n    scissor.applyGL();\n  }*/\n  scissorenable.applyGL();\n  mask.applyGL();\n  fbo.applyGL(skipFboBinding);\n  vertexenable.applyGL();\n  vertexformat.applyGL();\n  verteximm.applyGL();\n}\n\nvoid StateSystem::State::getGL(bool coreonly)\n{\n  enable.getGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!coreonly) 
enableDepr.getGL();\n#endif\n  program.getGL();\n  clip.getGL();\n#if STATESYSTEM_USE_DEPRECATED\n  // deprecated state is queried like everything else; getGL must not apply state\n  if (!coreonly) alpha.getGL();\n#endif\n  blend.getGL();\n  depth.getGL();\n  stencil.getGL();\n  logic.getGL();\n  primitive.getGL();\n  sample.getGL();\n  raster.getGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!coreonly) rasterDepr.getGL();\n#endif\n  //viewport.getGL();\n  depthrange.getGL();\n  //scissor.getGL();\n  scissorenable.getGL();\n  mask.getGL();\n  fbo.getGL();\n  vertexenable.getGL();\n  vertexformat.getGL();\n  verteximm.getGL();\n}\n\n\n//////////////////////////////////////////////////////////////////////////\n\nvoid StateSystem::init(bool coreonly)\n{\n  m_coreonly = coreonly;\n}\n\nvoid StateSystem::deinit()\n{\n  m_states.resize(0);\n  m_freeIDs.resize(0);\n}\n\nvoid StateSystem::generate( GLuint num, StateID* objects )\n{\n\n  GLuint i;\n  for ( i = 0; i < num && !m_freeIDs.empty(); i++){\n    objects[i] = m_freeIDs.back();\n    m_freeIDs.pop_back();\n  }\n\n  GLuint begin = GLuint(m_states.size());\n  // number of IDs recycled from m_freeIDs; fresh IDs are numbered from begin\n  GLuint reused = i;\n\n  if ( i < num){\n    m_states.resize( begin + num - i);\n  }\n\n  for ( i = i; i < num; i++){\n    objects[i] = begin + (i - reused);\n  }\n}\n\nvoid StateSystem::destroy( GLuint num, const StateID* objects )\n{\n  for (GLuint i = 0; i < num; i++){\n    m_freeIDs.push_back(objects[i]);\n  }\n}\n\nvoid StateSystem::set( StateID id, const State& state, GLenum basePrimitiveMode )\n{\n  StateInternal& intstate   = m_states[id];\n  intstate.changeID++;\n  intstate.state = state;\n  intstate.state.basePrimitiveMode = basePrimitiveMode;\n\n  intstate.usedDiff = 0;\n  for (int i = 0; i < MAX_DIFFS; i++){\n    intstate.others[i].state = INVALID_ID;\n  }\n}\n\nconst StateSystem::State& StateSystem::get( StateID id ) const\n{\n  return m_states[id].state;\n}\n\nint inline StateSystem::prepareTransitionCache(StateID prev, StateInternal& to )\n{\n  StateInternal& from = m_states[prev];\n\n  int index = -1;\n\n  for (int i = 0; i < MAX_DIFFS; i++){\n    if ( 
to.others[i].state == prev && to.others[i].changeID == from.changeID) {\n      index = i;\n      break;\n    }\n  }\n\n  if (index < 0){\n    index = to.usedDiff;\n    to.usedDiff = (to.usedDiff + 1) % MAX_DIFFS;\n\n    to.others[index].state = prev;\n    to.others[index].changeID = from.changeID;\n\n    makeDiff(to.diffs[index], from, to);\n  }\n\n  return index;\n}\n\nvoid StateSystem::applyGL( StateID id, bool skipFboBinding ) const\n{\n  m_states[id].state.applyGL( m_coreonly, skipFboBinding );\n}\n\nvoid StateSystem::applyGL( StateID id, StateID prev, bool skipFboBinding )\n{\n  StateInternal& to   = m_states[id];\n\n  if (prev == INVALID_ID){\n    applyGL(id, skipFboBinding);\n    return;\n  }\n\n  int index = prepareTransitionCache(prev, to);\n  applyDiffGL( to.diffs[index], to.state, skipFboBinding );\n\n}\n\nvoid StateSystem::applyDiffGL( const StateDiff& diff, const State &state, bool skipFboBinding )\n{\n  if (isBitSet(diff.changedContentBits,StateDiff::ENABLE))\n    state.enable.applyGL(diff.changedStateBits);\n#if STATESYSTEM_USE_DEPRECATED\n  if (!m_coreonly && isBitSet(diff.changedContentBits,StateDiff::ENABLE_DEPR))\n    state.enableDepr.applyGL(diff.changedStateDeprBits);\n#endif\n  if (isBitSet(diff.changedContentBits,StateDiff::PROGRAM))\n    state.program.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::CLIP))\n    state.clip.applyGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!m_coreonly && isBitSet(diff.changedContentBits,StateDiff::ALPHA_DEPR))\n    state.alpha.applyGL();\n#endif\n  if (isBitSet(diff.changedContentBits,StateDiff::BLEND))\n    state.blend.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::DEPTH))\n    state.depth.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::STENCIL))\n    state.stencil.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::LOGIC))\n    state.logic.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::PRIMITIVE))\n    state.primitive.applyGL();\n  if 
(isBitSet(diff.changedContentBits,StateDiff::RASTER))\n    state.raster.applyGL();\n#if STATESYSTEM_USE_DEPRECATED\n  if (!m_coreonly && isBitSet(diff.changedContentBits,StateDiff::RASTER_DEPR))\n    state.rasterDepr.applyGL();\n#endif\n  /*if (isBitSet(diff.changedContentBits,StateDiff::VIEWPORT))\n    state.viewport.applyGL();*/\n  if (isBitSet(diff.changedContentBits,StateDiff::DEPTHRANGE))\n    state.depthrange.applyGL();\n  /*if (isBitSet(diff.changedContentBits,StateDiff::SCISSOR))\n    state.scissor.applyGL();*/\n  if (isBitSet(diff.changedContentBits,StateDiff::SCISSORENABLE))\n    state.scissorenable.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::MASK))\n    state.mask.applyGL();\n  if (isBitSet(diff.changedContentBits,StateDiff::FBO))\n    state.fbo.applyGL(skipFboBinding);\n  if (isBitSet(diff.changedContentBits,StateDiff::VERTEXENABLE))\n    state.vertexenable.applyGL(diff.changedVertexEnable);\n  if (isBitSet(diff.changedContentBits,StateDiff::VERTEXFORMAT))\n    state.vertexformat.applyGL(diff.changedVertexFormat, diff.changedVertexBinding);\n  if (isBitSet(diff.changedContentBits,StateDiff::VERTEXIMMEDIATE))\n    state.verteximm.applyGL(diff.changedVertexImm);\n}\n\n\nvoid StateSystem::makeDiff( StateDiff& diff, const StateInternal &fromInternal, const StateInternal &toInternal )\n{\n  const State &from = fromInternal.state;\n  const State &to   = toInternal.state;\n\n  diff.changedStateBits     = from.enable.stateBits ^ to.enable.stateBits;\n#if STATESYSTEM_USE_DEPRECATED\n  diff.changedStateDeprBits = from.enableDepr.stateBitsDepr ^ to.enableDepr.stateBitsDepr;\n#endif\n  diff.changedContentBits   = 0;\n  \n  if (memcmp(&from.enable         ,&to.enable         ,sizeof(from.enable         )) != 0) setBit(diff.changedContentBits,StateDiff::ENABLE);\n#if STATESYSTEM_USE_DEPRECATED\n  if (memcmp(&from.enableDepr     ,&to.enableDepr     ,sizeof(from.enableDepr     )) != 0) setBit(diff.changedContentBits,StateDiff::ENABLE_DEPR);\n#endif\n  
if (memcmp(&from.program        ,&to.program        ,sizeof(from.program        )) != 0) setBit(diff.changedContentBits,StateDiff::PROGRAM);\n  if (memcmp(&from.clip           ,&to.clip           ,sizeof(from.clip           )) != 0) setBit(diff.changedContentBits,StateDiff::CLIP);\n#if STATESYSTEM_USE_DEPRECATED\n  if (memcmp(&from.alpha          ,&to.alpha          ,sizeof(from.alpha          )) != 0) setBit(diff.changedContentBits,StateDiff::ALPHA_DEPR);\n#endif\n  if (memcmp(&from.blend          ,&to.blend          ,sizeof(from.blend          )) != 0) setBit(diff.changedContentBits,StateDiff::BLEND);\n  if (memcmp(&from.depth          ,&to.depth          ,sizeof(from.depth          )) != 0) setBit(diff.changedContentBits,StateDiff::DEPTH);\n  if (memcmp(&from.stencil        ,&to.stencil        ,sizeof(from.stencil        )) != 0) setBit(diff.changedContentBits,StateDiff::STENCIL);\n  if (memcmp(&from.logic          ,&to.logic          ,sizeof(from.logic          )) != 0) setBit(diff.changedContentBits,StateDiff::LOGIC);\n  if (memcmp(&from.primitive      ,&to.primitive      ,sizeof(from.primitive      )) != 0) setBit(diff.changedContentBits,StateDiff::PRIMITIVE);\n  if (memcmp(&from.raster         ,&to.raster         ,sizeof(from.raster         )) != 0) setBit(diff.changedContentBits,StateDiff::RASTER);\n#if STATESYSTEM_USE_DEPRECATED\n  if (memcmp(&from.rasterDepr     ,&to.rasterDepr     ,sizeof(from.rasterDepr     )) != 0) setBit(diff.changedContentBits,StateDiff::RASTER_DEPR);\n#endif\n  //if (memcmp(&from.viewport       ,&to.viewport       ,sizeof(from.viewport       )) != 0) setBit(diff.changedContentBits,StateDiff::VIEWPORT);\n  // the DEPTHRANGE bit tracks the depthrange block, not the depth-func block\n  if (memcmp(&from.depthrange     ,&to.depthrange     ,sizeof(from.depthrange     )) != 0) setBit(diff.changedContentBits,StateDiff::DEPTHRANGE);\n  //if (memcmp(&from.scissor        ,&to.scissor        ,sizeof(from.scissor        )) != 0) setBit(diff.changedContentBits,StateDiff::SCISSOR);\n  if (memcmp(&from.scissorenable  
,&to.scissorenable  ,sizeof(from.scissorenable  )) != 0) setBit(diff.changedContentBits,StateDiff::SCISSORENABLE);\n  if (memcmp(&from.mask           ,&to.mask           ,sizeof(from.mask           )) != 0) setBit(diff.changedContentBits,StateDiff::MASK);\n  if (memcmp(&from.fbo            ,&to.fbo            ,sizeof(from.fbo            )) != 0) setBit(diff.changedContentBits,StateDiff::FBO);\n\n  // special case vertex stuff, more likely to change than the rest\n\n  diff.changedVertexEnable  = from.vertexenable.enabled ^ to.vertexenable.enabled;\n\n  diff.changedVertexImm = 0;\n  diff.changedVertexFormat = 0;\n  \n  for (GLint i = 0; i < MAX_VERTEXATTRIBS; i++){\n    if (memcmp(&from.vertexformat.formats[i], &to.vertexformat.formats[i], sizeof(to.vertexformat.formats[i])) != 0)  setBit(diff.changedVertexFormat,i);\n    if (memcmp(&from.verteximm.data[i], &to.verteximm.data[i], sizeof(to.verteximm.data[i])) != 0)                    setBit(diff.changedVertexImm,i);\n  }\n\n  diff.changedVertexBinding = 0;\n  for (GLint i = 0; i < MAX_VERTEXBINDINGS; i++){\n    if (memcmp(&from.vertexformat.bindings[i], &to.vertexformat.bindings[i], sizeof(to.vertexformat.bindings[i])) != 0)  setBit(diff.changedVertexBinding,i);\n  }\n\n  if (diff.changedVertexEnable)                               setBit(diff.changedContentBits,StateDiff::VERTEXENABLE);\n  if (diff.changedVertexBinding || diff.changedVertexFormat)  setBit(diff.changedContentBits,StateDiff::VERTEXFORMAT);\n  if (diff.changedVertexImm)                                  setBit(diff.changedContentBits,StateDiff::VERTEXIMMEDIATE);\n}\n\nvoid StateSystem::prepareTransition( StateID id, StateID prev )\n{\n  StateInternal& to   = m_states[id];\n\n  prepareTransitionCache(prev,to);\n}\n\n\n"
  },
  {
    "path": "statesystem.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n\n#ifndef STATESYSTEM_H__\n#define STATESYSTEM_H__\n\n\n#include <nvgl/extensions_gl.hpp>\n#include <vector>\n\nclass StateSystem {\npublic:\n\n  static inline bool isBitSet(GLbitfield bits, GLuint key)\n  {\n    return  (bits & (1<<key)) ? 
true : false;\n  }\n\n  static inline void setBit(GLbitfield& bits, GLuint key)\n  {\n    bits |= (1<<key);\n  }\n\n  static GLbitfield getBit(GLuint key)\n  {\n    return (1<<key);\n  }\n\n  static inline GLboolean setBitState(GLbitfield& bits, GLuint key, GLboolean state)\n  {\n    if (state)  bits |=  (1<<key);\n    else        bits &= ~(1<<key);\n    return state;\n  }\n  \n  static const GLuint MAX_DRAWBUFFERS    = 8;\n  static const GLuint MAX_CLIPPLANES     = 8;\n  static const GLuint MAX_VIEWPORTS      = 16;\n  static const GLuint MAX_VERTEXATTRIBS  = 16;\n  static const GLuint MAX_VERTEXBINDINGS = 16;\n  static const GLuint MAX_COLORS         = 4;\n    \n  enum StateBits {\n    BLEND,\n    COLOR_LOGIC_OP,\n    CULL_FACE,\n    DEPTH_CLAMP,\n    DEPTH_TEST,\n    DITHER,\n    FRAMEBUFFER_SRGB,\n    LINE_SMOOTH,\n    MULTISAMPLE,\n    POLYGON_OFFSET_FILL,\n    POLYGON_OFFSET_LINE,\n    POLYGON_OFFSET_POINT,\n    POLYGON_SMOOTH,\n    PRIMITIVE_RESTART,\n    PRIMITIVE_RESTART_FIXED_INDEX,\n    RASTERIZER_DISCARD,\n    SAMPLE_ALPHA_TO_COVERAGE,\n    SAMPLE_ALPHA_TO_ONE,\n    SAMPLE_COVERAGE,\n    SAMPLE_SHADING,\n    SAMPLE_MASK,\n    STENCIL_TEST,\n    SCISSOR_TEST,\n    TEXTURE_CUBE_MAP_SEAMLESS,\n    PROGRAM_POINT_SIZE,\n    NUM_STATEBITS,\n  };\n#if STATESYSTEM_USE_DEPRECATED\n  enum StateBitsDepr {\n    DEPR_ALPHA_TEST,\n    DEPR_LINE_STIPPLE,\n    DEPR_POINT_SMOOTH,\n    DEPR_POINT_SPRITE,\n    DEPR_POLYGON_STIPPLE,\n    NUM_STATEBITSDEPR,\n  };\n#endif\n    \n  enum Faces {\n    FACE_FRONT,\n    FACE_BACK,\n    MAX_FACES,\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct ClipDistanceState {\n    GLbitfield  enabled;\n\n    ClipDistanceState()\n    {\n      enabled = 0;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n#if STATESYSTEM_USE_DEPRECATED\n  struct AlphaStateDepr {\n    GLenum    mode;\n    GLfloat   
refvalue;\n\n    AlphaStateDepr()\n    {\n      mode      = GL_ALWAYS;\n      refvalue  = 1.0;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n#endif\n  //////////////////////////////////////////////////////////////////////////\n\n  struct StencilOp\n  {\n    GLenum  fail;\n    GLenum  zfail;\n    GLenum  zpass;\n  };\n  struct StencilFunc\n  {\n    GLenum  func;\n    GLuint  refvalue;\n    GLuint  mask;\n  };\n  struct StencilState{\n    StencilFunc funcs[MAX_FACES];\n    StencilOp   ops[MAX_FACES];\n\n    StencilState()\n    {\n      for (GLuint i = 0; i < MAX_FACES; i++){\n        funcs[i].func = GL_ALWAYS;\n        funcs[i].refvalue = 0;\n        funcs[i].mask = ~0;\n      }\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n  struct BlendMode{\n    GLenum srcw;\n    GLenum dstw;\n    GLenum equ;\n  };\n  struct BlendStage{\n    BlendMode rgb;\n    BlendMode alpha;\n  };\n  struct BlendState{\n    GLbitfield  separateEnable; // only set this if you want per draw enable\n    //GLfloat     color[4];\n    GLuint      useSeparate;    // if set uses per draw, otherwise first\n    BlendStage  blends[MAX_DRAWBUFFERS];\n\n    BlendState() {\n      separateEnable = 0;\n      useSeparate = GL_FALSE;\n      for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n        blends[i].alpha.srcw = GL_ONE;\n        blends[i].alpha.dstw = GL_ZERO;\n        blends[i].alpha.equ  = GL_FUNC_ADD;\n        blends[i].rgb = blends[i].alpha;\n      }\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n  //////////////////////////////////////////////////////////////////////////\n  \n  struct DepthState {\n    GLenum  func;\n    // depth bounds for NV?\n\n    DepthState() {\n      func = GL_LESS;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n  //////////////////////////////////////////////////////////////////////////\n  \n  struct LogicState {\n    GLenum  op;\n\n    
LogicState() {\n      op = GL_COPY;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n  //////////////////////////////////////////////////////////////////////////\n  \n  struct RasterState {\n    //GLenum    frontFace;\n    GLenum    cullFace;\n    //GLfloat   polyOffsetFactor;\n    //GLfloat   polyOffsetUnits;\n    GLenum    polyMode;   // front and back, no separate support\n    //GLfloat   lineWidth;\n    GLfloat   pointSize;\n    GLfloat   pointFade;\n    GLenum    pointSpriteOrigin;\n\n    RasterState() {\n      //frontFace = GL_CCW;\n      cullFace = GL_BACK;\n      //polyOffsetFactor = 0;\n      //polyOffsetUnits  = 0;\n      polyMode = GL_FILL;\n      //lineWidth = 1.0f;\n      pointSize = 1.0f;\n      pointFade = 1.0f;\n      pointSpriteOrigin = GL_UPPER_LEFT;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n#if STATESYSTEM_USE_DEPRECATED\n  struct RasterStateDepr {\n    GLint     lineStippleFactor;\n    GLushort  lineStipplePattern;\n    GLenum    shadeModel;\n    // ignore polygonStipple\n\n    RasterStateDepr() {\n      lineStippleFactor   = 1;\n      lineStipplePattern  = ~0;\n      shadeModel  = GL_SMOOTH;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n#endif\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct PrimitiveState {\n    GLuint    restartIndex;\n    GLint     patchVertices;\n    GLenum    provokingVertex;\n\n    PrimitiveState() {\n      restartIndex = ~0;\n      patchVertices = 3;\n      provokingVertex = GL_LAST_VERTEX_CONVENTION;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct SampleState {\n    GLfloat   coverage;\n    GLboolean invert;\n    GLuint    mask;\n\n    SampleState() {\n      coverage = 1.0;\n      invert = GL_FALSE;\n      mask = ~0;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n  
//////////////////////////////////////////////////////////////////////////\n\n  struct Viewport {\n    float   x;\n    float   y;\n    float   width;\n    float   height;\n  };\n  struct DepthRange {\n    double  nearPlane;\n    double  farPlane;\n  };\n  struct Scissor {\n    GLint   x;\n    GLint   y;\n    GLsizei width;\n    GLsizei height;\n  };\n\n  /*\n  struct ViewportState {\n    GLuint        useSeparate;  // if set uses per view, otherwise first\n    Viewport      viewports[MAX_VIEWPORTS];\n\n    ViewportState() {\n      useSeparate = GL_FALSE;\n      for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n        viewports[i].x = 0;\n        viewports[i].y = 0;\n        viewports[i].width = 0;\n        viewports[i].height = 0;\n      }\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n  */\n\n  struct DepthRangeState {\n    GLuint        useSeparate;  // if set uses per view, otherwise first\n    DepthRange    depths[MAX_VIEWPORTS];\n\n    DepthRangeState() {\n      useSeparate = GL_FALSE;\n      for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n        depths[i].nearPlane = 0;\n        depths[i].farPlane  = 1;\n      }\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  /*\n  struct ScissorState {\n    GLuint        useSeparate;    // if set uses per draw, otherwise first\n    Scissor       scissor[MAX_VIEWPORTS];\n\n    ScissorState() {\n      useSeparate = GL_FALSE;\n      for (GLuint i = 0; i < MAX_VIEWPORTS; i++){\n        scissor[i].x = 0;\n        scissor[i].y = 0;\n        scissor[i].width = 0;\n        scissor[i].height = 0;\n      }\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n  */\n\n  struct ScissorEnableState {\n    GLbitfield    separateEnable; // only set this if you want per view enable\n\n    ScissorEnableState() {\n      separateEnable = 0;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct MaskState {\n    
GLuint    colormaskUseSeparate;\n    GLboolean colormask[MAX_DRAWBUFFERS][MAX_COLORS];\n    GLboolean depth;\n    GLuint    stencil[MAX_FACES];\n\n    MaskState() {\n      colormaskUseSeparate = GL_FALSE;\n      depth = GL_TRUE;\n      stencil[FACE_FRONT] = ~0;\n      stencil[FACE_BACK] = ~0;\n      for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n        for (GLuint c = 0; c < MAX_COLORS; c++){\n          colormask[i][c] = GL_TRUE;\n        }\n      }\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n  \n  struct FBOState {\n    GLuint  fboDraw;\n    GLuint  fboRead;\n    GLenum  readBuffer;\n    GLenum  drawBuffers[MAX_DRAWBUFFERS];\n    GLuint  numBuffers;\n\n    FBOState() {\n      fboDraw = 0;\n      fboRead = 0;\n      readBuffer = GL_BACK;\n      for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){\n        drawBuffers[i] = GL_NONE;\n      }\n      drawBuffers[0] = GL_BACK;\n      numBuffers = 1;\n    }\n\n    void setFbo(GLuint fbo){\n      fboDraw = fbo;\n      fboRead = fbo;\n      readBuffer = GL_COLOR_ATTACHMENT0;\n      drawBuffers[0] = GL_COLOR_ATTACHMENT0;\n      numBuffers = 1;\n    }\n\n    void applyGL(bool noBind=false) const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct VertexEnableState {\n    GLbitfield    enabled;\n\n    VertexEnableState() {\n      enabled = 0;\n    }\n\n    void applyGL(GLbitfield changed=~0) const;\n    void getGL();\n  };\n\n  enum VertexModeType {\n    VERTEXMODE_FLOAT,\n    VERTEXMODE_INT,\n    VERTEXMODE_UINT,\n    // ignore double and int64 for now\n  };\n\n  struct VertexFormat {\n    VertexModeType  mode;\n\n    GLboolean normalized;\n    \n    GLuint    size;\n    GLenum    type;\n    GLsizei   relativeoffset;\n\n    GLuint    binding;\n  };\n\n  struct VertexBinding {\n    GLsizei       divisor;\n    GLsizei       stride;\n  };\n\n  struct VertexFormatState {\n  
  VertexFormat  formats[MAX_VERTEXATTRIBS];\n    VertexBinding bindings[MAX_VERTEXBINDINGS];\n\n    VertexFormatState() {\n      for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n        formats[i].mode           = VERTEXMODE_FLOAT;\n        formats[i].size           = 4;\n        formats[i].type           = GL_FLOAT;\n        formats[i].normalized     = GL_FALSE;\n        formats[i].relativeoffset = 0;\n        formats[i].binding        = i;\n      }\n\n      for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n        bindings[i].divisor = 0;\n        bindings[i].stride  = 0;\n      }\n    }\n\n    void applyGL(GLbitfield changedFormat = ~0,GLbitfield changedBinding = ~0) const;\n    void getGL();\n  };\n\n  struct VertexData {\n    VertexModeType  mode;\n    union {\n      float         floats[4];\n      int           ints[4];\n      unsigned int  uints[4];\n    };\n  };\n\n  struct VertexImmediateState {\n    VertexData  data[MAX_VERTEXATTRIBS];\n\n    VertexImmediateState() {\n      for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){\n        data[i].mode = VERTEXMODE_FLOAT;\n        data[i].floats[0] = 0;\n        data[i].floats[1] = 0;\n        data[i].floats[2] = 0;\n        data[i].floats[3] = 1;\n      }\n    }\n\n    void applyGL(GLbitfield changed = ~0) const;\n    void getGL(); // ensure proper mode, otherwise will get garbage\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct ProgramState {\n    // for sake of simplicity this mechanism only support programs\n    // and not program pipelines, nor use of subroutines\n    GLuint    program;\n\n    ProgramState() {\n      program = 0;\n    }\n\n    void applyGL() const;\n    void getGL();\n  };\n\n  //////////////////////////////////////////////////////////////////////////\n\n  struct EnableState {\n    GLbitfield      stateBits;\n\n    EnableState() {\n      stateBits = 0;\n    }\n\n    void applyGL(GLbitfield changed = ~0) const;\n    void getGL();\n  };\n\n#if 
STATESYSTEM_USE_DEPRECATED\n  struct EnableStateDepr {\n    GLbitfield      stateBitsDepr;\n\n    EnableStateDepr() {\n      stateBitsDepr = 0;\n    }\n\n    void applyGL(GLbitfield changed = ~0) const;\n    void getGL();\n  };\n#endif\n\n  //////////////////////////////////////////////////////////////////////////\n  \n  struct State {\n    EnableState           enable;\n  #if STATESYSTEM_USE_DEPRECATED\n    EnableStateDepr       enableDepr;\n  #endif\n    ProgramState          program;\n    ClipDistanceState     clip;\n  #if STATESYSTEM_USE_DEPRECATED\n    AlphaStateDepr        alpha;\n  #endif\n    BlendState            blend;\n    DepthState            depth;\n    StencilState          stencil;\n    LogicState            logic;\n    PrimitiveState        primitive;\n    SampleState           sample;\n    RasterState           raster;\n  #if STATESYSTEM_USE_DEPRECATED\n    RasterStateDepr       rasterDepr;\n  #endif\n    //ViewportState         viewport;\n    DepthRangeState       depthrange;\n    //ScissorState          scissor;\n    ScissorEnableState    scissorenable;\n    MaskState             mask;\n    FBOState              fbo;\n    VertexEnableState     vertexenable;\n    VertexFormatState     vertexformat;\n    VertexImmediateState  verteximm;\n\n    // This value only exists to ease compatibility with NV_command_list\n    // and is unaffected by apply or get operations, its value\n    // is set during StateSystem::set\n    GLenum                basePrimitiveMode; \n\n    State() \n      : basePrimitiveMode(GL_TRIANGLES)\n    {\n\n    }\n\n    void    applyGL(bool coreonly=false, bool skipFboBinding=false) const;\n    void    getGL(bool coreonly=false);\n  };\n  \n  typedef unsigned int StateID;\n  static const StateID  INVALID_ID = ~0;\n\n  void    init(bool coreonly=false);\n  void    deinit();\n  \n  void    generate(GLuint num, StateID* objects);\n  void    destroy( GLuint num, const StateID* objects );\n  void          set(StateID id, const State& 
state, GLenum basePrimitiveMode);\n  const State&  get(StateID id) const;\n  \n  void    applyGL(StateID id, bool skipFboBinding) const;         // brute force sets everything\n  void    applyGL(StateID id, StateID prev,bool skipFboBinding);  // tries to avoid redundant state changes, can pass INVALID_ID as previous\n\n  void    prepareTransition(StateID id, StateID prev); // can speed up state apply\n  \n  \nprivate:\n  static const int MAX_DIFFS = 16;\n\n  struct StateDiffKey{\n    StateID   state;\n    GLuint    changeID;\n  };\n\n  struct StateDiff {\n\n    enum ContentBits {\n      ENABLE,\n      ENABLE_DEPR,\n      PROGRAM,\n      CLIP,\n      ALPHA_DEPR,\n      BLEND,\n      DEPTH,\n      STENCIL,\n      LOGIC,\n      PRIMITIVE,\n      RASTER,\n      RASTER_DEPR,\n      //VIEWPORT,\n      DEPTHRANGE,\n      //SCISSOR,\n      SCISSORENABLE,\n      MASK,\n      FBO,\n      VERTEXENABLE,\n      VERTEXFORMAT,\n      VERTEXIMMEDIATE,\n    };\n\n    GLbitfield    changedContentBits;\n    GLbitfield    changedStateBits;\n    GLbitfield    changedStateDeprBits;\n    GLbitfield    changedVertexEnable;\n    GLbitfield    changedVertexImm;\n    GLbitfield    changedVertexFormat;\n    GLbitfield    changedVertexBinding;\n    GLuint        pad;\n  };\n\n  struct StateInternal {\n    State       state;\n    GLuint      changeID;\n    \n    int           usedDiff;\n    StateDiffKey  others[MAX_DIFFS];\n    StateDiff     diffs[MAX_DIFFS];\n\n    StateInternal() {\n      changeID = 0;\n    }\n  };\n\n  bool                          m_coreonly;\n  std::vector<StateInternal>    m_states;\n  std::vector<StateID>          m_freeIDs;\n\n  void  makeDiff(StateDiff& diff, const StateInternal &fromInternal, const StateInternal &toInternal);\n  void  applyDiffGL(const StateDiff& diff, const State &to, bool skipFboBinding);\n  int   prepareTransitionCache(StateID prev, StateInternal& to );\n};\n\n\n#endif"
  },
  {
    "path": "tokenbase.cpp",
    "content": "/*\n * Copyright (c) 2014-2023, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include \"tokenbase.hpp\"\n\nusing namespace nvtoken;\n\n#include \"common.h\"\n\nnamespace csfviewer\n{\n\n  bool TokenRendererBase::hasNativeCommandList()\n  {\n    return !!has_GL_NV_command_list;\n  }\n\n  void TokenRendererBase::init(bool bindlessUbo, bool bindlessVbo)\n  {\n    m_bindlessVboUbo = bindlessVbo && bindlessUbo;\n    m_hwsupport = hasNativeCommandList() && !m_emulate;\n\n    for (int i = 0; i < NUM_SHADES; i++){\n      m_tokenAddresses[i] = 0;\n    }\n\n    if (m_hwsupport){\n      glCreateStatesNV(NUM_STATES,m_stateObjects);\n\n      if (m_uselist){\n        glCreateCommandListsNV(NUM_SHADES,m_commandLists);\n      }\n    }\n    else{\n      // we use a fast mode for glBufferAddressRangeNV where we ignore precise buffer boundaries\n      // this will trigger the driver to throw warnings, which may cause a crash\n#if !defined (NDEBUG)\n      if (m_bindlessVboUbo){\n        glDisable(GL_DEBUG_OUTPUT_SYNCHRONOUS);\n        glDisable(GL_DEBUG_OUTPUT);\n      }\n#endif\n\n      m_stateSystem.init(false);\n      m_stateSystem.generate(NUM_STATES,m_stateIDs);\n      for (int i = 0; i < NUM_STATES; i++){\n        
m_stateObjects[i]  = m_stateIDs[i];\n      }\n    }\n\n    nvtokenInitInternals(m_hwsupport, m_bindlessVboUbo);\n  }\n\n  void TokenRendererBase::printStats( ShadeType shadeType )\n  {\n    int stats[NVTOKEN_TYPES] = {0};\n\n    ShadeCommand& sc = m_shades[shadeType];\n\n    size_t num = sc.states.size();\n    size_t size = sc.offsets[num-1] + sc.sizes[num-1] - sc.offsets[0];\n\n    nvtokenGetStats(&m_tokenStreams[shadeType][sc.offsets[0]], size, stats);\n\n    LOGI(\"type: %s\\n\",toString(shadeType));\n    LOGI(\"commandsize: %zu\\n\",size);\n    LOGI(\"state toggles: %zu\\n\", num);\n    LOGI(\"tokens:\\n\");\n    for (int i = 0; i < NVTOKEN_TYPES; i++){\n      const char* what = nvtokenCommandToString(i);\n      if (what && stats[i]){\n        LOGI(\"%s:\\t %6d\\n\", what,stats[i]);\n      }\n    }\n    LOGI(\"\\n\");\n  }\n\n  void TokenRendererBase::finalize(const Resources &resources, bool fillBuffers)\n  {\n    {\n      m_tokenStreams[SHADE_SOLIDWIRE_SPLIT] = m_tokenStreams[SHADE_SOLIDWIRE];\n      m_shades[SHADE_SOLIDWIRE_SPLIT] = m_shades[SHADE_SOLIDWIRE];\n      if (USE_STATEFBO_SPLIT){\n        ShadeCommand& sc = m_shades[SHADE_SOLIDWIRE_SPLIT];\n        for (size_t i = 0; i < sc.sizes.size(); i++){\n          if (sc.states[i] == m_stateObjects[STATE_LINES]){\n            sc.states[i] = m_stateObjects[STATE_LINES_SPLIT];\n          }\n        }\n      }\n      else{\n        ShadeCommand& sc = m_shades[SHADE_SOLIDWIRE_SPLIT];\n        for (size_t i = 0; i < sc.sizes.size(); i++)\n        {\n          if (sc.states[i] == m_stateObjects[STATE_LINES]){\n            sc.fbos[i] = resources.fbo2;\n          }\n          else{\n            sc.fbos[i] = resources.fbo;\n          }\n        }\n      }\n    }\n\n    glCreateBuffers(NUM_SHADES,m_tokenBuffers);\n    if (m_hwsupport && fillBuffers){\n      for (int i = 0; i < NUM_SHADES; i++){\n        glNamedBufferStorage(m_tokenBuffers[i],m_tokenStreams[i].size(), &m_tokenStreams[i][0], 0);\n        if 
(m_useaddress){\n          glGetNamedBufferParameterui64vNV(m_tokenBuffers[i], GL_BUFFER_GPU_ADDRESS_NV, &m_tokenAddresses[i]);\n          glMakeNamedBufferResidentNV(m_tokenBuffers[i], GL_READ_ONLY);\n\n          ShadeCommand& sc = m_shades[i];\n          sc.addresses.clear();\n          sc.addresses.reserve( sc.offsets.size() );\n          for (size_t n = 0; n < sc.offsets.size(); n++){\n            sc.addresses.push_back( m_tokenAddresses[i] + sc.offsets[n] );\n          }\n        }\n      }\n    }\n  }\n\n  void TokenRendererBase::deinit()\n  {\n    if (m_useaddress){\n      for (int i = 0; i < NUM_SHADES; i++){\n        if (m_tokenAddresses[i]){\n          glMakeNamedBufferNonResidentNV( m_tokenBuffers[i] );\n        }\n      }\n    }\n\n    glDeleteBuffers(NUM_SHADES,m_tokenBuffers);\n\n    if (m_hwsupport){\n      glDeleteStatesNV(NUM_STATES,m_stateObjects);\n      if (m_uselist){\n        glDeleteCommandListsNV(NUM_SHADES,m_commandLists);\n      }\n    }\n    else {\n#if !defined (NDEBUG)\n      if (m_bindlessVboUbo){\n        glEnable(GL_DEBUG_OUTPUT);\n        glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);\n      }\n#endif\n    }\n\n    m_stateSystem.deinit();\n  }\n\n\n  void TokenRendererBase::captureState( const Resources &resources )\n  {\n    bool stateChanged  = m_stateChangeID != resources.stateChangeID;\n    bool fboTexChanged = m_fboStateChangeID != resources.fboTextureChangeID;\n\n    m_stateChangeID = resources.stateChangeID;\n    m_fboStateChangeID = resources.fboTextureChangeID;\n\n    if (stateChanged){\n      StateSystem::State state;\n      state.verteximm.data[VERTEX_WIREMODE].mode = StateSystem::VERTEXMODE_INT; // need to set this properly\n\n\n      if (m_bindlessVboUbo){\n        // temp workaround\n#if USE_RESETADDRESSES\n        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV,0,0,0);\n        glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0,0,0);\n        glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATERIAL,0,0);\n  
      glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATRIX,0,0);\n        glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_SCENE,0,0);\n#endif\n      }\n\n      // we will do a series of state captures\n      glBindFramebuffer(GL_FRAMEBUFFER, resources.fbo);\n      glUseProgram(resources.programUsed);\n\n      SetWireMode(GL_FALSE);\n\n      if (m_hwsupport){\n        glStateCaptureNV(m_stateObjects[STATE_TRIS],GL_TRIANGLES);\n      }\n      else {\n        state.getGL(); // very costly, smarter would be setting this manually\n        m_stateSystem.set(m_stateIDs[STATE_TRIS], state, GL_TRIANGLES);\n      }\n\n      glEnable(GL_POLYGON_OFFSET_FILL);\n      // glPolygonOffset(1,1); //not captured\n\n\n      if (m_hwsupport){\n        glStateCaptureNV(m_stateObjects[STATE_TRISOFFSET],GL_TRIANGLES);\n      }\n      else {\n        state.getGL(); // very costly, smarter would be setting this manually\n        m_stateSystem.set(m_stateIDs[STATE_TRISOFFSET], state, GL_TRIANGLES);\n      }\n\n      SetWireMode(GL_TRUE);\n\n      if (m_hwsupport){\n        glStateCaptureNV(m_stateObjects[STATE_LINES],GL_LINES);\n      }\n      else {\n        state.getGL(); // very costly, smarter would be setting this manually\n        m_stateSystem.set(m_stateIDs[STATE_LINES], state, GL_LINES);\n      }\n\n      glBindFramebuffer(GL_FRAMEBUFFER, resources.fbo2);\n\n      if (m_hwsupport){\n        glStateCaptureNV(m_stateObjects[STATE_LINES_SPLIT], GL_LINES);\n      }\n      else {\n        state.getGL(); // very costly, smarter would be setting this manually\n        m_stateSystem.set(m_stateIDs[STATE_LINES_SPLIT], state, GL_LINES);\n      }\n\n      if (!m_hwsupport){\n        m_stateSystem.prepareTransition(m_stateIDs[STATE_TRISOFFSET], m_stateObjects[STATE_LINES]);\n        m_stateSystem.prepareTransition(m_stateIDs[STATE_LINES],      m_stateObjects[STATE_TRISOFFSET]);\n        m_stateSystem.prepareTransition(m_stateIDs[STATE_TRISOFFSET], 
m_stateObjects[STATE_LINES_SPLIT]);\n        m_stateSystem.prepareTransition(m_stateIDs[STATE_LINES_SPLIT],m_stateObjects[STATE_TRISOFFSET]);\n      }\n\n      // reset, stored in stateobjects\n      glUseProgram(0);\n      glDisable(GL_POLYGON_OFFSET_FILL);\n      glPolygonOffset(0,0); \n#if 1\n      // workaround\n      glBindFramebuffer(GL_FRAMEBUFFER, resources.fbo);\n#else\n      glBindFramebuffer(GL_FRAMEBUFFER, 0);\n#endif\n    }\n\n    if (m_hwsupport && m_uselist && (stateChanged || fboTexChanged)){\n      for (int i = 0; i < NUM_SHADES; i++){\n        ShadeCommand& shade = m_shades[i];\n\n        std::vector<const void*>  ptrs;\n        ptrs.reserve(shade.offsets.size());\n        for (size_t p = 0; p < shade.offsets.size(); p++){\n          ptrs.push_back(&m_tokenStreams[i][shade.offsets[p]]);\n        }\n\n        glCommandListSegmentsNV(m_commandLists[i],1);\n        glListDrawCommandsStatesClientNV(m_commandLists[i],0, &ptrs[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) );\n        glCompileCommandListNV(m_commandLists[i]);\n      }\n    }\n  }\n\n  void TokenRendererBase::renderShadeCommandSW( const void* NV_RESTRICT stream, size_t streamSize, ShadeCommand &shade )\n  {\n    nvtokenDrawCommandsStatesSW(stream, streamSize, &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], GLuint(shade.states.size()), m_stateSystem);\n  }\n\n}\n"
  },
  {
    "path": "tokenbase.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n// a few performance tests\n// only affect TOKEN techniques\n#define USE_RESETADDRESSES    1\n#define USE_FASTDRAWS         1\n#define USE_STATEFBO_SPLIT    0 //otherwise fbo[] as used\n#define USE_POLYOFFSETTOKEN   1\n\n// only affects TOKEN\n#define USE_STATEOBJ_REBUILD  0 // does 100 statecaptures per frame\n#define USE_NOFILTER          0\n\n// only affects TOKENSORT\n#define USE_PERFRAMEBUILD     0\n\n\n\n\n#include <assert.h>\n#include <algorithm>\n#include \"renderer.hpp\"\n#include \"nvtoken.hpp\"\n\nusing namespace nvtoken;\n\nnamespace csfviewer\n{\n#define UBOSTAGE_VERTEX     (nvtoken::s_nvcmdlist_stages[NVTOKEN_STAGE_VERTEX])\n#define UBOSTAGE_FRAGMENT   (nvtoken::s_nvcmdlist_stages[NVTOKEN_STAGE_FRAGMENT])\n\n\n\n#if USE_FASTDRAWS\n  #define NVTokenDrawElemsUsed  NVTokenDrawElems\n#else\n  #define NVTokenDrawElemsUsed  NVTokenDrawElemsInstanced\n#endif\n\n  class TokenRendererBase {\n  public:\n    enum StateType {\n      STATE_TRIS,\n      STATE_TRISOFFSET,\n      STATE_LINES,\n      STATE_LINES_SPLIT,\n      NUM_STATES,\n    };\n\n    struct ShadeCommand {\n      std::vector<GLuint64>   addresses;\n      
std::vector<GLintptr>   offsets;\n      std::vector<GLsizei>    sizes;\n      std::vector<GLuint>     states;\n      std::vector<GLuint>     fbos;\n    };\n\n    bool  m_emulate;\n    bool  m_sort;\n    bool  m_uselist;\n    bool  m_useaddress;\n\n    TokenRendererBase()\n      : m_hwsupport(false)\n      , m_bindlessVboUbo(false)\n      , m_useaddress(false)\n      , m_emulate(false)\n      , m_uselist(false)\n      , m_sort(false)\n      , m_stateChangeID(~0)\n      , m_fboStateChangeID(~0)\n    {\n\n    }\n\n    static bool hasNativeCommandList();\n\n  protected:\n\n    bool                        m_hwsupport;\n    bool                        m_bindlessVboUbo;\n\n    GLuint                      m_tokenBuffers[NUM_SHADES];\n    GLuint64                    m_tokenAddresses[NUM_SHADES];\n    std::string                 m_tokenStreams[NUM_SHADES];\n    GLuint                      m_commandLists[NUM_SHADES];\n    ShadeCommand                m_shades[NUM_SHADES];\n\n    size_t                      m_stateChangeID;\n    size_t                      m_fboStateChangeID;\n\n    StateSystem                 m_stateSystem;\n    StateSystem::StateID        m_stateIDs[NUM_STATES];\n    GLuint                      m_stateObjects[NUM_STATES];\n\n    void init(bool bindlessUbo, bool bindlessVbo);\n    void printStats(ShadeType shadeType);\n    void finalize(const Resources &resources, bool fillBuffers=true);\n    void deinit();\n\n    void captureState(const Resources &resources);\n\n    void renderShadeCommandSW( const void* NV_RESTRICT stream, size_t streamSize, ShadeCommand &shade );\n  };\n}\n"
  },
  {
    "path": "transform-leaves.comp.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#ifndef USE_COMPUTE\n#define USE_COMPUTE 1\n#endif\n\n#define MAX_LEVELS 10\n\n#define LEVELBITS 8\n\n#define MATRIX_BASE     0\n#define MATRIX_INVTRANS 1\n\n#define MATRIX_BEGIN_WORLD  0\n#define MATRIX_BEGIN_OBJECT 2\n#define MATRICES            4\n\n\n\n#if USE_COMPUTE\n\n  layout (local_size_x = 256) in;\n\n  layout(std430,binding=2) buffer scratchBuffer {\n    int nodes[];\n  };\n\n  layout(location=0) uniform int count;\n  layout(location=1) uniform int levelcap; // must be >= 1\n  \n  #define BAILOUT gl_GlobalInvocationID.x >= count\n  int self = nodes[gl_GlobalInvocationID.x];\n\n#else\n  layout(location=0) uniform int levelcap; // must be >= 1\n\n  #define BAILOUT false\n  layout(location=0) in int self;\n\n#endif\n\nlayout(binding=0) uniform isamplerBuffer parentsBuffer;\n\nlayout(std430,binding=0) restrict buffer worldMatricesBuffer {\n  mat4 worldMatrices[];\n};\n\nlayout(binding=1) uniform samplerBuffer texWorldMatrices;\nlayout(binding=2) uniform samplerBuffer texObjectMatrices;\n\nmat4 getMatrix(samplerBuffer texbuffer, int idx)\n{\n  return mat4(texelFetch(texbuffer,idx*4 + 0),\n              texelFetch(texbuffer,idx*4 + 1),\n              
texelFetch(texbuffer,idx*4 + 2),\n              texelFetch(texbuffer,idx*4 + 3));\n}\n\nmat4 getObjectMatrix(int idx, int what){\n  return getMatrix(texObjectMatrices,idx*MATRICES + what + MATRIX_BEGIN_OBJECT);\n};\n\nmat4 getWorldMatrix(int idx, int what){\n  return getMatrix(texWorldMatrices,idx*MATRICES + what + MATRIX_BEGIN_WORLD);\n};\n\nvoid main()\n{\n  if (BAILOUT){\n    return;\n  }\n  \n  int  levels[MAX_LEVELS];\n  int  curlevel = 0;\n  \n  // build path to root\n  while (curlevel < MAX_LEVELS){\n    levels[curlevel++] = self;\n    int info = texelFetch(parentsBuffer,self).x;\n        self = info >> LEVELBITS;\n    int lvl  = info & ((1<<LEVELBITS)-1);\n    if (lvl == levelcap){\n      break;\n    }\n  }\n  \n  // init root\n  mat4 parentBase = getWorldMatrix(self,MATRIX_BASE);\n  \n  while( curlevel-- > 0) {\n    self = levels[curlevel];\n    \n    // walk downwards, save matrix in registers & save at end\n    // never read worldmatrices due to read/write hazards\n   \n    parentBase = parentBase * getObjectMatrix(self,MATRIX_BASE);\n\n    worldMatrices[self*MATRICES + MATRIX_BEGIN_WORLD + MATRIX_BASE]     = parentBase;\n    worldMatrices[self*MATRICES + MATRIX_BEGIN_WORLD + MATRIX_INVTRANS] = transpose(inverse(parentBase));\n  }\n}\n"
  },
  {
    "path": "transform-level.comp.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#ifndef USE_COMPUTE\n#define USE_COMPUTE 1\n#endif\n\n#define LEVELBITS 8\n\n#define MATRIX_BASE         0\n#define MATRIX_INVTRANS     1\n\n#define MATRIX_BEGIN_WORLD  0\n#define MATRIX_BEGIN_OBJECT 2\n#define MATRICES            4\n\n#if USE_COMPUTE\n\n  layout (local_size_x = 256) in;\n\n  layout(std430,binding=2) buffer scratchBuffer {\n    int nodes[];\n  };\n\n  layout(location=0) uniform int count;\n\n  #define BAILOUT gl_GlobalInvocationID.x >= count\n  int self = nodes[gl_GlobalInvocationID.x];\n\n#else\n\n  #define BAILOUT false\n  layout(location=0) in int self;\n\n#endif\n\nlayout(binding=0) uniform isamplerBuffer parentsBuffer;\n\nlayout(std430,binding=0) restrict buffer worldMatricesBuffer {\n  mat4 worldMatrices[];\n};\n\nlayout(binding=1) uniform samplerBuffer texWorldMatrices;\nlayout(binding=2) uniform samplerBuffer texObjectMatrices;\n\nmat4 getMatrix(samplerBuffer texbuffer, int idx)\n{\n  return mat4(texelFetch(texbuffer,idx*4 + 0),\n              texelFetch(texbuffer,idx*4 + 1),\n              texelFetch(texbuffer,idx*4 + 2),\n              texelFetch(texbuffer,idx*4 + 3));\n}\n\nmat4 getObjectMatrix(int idx, int what){\n  return 
getMatrix(texObjectMatrices,idx*MATRICES + what + MATRIX_BEGIN_OBJECT);\n};\n\nmat4 getWorldMatrix(int idx, int what){\n  return getMatrix(texWorldMatrices,idx*MATRICES + what + MATRIX_BEGIN_WORLD);\n};\n\n\nvoid main()\n{\n  if (BAILOUT){\n    return;\n  }\n\n  int parent = texelFetch(parentsBuffer,self).x >> LEVELBITS;\n  \n  // world base matrix\n  mat4 world = \n    getWorldMatrix(parent,MATRIX_BASE) *\n    getObjectMatrix(self,MATRIX_BASE);\n\n\n#if 0\n  // world inv trans matrix\n  mat4 parentInv = transpose(getWorldMatrix(parent,MATRIX_INVTRANS));\n  mat4 objectInv = transpose(getObjectMatrix(self, MATRIX_INVTRANS));\n\n  mat4 worldInv  = objectInv * parentInv;\n#else\n  mat4 worldInv = inverse(world);\n#endif\n\n  worldMatrices[self*MATRICES + MATRIX_BEGIN_WORLD + MATRIX_BASE]     = world;\n  worldMatrices[self*MATRICES + MATRIX_BEGIN_WORLD + MATRIX_INVTRANS] = transpose(worldInv);\n}\n"
  },
  {
    "path": "transformsystem.cpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#include <assert.h>\n\n#include \"transformsystem.hpp\"\n#include <nvgl/base_gl.hpp>\n\nvoid TransformSystem::process(const NodeTree& nodeTree, Buffer& ids, Buffer& matricesObject, Buffer& matricesWorld )\n{\n  glUseProgram(m_programs.transform_leaves);\n\n  glBindBuffer    (GL_SHADER_STORAGE_BUFFER,  m_scratchGL);\n  glBufferData    (GL_SHADER_STORAGE_BUFFER,  sizeof(GLuint)*nodeTree.getNumActiveNodes(),NULL,GL_STREAM_DRAW);\n\n#if 0\n  // APIC hack\n  glTextureBufferEXT(m_texsGL[TEXTURE_IDS],   GL_TEXTURE_BUFFER, GL_R32I,    ids.buffer);\n  glTextureBufferEXT(m_texsGL[TEXTURE_OBJECT],GL_TEXTURE_BUFFER, GL_RGBA32F, matricesObject.buffer);\n  glTextureBufferEXT(m_texsGL[TEXTURE_WORLD], GL_TEXTURE_BUFFER, GL_RGBA32F, matricesWorld.buffer);\n#else\n  glTextureBufferRange(m_texsGL[TEXTURE_IDS],     GL_R32I, ids.buffer, ids.offset, ids.size);\n  glTextureBufferRange(m_texsGL[TEXTURE_OBJECT],  GL_RGBA32F, matricesObject.buffer, matricesObject.offset, matricesObject.size);\n  glTextureBufferRange(m_texsGL[TEXTURE_WORLD],   GL_RGBA32F, matricesWorld.buffer, matricesWorld.offset, matricesWorld.size);\n#endif\n\n  for (int i = 0; i 
< TEXTURES; i++){\n    nvgl::bindMultiTexture(GL_TEXTURE0 + i, GL_TEXTURE_BUFFER, m_texsGL[i]);\n  }\n\n  matricesWorld.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);\n  matricesObject.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1);\n  glBindBufferBase(GL_SHADER_STORAGE_BUFFER,2,m_scratchGL);\n\n  const int maxshaderlevels = 10;\n  int maxlevels = maxshaderlevels;\n  int totalNodes = 0;\n  bool useLeaves = true;\n\n  int currentDepth = 1;\n  const NodeTree::Level* level = nodeTree.getUsedLevel(currentDepth);\n\n  // TODO:\n  //\n  // This code lacks a proper heuristic for switching between level and leaves based processing.\n  // One should prefer level if there is enough nodes per level, otherwise descend and gather \n  // many leaves from multiple levels.\n  //\n  while (level){\n    // dispatch on last level, or if we have reached maxlevels\n    bool willdispatch = currentDepth && (!nodeTree.getUsedLevel(currentDepth+1) || currentDepth+1 % maxlevels == 0);\n\n    // the last level in leaf mode, must use all level nodes, and not just the leaves of this level\n    // as subsequent leaves operate in level mode\n    const std::vector<NodeTree::nodeID>& nodes = useLeaves && !willdispatch ? level->leaves : level->nodes;\n\n    if (!nodes.empty()){\n      glBufferSubData(GL_SHADER_STORAGE_BUFFER,totalNodes*sizeof(GLuint),sizeof(GLuint)*nodes.size(),&nodes[0]);\n      totalNodes += (int)nodes.size();\n    }\n\n    currentDepth++;\n    level = nodeTree.getUsedLevel(currentDepth);\n    if (willdispatch){\n      int groupsize = useLeaves ? 
m_leavesGroup : m_levelsGroup;\n      if (useLeaves){\n        glUniform1i(0,totalNodes);\n        glUniform1i(1,1);\n      }\n      else{\n        glUniform1i(0,totalNodes);\n      }\n      \n      glDispatchCompute((totalNodes+groupsize-1)/groupsize,1,1);\n      glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);\n\n      if (useLeaves){\n        // switch to per-level mode after first batch of leaves is over (tip of hierarchy)\n        glUseProgram(m_programs.transform_level);\n        useLeaves = false;\n        maxlevels = 1; // assure we dispatch every level\n      }\n\n      totalNodes = 0;\n    }\n  }\n\n  glUseProgram(0);\n  glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0);\n  glBindBufferBase(GL_SHADER_STORAGE_BUFFER,1,0);\n  glBindBufferBase(GL_SHADER_STORAGE_BUFFER,2,0);\n\n  for (int i = 0; i < TEXTURES; i++){\n    nvgl::bindMultiTexture(GL_TEXTURE0 + i, GL_TEXTURE_BUFFER, 0);\n  }\n  \n}\n\nvoid TransformSystem::init( const Programs &programs )\n{\n  m_programs = programs;\n  glCreateBuffers(1,&m_scratchGL);\n  glCreateTextures(GL_TEXTURE_BUFFER, TEXTURES, m_texsGL);\n}\n\nvoid TransformSystem::deinit()\n{\n  glDeleteBuffers(1,&m_scratchGL);\n  glDeleteTextures(TEXTURES,m_texsGL);\n}\n\nvoid TransformSystem::update( const Programs &programs )\n{\n  m_programs = programs;\n\n  GLuint groupsizes[3];\n  glGetProgramiv(programs.transform_leaves, GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupsizes);\n  m_leavesGroup = groupsizes[0];\n\n  glGetProgramiv(programs.transform_level, GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupsizes);\n  m_levelsGroup = groupsizes[0];\n}\n\n\n"
  },
  {
    "path": "transformsystem.hpp",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */\n\n#ifndef TRANSFORMSYSTEM_H__\n#define TRANSFORMSYSTEM_H__\n\n\n#include <nvgl/extensions_gl.hpp>\n#include <cstddef>\n\n#include \"nodetree.hpp\"\n\nclass TransformSystem {\npublic:\n\n  struct Programs {\n    GLuint  transform_level;\n    GLuint  transform_leaves;\n  };\n\n  struct Buffer {\n    GLuint      buffer;\n    GLintptr    offset;\n    GLsizeiptr  size;\n\n    Buffer(GLuint buffer, size_t sizei=0)\n      : buffer(buffer)\n      , offset(0)\n    {\n      glBindBuffer(GL_COPY_READ_BUFFER, buffer);\n      if (!sizei){\n        if (sizeof(GLsizeiptr) > 4)\n          glGetBufferParameteri64v(GL_COPY_READ_BUFFER,GL_BUFFER_SIZE, (GLint64*)&size);\n        else\n          glGetBufferParameteriv(GL_COPY_READ_BUFFER, GL_BUFFER_SIZE, (GLint*)&size);\n        glBindBuffer(GL_COPY_READ_BUFFER, 0);\n      }\n      else{\n        size = sizei;\n      }\n    }\n\n    Buffer()\n      : buffer(0)\n      , offset(0)\n      , size(0)\n    {\n\n    }\n\n    inline void BindBufferRange(GLenum target, GLuint index) const {\n      glBindBufferRange(target, index, buffer, offset, size);\n    }\n    inline void TexBuffer(GLenum target, 
GLenum internalformat) const {\n      glTexBufferRange(target, internalformat, buffer, offset, size);\n    }\n   \n  };\n  \n  void init( const Programs &programs );\n  void deinit();\n  void update( const Programs &programs );\n  \n  void process(const NodeTree&, Buffer& ids, Buffer& matricesObject, Buffer& matricesWorld );\n  \nprivate:\n\n  enum Textures {\n    TEXTURE_IDS,\n    TEXTURE_WORLD,\n    TEXTURE_OBJECT,\n    TEXTURES,\n  };\n\n  GLuint    m_leavesGroup;\n  GLuint    m_levelsGroup;\n\n  Programs  m_programs;\n  GLuint    m_scratchGL;\n  GLuint    m_texsGL[TEXTURES];\n};\n\n#endif\n\n"
  },
  {
    "path": "xplode-animation.comp.glsl",
    "content": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION\n * SPDX-License-Identifier: Apache-2.0\n */\n\n\n\n#version 430\n/**/\n\n#ifndef USE_COMPUTE\n#define USE_COMPUTE 1\n#endif\n\n#define MATRIX_BASE         0\n#define MATRIX_INVTRANS     1\n\n#define MATRIX_BEGIN_WORLD  0\n#define MATRIX_BEGIN_OBJECT 2\n#define MATRICES            4\n\nlayout(location=0) uniform float scale;\n\n#if USE_COMPUTE\n\n  layout (local_size_x = 256) in;\n\n  layout(location=1) uniform int count;\n\n  #define BAILOUT gl_GlobalInvocationID.x >= count\n  int self = int(gl_GlobalInvocationID.x);\n\n#else\n\n  #define BAILOUT false\n  int self = int(gl_VertexID);\n\n#endif\n\nlayout(std430,binding=0) restrict buffer matricesBuffer {\n  mat4 matrices[];\n};\n\nlayout(binding=0) uniform samplerBuffer texMatricesOrig;\nmat4 getMatrix(samplerBuffer texbuffer, int idx)\n{\n  return mat4(texelFetch(texbuffer,idx*4 + 0),\n              texelFetch(texbuffer,idx*4 + 1),\n              texelFetch(texbuffer,idx*4 + 2),\n              texelFetch(texbuffer,idx*4 + 3));\n}\n\nmat4 getObjectMatrixOrig(int idx, int what){\n  return getMatrix(texMatricesOrig,idx*MATRICES + what + MATRIX_BEGIN_OBJECT);\n};\n\nmat4 getWorldMatrixOrig(int idx, int what){\n  return getMatrix(texMatricesOrig,idx*MATRICES + what + 
MATRIX_BEGIN_WORLD);\n};\n\nvoid main()\n{\n  if (BAILOUT){\n    return;\n  }\n  \n  mat4 matrixOrig     = getObjectMatrixOrig(self,MATRIX_BASE);\n  mat4 matrixITOrig   = getObjectMatrixOrig(self,MATRIX_INVTRANS);\n  \n#if 0\n  // compiler bug\n  mat4 matrixBase = matrixOrig;\n  mat4 matrixIT   = matrixITOrig;\n  matrixBase[3].xyz *= scale;\n  matrixIT[0].w /= scale;\n  matrixIT[1].w /= scale;\n  matrixIT[2].w /= scale;\n#else\n  vec4 basescale  = vec4(scale,scale,scale,1);\n  vec4 itscale    = vec4(1,1,1,1/scale);\n  mat4 matrixBase = mat4(matrixOrig[0], matrixOrig[1], matrixOrig[2], matrixOrig[3]*basescale);\n  mat4 matrixIT   = mat4(matrixITOrig[0]*itscale,matrixITOrig[1]*itscale,matrixITOrig[2]*itscale,matrixITOrig[3]);\n#endif\n\n  matrices[self*MATRICES + MATRIX_BEGIN_OBJECT + MATRIX_BASE]     = matrixBase;\n  matrices[self*MATRICES + MATRIX_BEGIN_OBJECT + MATRIX_INVTRANS] = matrixIT;\n}\n"
  }
]