Full Code of nvpro-samples/gl_cadscene_rendertechniques for AI

master bd7e727c8b03 cached

47 files

347.6 KB

94.0k tokens

404 symbols

1 requests

Download .txt

Showing preview only (363K chars total). Download the full file or copy to clipboard to get everything.

Repository: nvpro-samples/gl_cadscene_rendertechniques
Branch: master
Commit: bd7e727c8b03
Files: 47
Total size: 347.6 KB

Directory structure:
gitextract_wdi1bw94/

├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING
├── LICENSE
├── README.md
├── cadscene.cpp
├── cadscene.hpp
├── common.h
├── csf.cpp
├── csfviewer.cpp
├── cull-bitpack.vert.glsl
├── cull-downsample.frag.glsl
├── cull-downsample.vert.glsl
├── cull-raster.frag.glsl
├── cull-raster.geo.glsl
├── cull-raster.vert.glsl
├── cull-tokencmds.vert.glsl
├── cull-tokensizes.vert.glsl
├── cull-xfb.vert.glsl
├── cullingsystem.cpp
├── cullingsystem.hpp
├── nodetree.cpp
├── nodetree.hpp
├── nvtoken.cpp
├── nvtoken.hpp
├── renderer.cpp
├── renderer.hpp
├── rendererindexedmdi.cpp
├── renderertoken.cpp
├── renderertokensortcull.cpp
├── renderertokenstream.cpp
├── rendereruborange.cpp
├── rendererubosub.cpp
├── scan.comp.glsl
├── scansystem.cpp
├── scansystem.hpp
├── scene.frag.glsl
├── scene.vert.glsl
├── statesystem.cpp
├── statesystem.hpp
├── tokenbase.cpp
├── tokenbase.hpp
├── transform-leaves.comp.glsl
├── transform-level.comp.glsl
├── transformsystem.cpp
├── transformsystem.hpp
└── xplode-animation.comp.glsl

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.clang-format
.editorconfig

#############################
#Spirv
#############################
*.spv
*.spva
*.sass
*.sassbin
*.bat

#############################
#specific to the project
#############################
cmake_built
cmake_build
build
_install
bin_x64
NVPRO_EXTERNAL
nvpro_core

================================================
FILE: CMakeLists.txt
================================================
# Minimum CMake version, then a project named after the directory holding this file.
cmake_minimum_required(VERSION 3.5)
get_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)
project(${PROJNAME})

# Announce which sample is being configured.
message(STATUS "-------------------------------")
message(STATUS "Processing Project ${PROJNAME}:")

#####################################################################################
# Locate nvpro_core: 1) as a sub-folder, 2) one or two levels above this directory.
# This has to happen right here: setup.cmake lives inside nvpro_core and has not
# been included yet (the helper macros called further down presumably come from
# setup.cmake / utilities.cmake).
#
# BASE_DIRECTORY may be preset (e.g. -DBASE_DIRECTORY=<path>) to skip the search.
#
if(NOT BASE_DIRECTORY)
  # Deliberately no REQUIRED keyword: it needs CMake 3.18, newer than the minimum
  # declared above, and a failed search is reported by the explicit check below
  # with a message that tells the user what to set.
  find_path(BASE_DIRECTORY
    NAMES nvpro_core/cmake/setup.cmake
    PATHS
      "${CMAKE_CURRENT_SOURCE_DIR}"
      "${CMAKE_CURRENT_SOURCE_DIR}/.."
      "${CMAKE_CURRENT_SOURCE_DIR}/../.."
    DOC "Directory containing nvpro_core"
  )
endif()

# Covers both a failed search and a wrong user-supplied BASE_DIRECTORY.
if(EXISTS "${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake")
  include("${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake")
  include("${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake")
else()
  message(FATAL_ERROR "could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core")
endif()

# Project-wide definitions for this sample.
# NOTE(review): _add_project_definitions is not defined in this file; presumably it
# comes from the nvpro_core cmake files included above - confirm there what it sets.
_add_project_definitions(${PROJNAME})

#--------------------------------------------------------------------------------------------------
# Resources
#
# Requests the sample scene file by name.
# NOTE(review): download_files is defined outside this file; where the file is
# fetched from and stored is not visible here.
download_files(FILENAMES geforce.csf.gz)

#####################################################################################
# Additions from packages needed for this sample. According to the original notes,
# each package macro
#   adds refs  to LIBRARIES_OPTIMIZED
#   adds refs  to LIBRARIES_DEBUG
#   adds files to PACKAGE_SOURCE_FILES
# (these variables are consumed by add_executable and the link loops further down)
#
_add_package_OpenGL()
_add_package_ImGUI()
_add_package_ZLIB()

# Directory-scope define, issued before the nvpro_core library is added below.
# NOTE(review): presumably enables zlib support in the CSF loader (the downloaded
# scene is a .gz file) - confirm against the CSF sources.
add_definitions(-DCSF_SUPPORT_ZLIB=1)

#####################################################################################
# Remaining nvpro_core cmake processing; must run *after* the package macros above.
_add_nvpro_core_lib()

#####################################################################################
# Collect this sample's sources: C/C++ code and headers, plus the GLSL shaders.
# Both globs look only in the current directory.
#
file(GLOB SOURCE_FILES
  *.cpp
  *.hpp
  *.inl
  *.h
  *.c
)
file(GLOB GLSL_FILES
  *.glsl
)

#####################################################################################
# Executable target, named after the project
#
if(WIN32)
  # Suppresses the MSVC C runtime "unsafe function" deprecation warnings.
  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()

# Shaders are listed as sources too, which makes them part of the target's file list.
add_executable(${PROJNAME}
  ${SOURCE_FILES}
  ${COMMON_SOURCE_FILES}
  ${PACKAGE_SOURCE_FILES}
  ${GLSL_FILES}
)

#####################################################################################
# IDE source groups: shared and package sources under "common", shaders under "shaders"
#
source_group(common FILES ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES})
source_group(shaders FILES ${GLSL_FILES})

#####################################################################################
# Linkage
#
# Platform libraries and nvpro_core are linked in every configuration.
target_link_libraries(${PROJNAME} ${PLATFORM_LIBRARIES} nvpro_core)

# The "debug" keyword applies to the single item that follows it, hence one call
# per LIBRARIES_DEBUG entry.
foreach(lib_debug ${LIBRARIES_DEBUG})
  target_link_libraries(${PROJNAME} debug ${lib_debug})
endforeach()

# Likewise, "optimized" marks each LIBRARIES_OPTIMIZED entry for non-debug configurations.
foreach(lib_optimized ${LIBRARIES_OPTIMIZED})
  target_link_libraries(${PROJNAME} optimized ${lib_optimized})
endforeach()

#####################################################################################
# Finalize the target (per the original note: copies binaries that need to sit next
# to the exe files, such as ZLib), then install the shader files.
#
_finalize_target(${PROJNAME})

# common.h is installed together with the shaders. It is appended only here, after
# add_executable, so it affects the install lists but not the target's sources.
list(APPEND GLSL_FILES "common.h")

# One install rule per configuration; only the destination folder differs.
install(
  FILES ${GLSL_FILES}
  CONFIGURATIONS Release
  DESTINATION "bin_${ARCH}/GLSL_${PROJNAME}"
)
install(
  FILES ${GLSL_FILES}
  CONFIGURATIONS Debug
  DESTINATION "bin_${ARCH}_debug/GLSL_${PROJNAME}"
)


================================================
FILE: CONTRIBUTING
================================================
https://developercertificate.org/

Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.


Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.

================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

================================================
FILE: README.md
================================================
# gl cadscene render techniques

This sample implements several scene rendering techniques that target mostly static data, such as often found in CAD or DCC applications. In this context, 'static' means that the vertex and index buffers for the scene's objects rarely change. This can include editing the geometry of a few scene objects, but the matrix and material values are the properties that are modified the most across frames. Imagine making edits to the wheel topology of a car, or positioning an engine; the rest of the assembly remains the same.

The principal OpenGL mechanisms that are used here are described in the [SIGGRAPH 2014 presentation slides](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf). It is highly recommended to go through the slides first.

The sample makes use of multiple OpenGL 4 core features, such as **ARB_multi_draw_indirect**, but also showcases OpenGL 3 style rendering techniques.

There are also several techniques built around the **NV_command_list** extension. Please refer to [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) for an introduction to NV_command_list.

> Note: This is just a sample to illustrate several techniques and possibilities for how to approach rendering. Its purpose is not to provide production-level, highly optimized implementations.

### Scene Setup

The sample loads a cadscene file (csf). This file format is inspired by CAD applications' data organization, but (for simplicity) everything is stored in a single RAW file.

The scene is organized into:

 * Matrices: object transforms as well as concatenated world matrices 
 * TreeNodes: a tree containing hierarchical information, mapping to Matrix indices

 * Materials: just classic two-sided OpenGL Blinn-Phong material parameters
 * Geometries: storing vertex and index information, organized into
    * GeometryParts, which reference a sub-range within the index buffer, for either "wireframe" or "solid" surfaces

 * Objects, that reference Geometry and have corresponding
    * ObjectParts, which encode part-level Material and Matrix assignments. Typically, an object uses just one Matrix for all its parts.

### Shademodes

![sample screenshot](https://github.com/nvpro-samples/gl_cadscene_rendertechniques/blob/master/doc/sample.jpg)

- **solid**: only triangles are drawn
- **solid with edges**: triangles and edge outlines on top (using PolygonOffset to push triangles back). When no global sorting (see later) is performed, this means we toggle between the two modes for every object.
- **solid with edges (split test, only in sorted)**: an artificial mode that also separates triangles and edges into different FBOs, and is available in "sorted" and "token" renderers. It does not represent a real use case and exists mainly for internal benchmarking of FBO toggles.

### Strategies

These influence the number of drawcalls we generate for the hardware and software. Using OpenGL's MultiDraw* functions we can have fewer software calls than hardware drawcalls, which helps trigger faster paths in the driver as there is less validation overhead. A strategy is applied on a per-object level.

Imagine an object whose parts use two materials, red and blue:

```
material: r b b r
parts:    A B C D
```

- **materialgroups**
Here we create a per-object cache of drawcall ranges for MultiDraw* based on the object's material and matrix assignments. We also "grow" drawcalls if subsequent ranges in the index buffer have the same assignments. Our sample object would be drawn using 2 states with one glMultiDrawElements call each, which creates 3 hardware drawcalls: red covers ranges A and D, and blue is B+C joined together, as they are next to each other in the index buffer.
- **drawcall join**
As we traverse, we combine consecutive drawcalls that share the same state. This means 3 drawcalls for hardware and 3 for software, as well as 3 states: red A, blue B+C, red D.
- **drawcall individual**
We render each piece individually:
red A, blue B, C, red D.

Typically we do all rendering with basic state redundancy filtering so we don't set up a matrix/material change if the same is still active. To keep things simple for state redundancy filtering, you should not go too fine-grained, otherwise all the tracking causes too much memory hopping. In our case we have 3 indices we track: geometry (handles vertex / index buffer setup), material, and matrix.

### Renderers
Most renderers will traverse the scene data every frame. The data is organized to be cache-friendly first and foremost: everything is stored in arrays, without too much memory hopping. Some renderers may implement additional caching for rendering.

#### Variants:

 - **bindless**: these variants make use of NVIDIA's bindless extensions NV_vertex_buffer_unified_memory and NV_uniform_buffer_unified_memory, which allows a lower-overhead path in the driver for faster drawcall submission. Classic glBindVertexBuffer or glBindBufferRange are replaced with glBufferAddressRangeNV.
 - **sorted**: indicates we do a global scene sort once, to minimize state changes in subsequent frames.
 - **cullsorted**: in addition to global sorting by state, we also apply occlusion culling as presented at the [end of the slides](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf) or in the [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) sample.
 - **emulated**: several of the NV_command_list techniques can be run in emulated mode.

#### Techniques:

We are mostly looking into accelerating our matrix and material parameter switching performance.

- **uborange**
All matrices and materials are stored in big buffer objects, which allows us to efficiently bind the required sub-range for a drawcall via glBindBufferRange(GL_UNIFORM_BUFFER, usageSlot, buffer, index * itemSize, itemSize). NVIDIA provides optimized paths if you keep the buffer and itemSize for a usageSlot constant for many glBindBufferRange calls. Be aware of GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, which is 256 bytes for most current NVIDIA hardware (Fermi, Kepler, Maxwell).

- **ubosub**
Not as efficient as the above, but maybe appropriate if you cannot afford to cache parameter data. We make use of one streaming buffer per usage slot and continuously update it via glBufferSubData. NVIDIA's drivers do particularly well if you never bind this buffer as anything but a GL_UNIFORM_BUFFER and keep size and offsets a multiple of 4.

- **indexedmdi**
Similar to uborange, we store all data in bigger buffers in advance. This doesn't make the data "static"; you can always update the portions you need, but there is a high chance a lot of data is the same frame to frame. This time, we do not bind memory ranges through the OpenGL API, but let the shader do an indirection and only pass the required matrix and material indices.
For the matrix data we use GL_TEXTURE_BUFFER as it's particularly performant for high frequency / potentially divergent access. We typically have far more matrices than materials in our scene. For material data, it's a bit "ugly" to use lots of texelFetch instructions decoding all our parameters; it's much easier to write them as structs and store the array either as GL_UNIFORM_BUFFER or GL_SHADER_STORAGE_BUFFER. The latter is only recommended if you have divergent shader access or exceed the 64 KB limit of UBOs.
To pass the indices per-drawcall we make use of GL_ARB_multi_draw_indirect and "instanced" vertex attributes as described at [GTC 2013 on slide 27](http://on-demand.gputechconf.com/gtc/2013/presentations/S3032-Advanced-Scenegraph-Rendering-Pipeline.pdf).
Therefore this renderer requires two additional buffers: one encoding our object's matrix and material index assignments, and one encoding the scene's drawcalls as GL_DRAW_INDIRECT_BUFFER. 

A hybrid approach, where the parameter index like "indexedmdi" is used for matrices and uborange bind is used for materials, is not yet implemented, but would be a good compromise.

The following renderers make use of the **NV_command_list** extension. In principle they **behave as "uborange"**; however, all buffer bindings and drawcalls are encoded into binary tokens that are submitted in bulk. In preparation for drawing, the appropriate stateobjects are created and reused when rendering (one for lines and one for triangles). While stateobject capturing is not extremely expensive, it is still best to cache it across frames.

- **tokenbuffer**
Similar to indexedmdi we create a buffer that describes our scene by storing all the relevant token commands. This buffer is filled only once and then later reused.
- **tokenlist**
Instead of storing the tokens inside a buffer we make use of the commandlist object, and create and compile one for each shademode for later reuse. Every time our state changes (for instance, when resizing FBOs), we have to recreate these lists, which makes it less flexible than buffer but faster when there are lots of statechanges within the list.
- **tokenstream**
This approach does not reuse the tokens across frames, but instead dynamically creates the tokenstream every frame. By default, the demo fills and submits tokens in chunks of 256 KB; better values may exist depending on the scene.

### Performance

All timings are preliminary results for *Timer Draw* on a win7-64, i7-860, Quadro K5000 system. 

**Important Note About Timer Query Results:** The GPU time reported below is measured via timer queries; those values, however, can be skewed by CPU bottlenecks. The "begin" timestamp may be part of a different command submission to the GPU than the "end" timestamp. That means a long delay on the CPU side between those submissions will also increase the reported GPU time. That is why in CPU-bottlenecked scenarios with tons of OpenGL commands, the GPU times below are close to the CPU time.

```
scene statistics:
geometries:    110
materials:      66
nodes:        5004
objects:      2497

tokenbuffer/glstream complexities:
type: solid              materialgroups | drawcall individual
commandsize:                     347292 | 1301692
statetoggles:                         1 | 1
tokens:                 
GL_DRAW_ELEMENTS_COMMAND_NV:      11103 |   68452
GL_ELEMENT_ADDRESS_COMMAND_NV:      807 |     807
GL_ATTRIBUTE_ADDRESS_COMMAND_NV:    807 |     807
GL_UNIFORM_ADDRESS_COMMAND_NV:     8988 |   11289
GL_POLYGON_OFFSET_COMMAND_NV:         1 |       1

type: solid w edges
commandsize:                     629644 | 2534412
statetoggles:                      4994 |    4994
tokens:
GL_DRAW_ELEMENTS_COMMAND_NV:      22281 |  136750
GL_ELEMENT_ADDRESS_COMMAND_NV:      807 |     807
GL_ATTRIBUTE_ADDRESS_COMMAND_NV:    807 |     807
GL_UNIFORM_ADDRESS_COMMAND_NV:    15457 |   20036
GL_POLYGON_OFFSET_COMMAND_NV:         1 |       1
```

As one can see from the statistics the key difference is the number of drawcalls for the hardware:
- **materialgroups**: ~ 10 000 drawcalls (first two timing columns of the tables below)
- **drawcall individual**: ~ 70 000 drawcalls (last two timing columns of the tables below)

*shademode: solid*

renderer | GPU time | CPU time | GPU time | CPU time (microseconds)
------------ | ------------- | ------------- | ------------- | -------------
**strategy** | **material-** | **-groups** | **drawcall-** | **-individual**
ubosub | 1550 | 1870 |  6000 | 7420
uborange | 1010| 1890 | 3720 | 7660
uborange_bindless | 1010 | 1200 | 2560 | 4900
indexedmdi | 1120 | 1200 | 2080 | 1100
tokenstream | 860 | 300 | 1520 | 1400
tokenbuffer | 780 | <10 | 1230 | <10
tokenlist | 780 | <10 | 880 | <10
tokenbuffer_cullsorted | 540 | 120 | 760 | 120

The results are of course very scene dependent; this model was specifically chosen as it is made of many parts with very few triangles. If the complexity per drawcall were higher (say more triangles or complex shading), then the CPU impact would be lower and we would be GPU-bound. However the CPU time recovered by faster submission mechanisms can always be used elsewhere. So even if we are GPU-bound, time should not be wasted.

We can see that the "token" techniques do very well and are never CPU-bound, and the "indexedmdi" technique is also quite good. This technique is especially useful for very high-frequency parameters, for example when rendering "id-buffers" for selection, but also for matrix indices. For general use-cases, working with uborange binds is recommended. 

*shademode: solid with edges*

Unless "sorted", around 5000 toggles are done between triangles/line rendering. The shader
is manipulated through an immediate vertex attribute to toggle between lit/unlit rendering respectively.

renderer | GPU time | CPU time | GPU time | CPU time (microseconds)
------------ | ------------- | ------------- | ------------- | -------------
**strategy** | **material-** | **-groups** | **drawcall-** | **-individual**
ubosub | 2890 | 3350 | 13000 | 15000
uborange | 2150 | 3700 | 12500 | 15200
uborange_bindless | 2150 | 2640 | 8300 | 10000
indexedmdi | 2340 | 2200 | 4050 | 2050
tokenstream | 1860 | 1250 | 3360 | 3200
tokenbuffer | 1750 | 450 | 2650 | 350
tokenlist | 1650 | <10 | 1890 | <10
tokenbuffer_cullsorted | 770 | 120 | 1250 | 120

Compared to the "solid" results, the tokenbuffer and tokenlist techniques show a greater difference in CPU time.


### Model Explosion View

The simple viewer allows you to add animation to the scene and artificially increase scene complexity via "clones".

![xplodeclones](https://github.com/nvpro-samples/gl_cadscene_rendertechniques/blob/master/doc/xplodeclones.jpg)

To "emulate" typical interaction where users might move objects around or have animated scenes, the sample also implements the matrix transform system sketched on [slide 30](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf). 

The effect works by first moving all object matrices a bit (*xplode-animation.comp.glsl*), and afterwards the transform hierarchy is updated via a system that is implemented in the *transformsystem.cpp / hpp* files.

The code is not particularly tuned but naively assumes that upper levels of the hierarchy contain fewer nodes than lower levels (pyramid). Therefore it uses leaf-processing (which redundantly calculates matrices) instead of level-wise processing for the first 10 levels, to avoid dependencies (one small compute task waiting for the previous). Later levels are always processed level-wise. A better strategy would be to switch between the two approaches based on the actual number of nodes per level. The shaders for this are *transform-leaves.comp.glsl* and *transform-level.comp.glsl*. 

The hierarchy is managed by *nodetree.cpp/hpp*, which stores the tree as an array of 32-bit values. Each value represents a node and encodes its "level" in the hierarchy in 8 bits and its parent index in the remaining bits. This means you can traverse from any node up to the root:

``` cpp
// sample traversal of "idx" node to root
self = array[idx];
while( self.level != 0) {
  self = array[self.parent];
}
// self is now the top root for the idx node
```

The nodetree also stores two node index lists for each level: one storing all nodes of a level, and one for all leaves in this level. We feed these two index lists to the appropriate shader. When leaf processing is used we append the leaves level-wise, which should minimize divergence within a warp (ideally most threads have the same number of levels to ascend in the hierarchy).

Many CAD applications tend to use double-precision matrices, and the system could be adjusted for this. For rendering, however, float matrices should be used. To account for large translation values, one could run a concatenation of view-projection (double) and object-world-matrix (double) per-frame and generate the matrices (float) for actual vertex transforms. To improve memory performance, it might be beneficial to use double only for storing translations within the matrices.

> Note: Only the GPU matrices are updated. CPU techniques such as "ubosub" will not show animations.

### Sample Highlights

This sample is a bit more complex than most others as it contains several subsystems. Don't hesitate to contact the author if something is unclear (commenting was not a priority ;) ).

#### csfviewer.cpp
The principal setup of the sample is in this main file. However, most of the interesting bits happen in the renderers.

- Sample::think - prepares the frame and calls the renderer's draw function

#### renderer... and tokenbase...
Each renderer has its own file and is derived from the **Renderer** class in *renderer.hpp*

- Renderer::init - some renderers may allocate extra buffers or create their own data structures for the scene.
- Renderer::deinit 
- Renderer::draw

The renderers may have additional functions. The "token" renderers using NV_command_list or "indexedmdi", for instance, must create their own scene representation.

#### cadscene...
The "csf" (cadscene file) format is a simple binary format that encodes a scene as is typical for CAD. It closely matches the description at the beginning of the readme. It is not very sophisticated, and is meant for demo purposes.

> *Note*: The **geforce.csf.gz** assembly binary file that ships with this sample **may NOT be redistributed.**

#### nodetree... and transform...
Implement the matrix hierarchy updates as described in the "model explosion view" section.

#### cull... and scan...
For files related to culling, it is best to refer to the [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) sample, as it leverages the same system and focuses on just that topic.

*renderertokensortcull.cpp* implements *RendererCullSortToken::CullJobToken::resultFromBits*, which contains the details of how the occlusion results are handled in this sample. The implementation uses the "raster" "temporal" approach.

#### statesystem... nvtoken... and nvcommandlist...
These files contain helpers when using the NV_command_list extension. Please see [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) for a smaller sample.

### Building
Ideally, clone this and other interesting [nvpro-samples](https://github.com/nvpro-samples) repositories into a common subdirectory. You will always need [nvpro_core](https://github.com/nvpro-samples/nvpro_core). The nvpro_core is searched either as a subdirectory of the sample, or one directory up.

If you are interested in multiple samples, you can use the [build_all](https://github.com/nvpro-samples/build_all) CMake project as the entry point. This will also give you options to enable or disable individual samples when creating the solutions.

### Related Samples
[gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) illustrates the core principle of the NV_command_list extension.
[gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) also uses the occlusion system of this sample, but in a simpler usage scenario.

When using classic scenegraphs, there is typically a lot of overhead in traversing the scene. For this reason, it is highly recommended to use simpler representations for actual rendering. Consider using flattened hierarchies, arrays, memory-friendly data structures, data-oriented design patterns, and similar techniques.
If you are still working with a classic scenegraph, then [nvpro-pipeline](https://github.com/nvpro-pipeline/pipeline) may provide some acceleration strategies to avoid full scenegraph traversal. Some of these strategies are also described in this [GTC 2013 presentation](http://on-demand.gputechconf.com/gtc/2013/presentations/S3032-Advanced-Scenegraph-Rendering-Pipeline.pdf).


================================================
FILE: cadscene.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#include "cadscene.hpp"
#include <fileformats/cadscenefile.h>

#include <algorithm>
#include <assert.h>
#include <cstddef>
#include "glm/gtc/type_ptr.hpp"

// When non-zero, fillCache() merges consecutive items of the same draw state
// whose index ranges are adjacent in the index buffer into a single range.
#define USE_CACHECOMBINE 1


// Returns a vec4 whose four components are each drawn independently via
// rand() and mapped linearly into the interval [from, to].
glm::vec4 randomVector(float from, float to)
{
  const float span = to - from;
  glm::vec4   result;
  for(int c = 0; c < 4; ++c)
  {
    // one rand() call per component, in component order
    const float unit = float(rand()) / float(RAND_MAX);
    result[c]        = from + unit * span;
  }
  return result;
}

// Registers the parent/child links of the CSF node `idx` and of its whole
// subtree in `tree`. Every node index is shifted by `cloneoffset`, so the
// same CSF hierarchy can be replicated for each clone of the scene.
static void recursiveHierarchy(NodeTree& tree, CSFile* csf, int idx, int cloneoffset)
{
  const CSFNode&         node     = csf->nodes[idx];
  const NodeTree::nodeID parentID = (NodeTree::nodeID)idx + cloneoffset;

  // first pass: attach all direct children to this node
  for(int c = 0; c < node.numChildren; ++c)
  {
    tree.setNodeParent((NodeTree::nodeID)node.children[c] + cloneoffset, parentID);
  }

  // second pass: descend into each child's subtree
  for(int c = 0; c < node.numChildren; ++c)
  {
    recursiveHierarchy(tree, csf, node.children[c], cloneoffset);
  }
}

// Loads a CSF scene from `filename` and fills all CPU-side arrays
// (materials, geometry, matrices, objects, node tree) as well as the
// corresponding GL buffers and textures.
//
// filename  - path handed to CSFile_loadExt.
// clones    - number of additional copies of the scene (0 = original only).
// cloneaxis - bit mask of the axes along which the clones are laid out
//             (bit 0 = x, bit 1 = y, bit 2 = z).
//
// Returns false when the file cannot be loaded or does not carry
// CADSCENEFILE_FLAG_UNIQUENODES; returns true otherwise.
bool CadScene::loadCSF(const char* filename, int clones, int cloneaxis)
{
  CSFile*         csf;
  CSFileMemoryPTR mem = CSFileMemory_new();
  if(CSFile_loadExt(&csf, filename, mem) != CADSCENEFILE_NOERROR || !(csf->fileFlags & CADSCENEFILE_FLAG_UNIQUENODES))
  {
    CSFileMemory_delete(mem);
    return false;
  }

  // the original scene plus the requested clones
  int copies = clones + 1;

  CSFile_transform(csf);

  // fixed seed: the rand()-based material variation below is the same on every load
  srand(234525);

  // materials
  m_materials.resize(csf->numMaterials);
  for(int n = 0; n < csf->numMaterials; n++)
  {
    // NOTE(review): csfmaterial is not used below; the color is read through csf->materials[n]
    CSFMaterial* csfmaterial = &csf->materials[n];
    Material&    material    = m_materials[n];

    // both sides get independently randomized values around the file's color
    for(int i = 0; i < 2; i++)
    {
      material.sides[i].ambient  = randomVector(0.0f, 0.1f);
      material.sides[i].diffuse  = glm::make_vec4(csf->materials[n].color) + randomVector(0.0f, 0.07f);
      material.sides[i].specular = randomVector(0.25f, 0.55f);
      material.sides[i].emissive = randomVector(0.0f, 0.05f);
    }
  }

  glCreateBuffers(1, &m_materialsGL);
  glNamedBufferStorage(m_materialsGL, sizeof(Material) * m_materials.size(), &m_materials[0], 0);
  //glMapNamedBufferRange(m_materialsGL, 0, sizeof(Material) * m_materials.size(), GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT);

  // geometry
  // The arrays are sized for all copies; this loop fills only the first
  // numGeoms entries from the file, the clone entries are filled afterwards.
  int numGeoms = csf->numGeometries;
  m_geometry.resize(csf->numGeometries * copies);
  m_geometryBboxes.resize(csf->numGeometries * copies);
  for(int n = 0; n < csf->numGeometries; n++)
  {
    CSFGeometry* csfgeom = &csf->geometries[n];
    Geometry&    geom    = m_geometry[n];

    // -1 marks a geometry that owns its GL buffers (see unload())
    geom.cloneIdx = -1;

    geom.numVertices   = csfgeom->numVertices;
    geom.numIndexSolid = csfgeom->numIndexSolid;
    geom.numIndexWire  = csfgeom->numIndexWire;

    // expand the file's 3-component positions/normals into vec4 vertices
    std::vector<Vertex> vertices(csfgeom->numVertices);
    for(int i = 0; i < csfgeom->numVertices; i++)
    {
      vertices[i].position[0] = csfgeom->vertex[3 * i + 0];
      vertices[i].position[1] = csfgeom->vertex[3 * i + 1];
      vertices[i].position[2] = csfgeom->vertex[3 * i + 2];
      vertices[i].position[3] = 1.0f;
      if(csfgeom->normal)
      {
        vertices[i].normal[0] = csfgeom->normal[3 * i + 0];
        vertices[i].normal[1] = csfgeom->normal[3 * i + 1];
        vertices[i].normal[2] = csfgeom->normal[3 * i + 2];
        vertices[i].normal[3] = 0.0f;
      }
      else
      {
        // no normals in the file: fall back to the normalized vertex position
        vertices[i].normal = glm::vec4(normalize(glm::vec3(vertices[i].position)), 0.0f);
      }


      m_geometryBboxes[n].merge(vertices[i].position);
    }

    geom.vboSize = sizeof(Vertex) * vertices.size();

    glCreateBuffers(1, &geom.vboGL);
    glNamedBufferStorage(geom.vboGL, geom.vboSize, &vertices[0], 0);

    // one index buffer per geometry: all solid indices first, then all wire indices
    std::vector<GLuint> indices(csfgeom->numIndexSolid + csfgeom->numIndexWire);
    memcpy(&indices[0], csfgeom->indexSolid, sizeof(GLuint) * csfgeom->numIndexSolid);
    if(csfgeom->indexWire)
    {
      memcpy(&indices[csfgeom->numIndexSolid], csfgeom->indexWire, sizeof(GLuint) * csfgeom->numIndexWire);
    }

    geom.iboSize = sizeof(GLuint) * indices.size();

    glCreateBuffers(1, &geom.iboGL);
    glNamedBufferStorage(geom.iboGL, geom.iboSize, &indices[0], 0);

    if(has_GL_NV_vertex_buffer_unified_memory)
    {
      // query the GPU addresses and make both buffers resident
      glGetNamedBufferParameterui64vNV(geom.vboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.vboADDR);
      glMakeNamedBufferResidentNV(geom.vboGL, GL_READ_ONLY);

      glGetNamedBufferParameterui64vNV(geom.iboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.iboADDR);
      glMakeNamedBufferResidentNV(geom.iboGL, GL_READ_ONLY);
    }

    geom.parts.resize(csfgeom->numParts);

    // Per-part ranges: offsets are in bytes into the index buffer, counts are in indices.
    // The wire section starts right after all solid indices.
    size_t offsetSolid = 0;
    size_t offsetWire  = csfgeom->numIndexSolid * sizeof(GLuint);
    for(int i = 0; i < csfgeom->numParts; i++)
    {
      geom.parts[i].indexWire.count  = csfgeom->parts[i].numIndexWire;
      geom.parts[i].indexSolid.count = csfgeom->parts[i].numIndexSolid;

      geom.parts[i].indexWire.offset  = offsetWire;
      geom.parts[i].indexSolid.offset = offsetSolid;

      offsetWire += csfgeom->parts[i].numIndexWire * sizeof(GLuint);
      offsetSolid += csfgeom->parts[i].numIndexSolid * sizeof(GLuint);
    }
  }
  // fill the geometry entries of the clones
  for(int c = 1; c <= clones; c++)
  {
    for(int n = 0; n < numGeoms; n++)
    {
      m_geometryBboxes[n + numGeoms * c] = m_geometryBboxes[n];

      const Geometry& geomorig = m_geometry[n];
      Geometry&       geom     = m_geometry[n + numGeoms * c];

      // copies the GL handles/addresses too, so the clone refers to the original's buffers
      geom = geomorig;

#if 1
      // active path: share the buffers and remember which geometry owns them
      geom.cloneIdx = n;
#else
      // disabled path: give every clone its own buffers and copy the data on the GPU
      geom.cloneIdx = -1;
      glCreateBuffers(1, &geom.vboGL);
      glNamedBufferStorage(geom.vboGL, geom.vboSize, 0, 0);

      glCreateBuffers(1, &geom.iboGL);
      glNamedBufferStorage(geom.iboGL, geom.iboSize, 0, 0);

      if(has_GL_NV_vertex_buffer_unified_memory)
      {
        glGetNamedBufferParameterui64vNV(geom.vboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.vboADDR);
        glMakeNamedBufferResidentNV(geom.vboGL, GL_READ_ONLY);

        glGetNamedBufferParameterui64vNV(geom.iboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.iboADDR);
        glMakeNamedBufferResidentNV(geom.iboGL, GL_READ_ONLY);
      }

      glCopyNamedBufferSubData(geomorig.vboGL, geom.vboGL, 0, 0, geom.vboSize);
      glCopyNamedBufferSubData(geomorig.iboGL, geom.iboGL, 0, 0, geom.iboSize);
#endif
    }
  }


  // geometry bounding boxes as a buffer plus an RGBA32F texture-buffer view of it
  glCreateBuffers(1, &m_geometryBboxesGL);
  glNamedBufferStorage(m_geometryBboxesGL, sizeof(BBox) * m_geometryBboxes.size(), &m_geometryBboxes[0], 0);
  glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_geometryBboxesTexGL);
  glTextureBuffer(m_geometryBboxesTexGL, GL_RGBA32F, m_geometryBboxesGL);

  // nodes
  // Copy the matrices of the original nodes, derive their inverse transposes,
  // and count the nodes that reference a geometry (these become objects).
  int numObjects = 0;
  m_matrices.resize(csf->numNodes * copies);

  for(int n = 0; n < csf->numNodes; n++)
  {
    CSFNode* csfnode = &csf->nodes[n];

    memcpy(glm::value_ptr(m_matrices[n].objectMatrix), csfnode->objectTM, sizeof(float) * 16);
    memcpy(glm::value_ptr(m_matrices[n].worldMatrix), csfnode->worldTM, sizeof(float) * 16);

    m_matrices[n].objectMatrixIT = glm::transpose(glm::inverse(m_matrices[n].objectMatrix));
    m_matrices[n].worldMatrixIT  = glm::transpose(glm::inverse(m_matrices[n].worldMatrix));

    if(csfnode->geometryIDX < 0)
      continue;

    numObjects++;
  }


  // objects
  // One object per node with a geometry; numObjects is reused as the write cursor.
  m_objects.resize(numObjects * copies);
  m_objectAssigns.resize(numObjects * copies);
  numObjects = 0;
  for(int n = 0; n < csf->numNodes; n++)
  {
    CSFNode* csfnode = &csf->nodes[n];

    if(csfnode->geometryIDX < 0)
      continue;

    Object& object = m_objects[numObjects];

    object.matrixIndex   = n;
    object.geometryIndex = csfnode->geometryIDX;

    m_objectAssigns[numObjects] = glm::ivec2(object.matrixIndex, object.geometryIndex);

    object.parts.resize(csfnode->numParts);
    for(int i = 0; i < csfnode->numParts; i++)
    {
      object.parts[i].active        = 1;
      // a part without its own node (nodeIDX < 0) uses the object's matrix
      object.parts[i].matrixIndex   = csfnode->parts[i].nodeIDX < 0 ? object.matrixIndex : csfnode->parts[i].nodeIDX;
      object.parts[i].materialIndex = csfnode->parts[i].materialIDX;
    }

    // grow the scene bbox by this object's world-space bbox (original scene only)
    BBox bbox = m_geometryBboxes[object.geometryIndex].transformed(m_matrices[n].worldMatrix);
    m_bbox.merge(bbox);

    updateObjectDrawCache(object);

    numObjects++;
  }

  // compute clone move delta based on m_bbox;

  glm::vec4 dim = m_bbox.max - m_bbox.min;

  // sq = grid cells per used axis; numAxis = number of axis bits set in cloneaxis
  int sq      = 1;
  int numAxis = 0;
  for(int i = 0; i < 3; i++)
  {
    numAxis += (cloneaxis & (1 << i)) ? 1 : 0;
  }

  assert(numAxis);

  // smallest sq such that sq^numAxis cells can hold all copies
  switch(numAxis)
  {
    case 1:
      sq = copies;
      break;
    case 2:
      while(sq * sq < copies)
      {
        sq++;
      }
      break;
    case 3:
      while(sq * sq * sq < copies)
      {
        sq++;
      }
      break;
  }


  for(int c = 1; c <= clones; c++)
  {
    int numNodes = csf->numNodes;

    // cell size: scene extent plus 5% spacing
    glm::vec4 shift = dim * 1.05f;

    // grid coordinates of clone c along the first/second/third used axis
    float u = 0;
    float v = 0;
    float w = 0;

    switch(numAxis)
    {
      case 1:
        u = float(c);
        break;
      case 2:
        u = float(c % sq);
        v = float(c / sq);
        break;
      case 3:
        u = float(c % sq);
        v = float((c / sq) % sq);
        w = float(c / (sq * sq));
        break;
    }

    // `use` holds the next unconsumed grid coordinate; each enabled axis takes
    // one and advances it. x and z are shifted in negative, y in positive direction.
    float use = u;

    if(cloneaxis & (1 << 0))
    {
      shift.x *= -use;
      if(numAxis > 1)
        use = v;
    }
    else
    {
      shift.x = 0;
    }

    if(cloneaxis & (1 << 1))
    {
      shift.y *= use;
      if(numAxis > 2)
        use = w;
      else if(numAxis > 1)
        use = v;
    }
    else
    {
      shift.y = 0;
    }

    if(cloneaxis & (1 << 2))
    {
      shift.z *= -use;
    }
    else
    {
      shift.z = 0;
    }

    // shift is added to the translation column, so it must not change that column's w
    shift.w = 0;

    // move all world matrices
    for(int n = 0; n < numNodes; n++)
    {
      MatrixNode& node     = m_matrices[n + numNodes * c];
      MatrixNode& nodeOrig = m_matrices[n];
      node                 = nodeOrig;
      node.worldMatrix[3]  = node.worldMatrix[3] + shift;
      node.worldMatrixIT   = glm::transpose(glm::inverse(node.worldMatrix));
    }

    {
      // patch object matrix of root
      MatrixNode& node     = m_matrices[csf->rootIDX + numNodes * c];
      node.objectMatrix[3] = node.objectMatrix[3] + shift;
      node.objectMatrixIT  = glm::transpose(glm::inverse(node.objectMatrix));
    }

    // clone objects
    // Copy each original object and rebase every geometry/matrix index
    // (including those in the cached draw states) onto this clone's range.
    for(int n = 0; n < numObjects; n++)
    {
      const Object& objectorig = m_objects[n];
      Object&       object     = m_objects[n + numObjects * c];

      object = objectorig;
      object.geometryIndex += c * numGeoms;
      object.matrixIndex += c * numNodes;
      for(size_t i = 0; i < object.parts.size(); i++)
      {
        object.parts[i].matrixIndex += c * numNodes;
      }
      for(size_t i = 0; i < object.cacheSolid.state.size(); i++)
      {
        object.cacheSolid.state[i].matrixIndex += c * numNodes;
      }
      for(size_t i = 0; i < object.cacheWire.state.size(); i++)
      {
        object.cacheWire.state[i].matrixIndex += c * numNodes;
      }

      m_objectAssigns[n + numObjects * c] = glm::ivec2(object.matrixIndex, object.geometryIndex);
    }
  }

  // upload the matrices of all copies; also exposed as an RGBA32F texture buffer
  glCreateBuffers(1, &m_matricesGL);
  glNamedBufferStorage(m_matricesGL, sizeof(MatrixNode) * m_matrices.size(), &m_matrices[0], 0);
  //glMapNamedBufferRange(m_matricesGL, 0, sizeof(MatrixNode) * m_matrices.size(), GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT);

  glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_matricesTexGL);
  glTextureBuffer(m_matricesTexGL, GL_RGBA32F, m_matricesGL);

  // (matrixIndex, geometryIndex) pair per object
  glCreateBuffers(1, &m_objectAssignsGL);
  glNamedBufferStorage(m_objectAssignsGL, sizeof(glm::ivec2) * m_objectAssigns.size(), &m_objectAssigns[0], 0);

  if(has_GL_NV_vertex_buffer_unified_memory)
  {
    glGetNamedBufferParameterui64vNV(m_materialsGL, GL_BUFFER_GPU_ADDRESS_NV, &m_materialsADDR);
    glMakeNamedBufferResidentNV(m_materialsGL, GL_READ_ONLY);

    glGetNamedBufferParameterui64vNV(m_matricesGL, GL_BUFFER_GPU_ADDRESS_NV, &m_matricesADDR);
    glMakeNamedBufferResidentNV(m_matricesGL, GL_READ_ONLY);

    if(has_GL_ARB_bindless_texture)
    {
      // bindless handle for the matrices texture buffer
      m_matricesTexGLADDR = glGetTextureHandleARB(m_matricesTexGL);
      glMakeTextureHandleResidentARB(m_matricesTexGLADDR);
    }
  }

  // Build the node tree: replicate the CSF hierarchy once per copy (with the
  // node indices offset accordingly) and hang each copy's root below the tree root.
  m_nodeTree.create(copies * csf->numNodes);
  for(int i = 0; i < copies; i++)
  {
    int cloneoffset = (csf->numNodes) * i;
    int root        = csf->rootIDX + cloneoffset;
    recursiveHierarchy(m_nodeTree, csf, csf->rootIDX, cloneoffset);

    m_nodeTree.setNodeParent((NodeTree::nodeID)root, m_nodeTree.getTreeRoot());
    m_nodeTree.addToTree((NodeTree::nodeID)root);
  }

  // upload the tree's compact node array
  glCreateBuffers(1, &m_parentIDsGL);
  glNamedBufferStorage(m_parentIDsGL, m_nodeTree.getTreeCompactNodes().size() * sizeof(GLuint),
                       &m_nodeTree.getTreeCompactNodes()[0], 0);

  // second copy of the initial matrices; resetMatrices() copies it back into m_matricesGL
  glCreateBuffers(1, &m_matricesOrigGL);
  glNamedBufferStorage(m_matricesOrigGL, sizeof(MatrixNode) * m_matrices.size(), &m_matrices[0], 0);
  glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_matricesOrigTexGL);
  glTextureBuffer(m_matricesOrigTexGL, GL_RGBA32F, m_matricesOrigGL);

  CSFileMemory_delete(mem);
  return true;
}


// One entry of the temporary per-object list that updateObjectDrawCache()
// sorts and fillCache() condenses: a draw state plus the index range drawn with it.
struct ListItem
{
  CadScene::DrawStateInfo state;  // material and matrix index
  CadScene::DrawRange     range;  // offset and count within the index buffer
};

// Strict weak ordering for std::sort: orders items by material index, then by
// matrix index, then by index-buffer offset.
//
// The keys are compared directly instead of through their difference: a
// subtraction can overflow for ints and truncates when a size_t offset
// difference is narrowed to int, which would give an inconsistent ordering.
static bool ListItem_compare(const ListItem& a, const ListItem& b)
{
  if(a.state.materialIndex != b.state.materialIndex)
    return a.state.materialIndex < b.state.materialIndex;

  if(a.state.matrixIndex != b.state.matrixIndex)
    return a.state.matrixIndex < b.state.matrixIndex;

  return a.range.offset < b.range.offset;
}

// Condenses a sorted item list into `cache`.
//
// For every run of items that share the same draw state, one entry is written
// to cache.state / cache.stateCount, where stateCount is the number of ranges
// emitted for that run. The ranges themselves go to cache.offsets /
// cache.counts. With USE_CACHECOMBINE enabled, an item whose range starts
// exactly where the current range ends (offsets are in bytes, counts in GLuint
// indices) is merged into the current range instead of starting a new one.
//
// `cache` is reset first; an empty list leaves it empty.
static void fillCache(CadScene::DrawRangeCache& cache, const std::vector<ListItem>& list)
{
  cache = CadScene::DrawRangeCache();

  if(list.empty())
    return;

  CadScene::DrawStateInfo curState = list[0].state;
  CadScene::DrawRange     curRange = list[0].range;

  // ranges already pushed for curState (the pending curRange is not counted yet)
  int pushedRanges = 0;

  for(size_t i = 1; i < list.size(); i++)
  {
    const ListItem& item = list[i];

    if(item.state != curState)
    {
      // state change: flush the pending range and close the current state
      cache.offsets.push_back(curRange.offset);
      cache.counts.push_back(curRange.count);
      cache.state.push_back(curState);
      cache.stateCount.push_back(pushedRanges + 1);

      // start over with this item
      pushedRanges = 0;
      curState     = item.state;
      curRange     = item.range;
    }
    else if(USE_CACHECOMBINE && item.range.offset == (curRange.offset + sizeof(GLuint) * curRange.count))
    {
      // contiguous with the pending range: extend it
      curRange.count += item.range.count;
    }
    else
    {
      // same state but not contiguous: flush the pending range, begin a new one
      pushedRanges++;
      cache.offsets.push_back(curRange.offset);
      cache.counts.push_back(curRange.count);

      curRange = item.range;
    }
  }

  // flush whatever is still pending after the last item
  cache.offsets.push_back(curRange.offset);
  cache.counts.push_back(curRange.count);
  cache.state.push_back(curState);
  cache.stateCount.push_back(pushedRanges + 1);
}

// Rebuilds object.cacheSolid and object.cacheWire from the object's active
// parts: collects one item per active part for the solid and for the wire
// index range, sorts both lists with ListItem_compare and condenses them via
// fillCache().
void CadScene::updateObjectDrawCache(Object& object)
{
  const Geometry& geometry  = m_geometry[object.geometryIndex];
  const size_t    partCount = geometry.parts.size();

  std::vector<ListItem> solidItems;
  std::vector<ListItem> wireItems;
  solidItems.reserve(partCount);
  wireItems.reserve(partCount);

  for(size_t p = 0; p < partCount; ++p)
  {
    const ObjectPart& objectPart = object.parts[p];
    if(objectPart.active)
    {
      // both items share the same draw state and differ only in the range
      ListItem item;
      item.state.materialIndex = objectPart.materialIndex;
      item.state.matrixIndex   = objectPart.matrixIndex;

      item.range = geometry.parts[p].indexSolid;
      solidItems.push_back(item);

      item.range = geometry.parts[p].indexWire;
      wireItems.push_back(item);
    }
  }

  std::sort(solidItems.begin(), solidItems.end(), ListItem_compare);
  std::sort(wireItems.begin(), wireItems.end(), ListItem_compare);

  fillCache(object.cacheSolid, solidItems);
  fillCache(object.cacheWire, wireItems);
}

void CadScene::enableVertexFormat(int attrPos, int attrNormal)
{
  glVertexAttribFormat(attrPos, 3, GL_FLOAT, GL_FALSE, 0);
  glVertexAttribFormat(attrNormal, 3, GL_FLOAT, GL_FALSE, offsetof(CadScene::Vertex, normal));
  glVertexAttribBinding(attrPos, 0);
  glVertexAttribBinding(attrNormal, 0);
  glEnableVertexAttribArray(attrPos);
  glEnableVertexAttribArray(attrNormal);
  glBindVertexBuffer(0, 0, 0, sizeof(CadScene::Vertex));
}

void CadScene::disableVertexFormat(int attrPos, int attrNormal)
{
  glDisableVertexAttribArray(attrPos);
  glDisableVertexAttribArray(attrNormal);
  glBindVertexBuffer(0, 0, 0, sizeof(CadScene::Vertex));
}

void CadScene::unload()
{
  if(m_geometry.empty())
    return;

  glFinish();

  if(has_GL_NV_vertex_buffer_unified_memory)
  {
    if(has_GL_ARB_bindless_texture)
    {
      glMakeTextureHandleNonResidentARB(m_matricesTexGLADDR);
    }

    glMakeNamedBufferNonResidentNV(m_matricesGL);
    glMakeNamedBufferNonResidentNV(m_materialsGL);
  }

  glDeleteTextures(1, &m_matricesOrigTexGL);
  glDeleteTextures(1, &m_matricesTexGL);
  glDeleteTextures(1, &m_geometryBboxesTexGL);

  glDeleteBuffers(1, &m_matricesOrigGL);
  glDeleteBuffers(1, &m_matricesGL);
  glDeleteBuffers(1, &m_materialsGL);
  glDeleteBuffers(1, &m_objectAssignsGL);
  glDeleteBuffers(1, &m_geometryBboxesGL);
  glDeleteBuffers(1, &m_parentIDsGL);


  for(size_t i = 0; i < m_geometry.size(); i++)
  {
    if(m_geometry[i].cloneIdx >= 0)
      continue;

    if(has_GL_NV_vertex_buffer_unified_memory)
    {
      glMakeNamedBufferNonResidentNV(m_geometry[i].iboGL);
      glMakeNamedBufferNonResidentNV(m_geometry[i].vboGL);
    }
    glDeleteBuffers(1, &m_geometry[i].iboGL);
    glDeleteBuffers(1, &m_geometry[i].vboGL);
  }

  m_matrices.clear();
  m_geometryBboxes.clear();
  m_geometry.clear();
  m_objectAssigns.clear();
  m_objects.clear();
  m_geometryBboxes.clear();
  m_nodeTree.clear();

  glFinish();
}

// Restores the working matrix buffer by copying the complete contents of
// m_matricesOrigGL into m_matricesGL on the GPU.
void CadScene::resetMatrices()
{
  const size_t byteCount = sizeof(CadScene::MatrixNode) * m_matrices.size();

  glCopyNamedBufferSubData(m_matricesOrigGL, m_matricesGL, 0, 0, byteCount);
}


================================================
FILE: cadscene.hpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


#ifndef CADSCENE_H__
#define CADSCENE_H__

#include <cstring> // memset
#include <nvgl/extensions_gl.hpp>
#include <glm/glm.hpp>
#include <vector>
#include "nodetree.hpp"

// CPU-side representation of a loaded CAD scene together with the handles of
// the GL resources created for it by loadCSF() and released by unload().
class CadScene {

public:

  // Axis-aligned bounding box stored as two vec4 corners.
  struct BBox {
    glm::vec4    min;
    glm::vec4    max;

    // Starts "inverted" (min = +FLT_MAX, max = -FLT_MAX) so that the first
    // merge() fully defines the box.
    BBox() : min(FLT_MAX), max(-FLT_MAX) {}

    // Grows the box so that it contains `point`.
    inline void merge( const glm::vec4& point )
    {
      min = glm::min(min, point);
      max = glm::max(max, point);
    }

    // Grows the box so that it contains `bbox`.
    inline void merge( const BBox& bbox )
    {
      min = glm::min(min, bbox.min);
      max = glm::max(max, bbox.max);
    }

    // Returns the axis-aligned box enclosing this box's corners after they
    // have been multiplied by `matrix`.
    //
    // dim - number of dimensions whose min/max combinations are enumerated:
    //       the first (1 << dim) corners are used. The default of 3 uses the
    //       eight x/y/z corners, all with w = min.w. Values outside [0, 4] are
    //       clamped, since the corner table only has 16 entries.
    inline BBox transformed ( const glm::mat4 &matrix, int dim=3) const
    {
      const int usedDim    = dim < 0 ? 0 : (dim > 4 ? 4 : dim);
      const int numCorners = 1 << usedDim;

      int i;
      glm::vec4 box[16];
      // create box corners
      box[0] = glm::vec4(min.x,min.y,min.z,min.w);
      box[1] = glm::vec4(max.x,min.y,min.z,min.w);
      box[2] = glm::vec4(min.x,max.y,min.z,min.w);
      box[3] = glm::vec4(max.x,max.y,min.z,min.w);
      box[4] = glm::vec4(min.x,min.y,max.z,min.w);
      box[5] = glm::vec4(max.x,min.y,max.z,min.w);
      box[6] = glm::vec4(min.x,max.y,max.z,min.w);
      box[7] = glm::vec4(max.x,max.y,max.z,min.w);

      box[8] = glm::vec4(min.x,min.y,min.z,max.w);
      box[9] = glm::vec4(max.x,min.y,min.z,max.w);
      box[10] = glm::vec4(min.x,max.y,min.z,max.w);
      box[11] = glm::vec4(max.x,max.y,min.z,max.w);
      box[12] = glm::vec4(min.x,min.y,max.z,max.w);
      box[13] = glm::vec4(max.x,min.y,max.z,max.w);
      box[14] = glm::vec4(min.x,max.y,max.z,max.w);
      box[15] = glm::vec4(max.x,max.y,max.z,max.w);

      // transform box corners
      // and find new mins,maxs
      BBox bbox;

      for (i = 0; i < numCorners ; i++){
        glm::vec4 point = matrix * box[i];
        bbox.merge(point);
      }

      return bbox;
    }
  };

  // Shading parameters for one side of a material.
  struct MaterialSide {
    glm::vec4 ambient;
    glm::vec4 diffuse;
    glm::vec4 specular;
    glm::vec4 emissive;
  };

  // need to keep this 256 byte aligned (UBO range)
  // (2 * 64 bytes sides + 32 bytes addresses + 16 bytes textures + 80 bytes padding)
  struct Material {
    MaterialSide  sides[2];
    GLuint64      texturesADDR[4];
    GLuint        textures[4];
    GLuint        _pad[4+16];

    // zero-initializes every member, including the padding
    Material() {
      memset(this,0,sizeof(Material));
    }
  };

  // need to keep this 256 byte aligned (UBO range)
  // (four mat4 of 64 bytes each; the order must match the shader side)
  struct MatrixNode {
    glm::mat4  worldMatrix;
    glm::mat4  worldMatrixIT;   // inverse transpose of worldMatrix
    glm::mat4  objectMatrix;
    glm::mat4  objectMatrixIT;  // inverse transpose of objectMatrix
  };

  // Vertex layout of the geometry vertex buffers.
  struct Vertex {
    glm::vec4 position;
    glm::vec4 normal;
  };

  // A contiguous run of indices inside a geometry's index buffer.
  struct DrawRange {
    size_t        offset;   // byte offset into the index buffer
    int           count;    // number of indices

    DrawRange() : offset(0) , count(0) {}
  };

  // The per-draw state: which material and which matrix to use.
  struct DrawStateInfo {
    int           materialIndex;
    int           matrixIndex;

    friend bool operator != ( const DrawStateInfo &lhs,  const DrawStateInfo &rhs){
      return lhs.materialIndex != rhs.materialIndex || lhs.matrixIndex != rhs.matrixIndex;
    }

    friend bool operator == ( const DrawStateInfo &lhs,  const DrawStateInfo &rhs){
      return lhs.materialIndex == rhs.materialIndex && lhs.matrixIndex == rhs.matrixIndex;
    }
  };

  // Condensed draw list of an object: state[i] is used for stateCount[i]
  // consecutive entries of offsets/counts.
  struct DrawRangeCache {
    std::vector<DrawStateInfo>    state;
    std::vector<int>          stateCount;

    std::vector<size_t>       offsets;
    std::vector<int>          counts;
  };

  // Index ranges of one geometry part, for solid and for wireframe drawing.
  struct GeometryPart {
    DrawRange     indexSolid;
    DrawRange     indexWire;
  };

  // One geometry: GL vertex/index buffers plus the per-part index ranges.
  struct Geometry {
    GLuint    vboGL;
    GLuint    iboGL;
    GLuint64  vboADDR;    // GPU addresses, only queried when unified memory is available
    GLuint64  iboADDR;
    size_t    vboSize;    // buffer sizes in bytes
    size_t    iboSize;

    std::vector<GeometryPart> parts;

    int       numVertices;
    int       numIndexSolid;
    int       numIndexWire;
    
    // index of the geometry whose GL buffers are shared, or -1 if this
    // geometry owns its buffers
    int       cloneIdx;
  };

  // Per-object state of one geometry part.
  struct ObjectPart {
    int   active;         // inactive parts are left out of the draw caches
    int   materialIndex;
    int   matrixIndex;
  };

  // An instance of a geometry placed in the scene.
  struct Object {
    int             matrixIndex;
    int             geometryIndex;

    std::vector<ObjectPart> parts;

    DrawRangeCache  cacheSolid;
    DrawRangeCache  cacheWire;
  };

  std::vector<Material>       m_materials;
  std::vector<BBox>           m_geometryBboxes;
  std::vector<Geometry>       m_geometry;
  std::vector<MatrixNode>     m_matrices;
  std::vector<Object>         m_objects;
  std::vector<glm::ivec2>  m_objectAssigns;   // (matrixIndex, geometryIndex) per object


  // bounding box of the scene, accumulated from the objects in loadCSF()
  BBox      m_bbox;

  GLuint    m_materialsGL;
  GLuint64  m_materialsADDR;
  GLuint    m_matricesGL;
  GLuint64  m_matricesADDR;
  GLuint    m_matricesTexGL;
  GLuint64  m_matricesTexGLADDR;
  GLuint    m_geometryBboxesGL;
  GLuint    m_geometryBboxesTexGL;
  GLuint    m_objectAssignsGL;

  GLuint    m_parentIDsGL;

  // copy of the initial matrices, used by resetMatrices()
  GLuint    m_matricesOrigGL;
  GLuint    m_matricesOrigTexGL;

  NodeTree  m_nodeTree;

  // Rebuilds the solid and wire draw caches of `object` from its active parts.
  void  updateObjectDrawCache(Object& object);
  
  // Loads a CSF file and creates all CPU and GL data; `clones` extra copies of
  // the scene are laid out along the axes selected by the `cloneaxis` bit mask.
  // Returns false on failure.
  bool  loadCSF(const char* filename, int clones = 0, int cloneaxis=3);
  // Releases all GL resources and clears the CPU-side containers.
  void  unload();

  // Configure / tear down the vertex attribute setup for CadScene::Vertex.
  static void enableVertexFormat(int attrPos, int attrNormal);
  static void disableVertexFormat(int attrPos, int attrNormal);
  // Copies the original matrices back into the working matrix buffer.
  void resetMatrices();
};


#endif



================================================
FILE: common.h
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */




// NOTE(review): presumably the vertex attribute locations shared between the
// C++ code and the shaders - confirm against the shader sources.
#define VERTEX_POS      0
#define VERTEX_NORMAL   1
#define VERTEX_ASSIGNS  2
#define VERTEX_WIREMODE 3

// Uniform buffer binding points. UBO_SCENE and UBO_MATRIX are used by the
// layout(binding=...) declarations in the GLSL part of this header;
// UBO_MATERIAL is presumably used by the shaders that include it.
#define UBO_SCENE     0
#define UBO_MATRIX    1
#define UBO_MATERIAL  2

// Texture unit of the matrices samplerBuffer (used when bindless textures are unavailable).
#define TEX_MATRICES  0

// NOTE(review): not referenced in this header; meaning to be confirmed at the use sites.
#define USE_BASEINSTANCE  0

//#define UNI_WIREFRAME 0


#ifdef __cplusplus
namespace csfviewer
{
  using namespace glm;
#endif

// Per-scene data shared between C++ and GLSL. On the GLSL side this is the
// content of the std140 uniform block bound at UBO_SCENE, so the member order
// and types must stay identical for both languages.
struct SceneData {
  mat4  viewProjMatrix;
  mat4  viewMatrix;
  mat4  viewMatrixIT;

  vec4  viewPos;
  vec4  viewDir;
  
  vec4  wLightPos;
  
  ivec2 viewport;
  // handle that GLSL turns into the matrices samplerBuffer when
  // GL_NV_bindless_texture is available
  uvec2 tboMatrices;
};

#ifdef __cplusplus
}
#endif


#if defined(GL_core_profile) || defined(GL_compatibility_profile) || defined(GL_es_profile)

#extension GL_NV_command_list : enable
#if GL_NV_command_list
layout(commandBindableNV) uniform;
#endif

// everything inside this #if is GLSL-only and is not seen by C++

layout(std140,binding=UBO_SCENE) uniform sceneBuffer {
  SceneData   scene;
};

// must match cadscene!
layout(std140,binding=UBO_MATRIX) uniform matrixBuffer {
  mat4 worldMatrix;
  mat4 worldMatrixIT;
  mat4 objectMatrix;
  mat4 objectMatrixIT;
} object;

#extension GL_ARB_bindless_texture : enable
#extension GL_NV_bindless_texture : enable
#if GL_NV_bindless_texture
#define matricesBuffer  samplerBuffer(scene.tboMatrices)
#else
layout(binding=TEX_MATRICES) uniform samplerBuffer matricesBuffer;
#endif
// must match cadscene!
#define NODE_MATRIX_WORLD     0
#define NODE_MATRIX_WORLDIT   1
#define NODE_MATRIX_OBJECT    2
#define NODE_MATRIX_OBJECTIT  3
#define NODE_MATRICES         4

// Fetches one mat4 from the matrices texture buffer.
// idx  - node index; every node occupies NODE_MATRICES consecutive matrices.
// what - which matrix of the node to read (one of the NODE_MATRIX_* values).
// Each matrix is stored as four consecutive RGBA texels, one per column.
mat4 getIndexedMatrix(int idx, int what)
{
  int firstTexel = (idx * NODE_MATRICES + what) * 4;

  vec4 col0 = texelFetch(matricesBuffer, firstTexel + 0);
  vec4 col1 = texelFetch(matricesBuffer, firstTexel + 1);
  vec4 col2 = texelFetch(matricesBuffer, firstTexel + 2);
  vec4 col3 = texelFetch(matricesBuffer, firstTexel + 3);

  return mat4(col0, col1, col2, col3);
}

#endif

================================================
FILE: csf.cpp
================================================
/*
 * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


// This translation unit contains no code of its own: it only defines the
// *_IMPLEMENTATION macros and includes the corresponding headers.
// NOTE(review): presumably both headers are single-header libraries that emit
// their function bodies when the macro is defined before inclusion - confirm.
#define CSF_IMPLEMENTATION
// Feature switches consumed by cadscenefile.h; judging by their names they
// enable glTF 2 import and file-mapping support - TODO confirm in the header.
#define CSF_SUPPORT_GLTF2       1
#define CSF_SUPPORT_FILEMAPPING 1

#include <fileformats/cadscenefile.h>

// cgltf implementation (presumably required by the glTF support enabled above).
#define CGLTF_IMPLEMENTATION
#include <cgltf.h>





================================================
FILE: csfviewer.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#define DEBUG_FILTER 1

#include <nvgl/extensions_gl.hpp>

#include <imgui/backends/imgui_impl_gl.h>
#include <imgui/imgui_helper.h>

#include <nvgl/glsltypes_gl.hpp>

#include <nvh/cameracontrol.hpp>
#include <nvh/fileoperations.hpp>
#include <nvh/geometry.hpp>
#include <nvh/misc.hpp>

#include <nvgl/appwindowprofiler_gl.hpp>
#include <nvgl/base_gl.hpp>
#include <nvgl/error_gl.hpp>
#include <nvgl/programmanager_gl.hpp>

#include "transformsystem.hpp"

#include "cadscene.hpp"
#include "renderer.hpp"

#include <algorithm>

#include "common.h"
#include "glm/gtc/matrix_access.hpp"
#include "glm/gtc/type_ptr.hpp"


namespace csfviewer {
// Initial window size passed to Sample::run() in main().
int const SAMPLE_SIZE_WIDTH(800);
int const SAMPLE_SIZE_HEIGHT(600);
// NOTE(review): not referenced in the visible part of this file; presumably the
// OpenGL context version the sample targets - confirm before relying on it.
int const SAMPLE_MAJOR_VERSION(4);
int const SAMPLE_MINOR_VERSION(5);


class Sample : public nvgl::AppWindowProfilerGL
{
public:
  enum GuiEnums
  {
    GUI_RENDERER,
    GUI_MSAA,
    GUI_SHADE,
    GUI_STRATEGY,
  };

  struct
  {
    nvgl::ProgramID draw_object, draw_object_tris, draw_object_line, draw_object_indexed, draw_object_indexed_tris,
        draw_object_indexed_line,

        cull_object_frustum, cull_object_hiz, cull_object_raster, cull_bit_temporallast, cull_bit_temporalnew,
        cull_bit_regular, cull_depth_mips,

        scan_prefixsum, scan_offsets, scan_combine,

        transform_leaves, transform_level,

        xplode;

  } programs;

  struct
  {
    GLuint scene  = 0;
    GLuint scene2 = 0;
  } fbos;

  struct
  {
    GLuint scene_ubo = 0;
  } buffers;

  struct
  {
    GLuint64 scene_ubo;
  } addresses;

  struct
  {
    GLuint scene_color         = 0;
    GLuint scene_color2        = 0;
    GLuint scene_depthstencil  = 0;
    GLuint scene_depthstencil2 = 0;
  } textures;

  struct Tweak
  {
    int       renderer      = 0;
    ShadeType shade         = SHADE_SOLID;
    Strategy  strategy      = STRATEGY_GROUPS;
    int       clones        = 0;
    bool      cloneaxisX    = true;
    bool      cloneaxisY    = true;
    bool      cloneaxisZ    = false;
    bool      animateActive = false;
    float     animateMin    = 1;
    float     animateDelta  = 1;
    int       zoom          = 100;
    int       msaa          = 0;
    bool      noUI          = false;
  };

  nvgl::ProgramManager m_progManager;

  ImGuiH::Registry m_ui;
  double           m_uiTime = 0;

  Tweak m_tweak;
  Tweak m_lastTweak;

  std::string m_modelFilename;

  SceneData       m_sceneUbo;
  CadScene        m_scene;
  TransformSystem m_transformSystem;

  GLuint m_xplodeGroupSize;

  std::vector<unsigned int> m_renderersSorted;
  std::string               m_rendererName;

  Renderer* NV_RESTRICT m_renderer;
  Resources             m_resources;

  size_t m_stateChangeID;


  void updateProgramDefine();
  bool initProgram();
  bool initScene(const char* filename, int clones, int cloneaxis);
  bool initFramebuffers(int width, int height);
  void initRenderer(int type, Strategy strategy);
  void deinitRenderer();

  void getCullPrograms(CullingSystem::Programs& cullprograms);
  void getScanPrograms(ScanSystem::Programs& scanprograms);
  void getTransformPrograms(TransformSystem::Programs& xfromPrograms);

  void updatedPrograms();

  void setupConfigParameters();
  void setRendererFromName();


public:
  Sample() { setupConfigParameters(); }

  bool validateConfig() override;

  bool begin() override;
  void think(double time) override;
  void resize(int width, int height) override;

  void processUI(double time);

  nvh::CameraControl m_control;

  void end() override { ImGui::ShutdownGL(); }
  // return true to prevent m_windowState updates
  bool mouse_pos(int x, int y) override
  {
    if(m_tweak.noUI)
      return false;
    return ImGuiH::mouse_pos(x, y);
  }
  bool mouse_button(int button, int action) override
  {
    if(m_tweak.noUI)
      return false;
    return ImGuiH::mouse_button(button, action);
  }
  bool mouse_wheel(int wheel) override
  {
    if(m_tweak.noUI)
      return false;
    return ImGuiH::mouse_wheel(wheel);
  }
  bool key_char(int button) override
  {
    if(m_tweak.noUI)
      return false;
    return ImGuiH::key_char(button);
  }
  bool key_button(int button, int action, int mods) override
  {
    if(m_tweak.noUI)
      return false;
    return ImGuiH::key_button(button, action, mods);
  }
};

// Hook called by initProgram() before the programs are created; currently a no-op.
void Sample::updateProgramDefine() {}

// Fills `xformPrograms` with the GL program names of the two transform compute programs.
void Sample::getTransformPrograms(TransformSystem::Programs& xformPrograms)
{
  // per-level pass
  xformPrograms.transform_level = m_progManager.get(programs.transform_level);
  // leaf pass
  xformPrograms.transform_leaves = m_progManager.get(programs.transform_leaves);
}

// Fills `cullprograms` with the GL program names of all culling-related programs.
void Sample::getCullPrograms(CullingSystem::Programs& cullprograms)
{
  // object visibility tests
  cullprograms.object_frustum = m_progManager.get(programs.cull_object_frustum);
  cullprograms.object_hiz     = m_progManager.get(programs.cull_object_hiz);
  cullprograms.object_raster  = m_progManager.get(programs.cull_object_raster);

  // result bit packing (regular and temporal variants)
  cullprograms.bit_regular      = m_progManager.get(programs.cull_bit_regular);
  cullprograms.bit_temporallast = m_progManager.get(programs.cull_bit_temporallast);
  cullprograms.bit_temporalnew  = m_progManager.get(programs.cull_bit_temporalnew);

  // depth mip-chain downsampling
  cullprograms.depth_mips = m_progManager.get(programs.cull_depth_mips);
}

// Fills `scanprograms` with the GL program names of the three scan compute programs.
void Sample::getScanPrograms(ScanSystem::Programs& scanprograms)
{
  scanprograms.combine   = m_progManager.get(programs.scan_combine);
  scanprograms.offsets   = m_progManager.get(programs.scan_offsets);
  scanprograms.prefixsum = m_progManager.get(programs.scan_prefixsum);
}

bool Sample::initProgram()
{
  bool validated(true);
  m_progManager.m_filetype = nvh::ShaderFileManager::FILETYPE_GLSL;
  m_progManager.addDirectory(std::string("GLSL_" PROJECT_NAME));
  m_progManager.addDirectory(exePath() + std::string(PROJECT_RELDIRECTORY));

  m_progManager.registerInclude("common.h");

  updateProgramDefine();

  programs.draw_object =
      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "scene.vert.glsl"),
                                  nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "scene.frag.glsl"));

  programs.draw_object_tris = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define WIREMODE 0\n", "scene.vert.glsl"),
      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define WIREMODE 0\n", "scene.frag.glsl"));

  programs.draw_object_line = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define WIREMODE 1\n", "scene.vert.glsl"),
      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define WIREMODE 1\n", "scene.frag.glsl"));

  programs.draw_object_indexed = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define USE_INDEXING 1\n", "scene.vert.glsl"),
      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define USE_INDEXING 1\n", "scene.frag.glsl"));

  programs.draw_object_indexed_tris = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 0\n",
                                       "scene.vert.glsl"),
      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 0\n",
                                       "scene.frag.glsl"));

  programs.draw_object_indexed_line = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 1\n",
                                       "scene.vert.glsl"),
      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 1\n",
                                       "scene.frag.glsl"));


  programs.cull_object_raster = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n",
                                       "cull-raster.vert.glsl"),
      nvgl::ProgramManager::Definition(GL_GEOMETRY_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n",
                                       "cull-raster.geo.glsl"),
      nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n",
                                       "cull-raster.frag.glsl"));

  programs.cull_object_frustum = m_progManager.createProgram(nvgl::ProgramManager::Definition(
      GL_VERTEX_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n", "cull-xfb.vert.glsl"));

  programs.cull_object_hiz = m_progManager.createProgram(nvgl::ProgramManager::Definition(
      GL_VERTEX_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n#define OCCLUSION\n", "cull-xfb.vert.glsl"));

  programs.cull_bit_regular = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define TEMPORAL 0\n", "cull-bitpack.vert.glsl"));
  programs.cull_bit_temporallast = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define TEMPORAL TEMPORAL_LAST\n", "cull-bitpack.vert.glsl"));
  programs.cull_bit_temporalnew = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define TEMPORAL TEMPORAL_NEW\n", "cull-bitpack.vert.glsl"));

  programs.cull_depth_mips =
      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "cull-downsample.vert.glsl"),
                                  nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "cull-downsample.frag.glsl"));

  programs.scan_prefixsum = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "#define TASK TASK_SUM\n", "scan.comp.glsl"));
  programs.scan_offsets = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "#define TASK TASK_OFFSETS\n", "scan.comp.glsl"));
  programs.scan_combine = m_progManager.createProgram(
      nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "#define TASK TASK_COMBINE\n", "scan.comp.glsl"));

  programs.transform_leaves =
      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "transform-leaves.comp.glsl"));
  programs.transform_level =
      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "transform-level.comp.glsl"));

  programs.xplode =
      m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "xplode-animation.comp.glsl"));

  validated = m_progManager.areProgramsValid();

  return validated;
}

// (Re)creates the scene uniform buffer and loads the scene from `filename`.
// `clones` and `cloneaxis` are forwarded unchanged to CadScene::loadCSF.
// Returns the status reported by the loader.
bool Sample::initScene(const char* filename, int clones, int cloneaxis)
{
  m_scene.unload();

  // A previously created UBO must leave residency before it is replaced.
  if(has_GL_NV_shader_buffer_load && buffers.scene_ubo)
  {
    glMakeNamedBufferNonResidentNV(buffers.scene_ubo);
  }

  nvgl::newBuffer(buffers.scene_ubo);
  glNamedBufferStorage(buffers.scene_ubo, sizeof(SceneData), nullptr, GL_DYNAMIC_STORAGE_BIT);

  // Query the GPU address and make the buffer resident for bindless access.
  if(has_GL_NV_shader_buffer_load)
  {
    glGetNamedBufferParameterui64vNV(buffers.scene_ubo, GL_BUFFER_GPU_ADDRESS_NV, &addresses.scene_ubo);
    glMakeNamedBufferResidentNV(buffers.scene_ubo, GL_READ_ONLY);
  }

  // Publish the new buffer to the renderers and flag the change.
  m_resources.sceneAddr = addresses.scene_ubo;
  m_resources.sceneUbo  = buffers.scene_ubo;
  m_resources.stateChangeID++;

  const bool loaded = m_scene.loadCSF(filename, clones, cloneaxis);

  // Print a short summary of what was loaded.
  LOGI("\nscene %s\n", filename);
  LOGI("geometries: %6d\n", (uint32_t)m_scene.m_geometry.size());
  LOGI("materials:  %6d\n", (uint32_t)m_scene.m_materials.size());
  LOGI("nodes:      %6d\n", (uint32_t)m_scene.m_matrices.size());
  LOGI("objects:    %6d\n", (uint32_t)m_scene.m_objects.size());
  LOGI("\n");

  return loaded;
}

// (Re)creates the color and depth-stencil render targets for the two scene
// framebuffers at the given size, honoring the selected MSAA sample count
// (m_tweak.msaa, 0 = no multisampling).
//
// The framebuffer objects themselves are only recreated on first use or when
// the MSAA setting changed; the textures are recreated on every call.
// Always returns true.
bool Sample::initFramebuffers(int width, int height)
{
  // Layered mode stores both targets as the two layers of one array texture.
  // It is hard-wired on, so the non-layered branch below is currently unused.
  bool layered = true;

  if(!fbos.scene || m_tweak.msaa != m_lastTweak.msaa)
  {
    nvgl::newFramebuffer(fbos.scene);
    nvgl::newFramebuffer(fbos.scene2);

    m_resources.fbo  = fbos.scene;
    m_resources.fbo2 = fbos.scene2;

    m_resources.stateChangeID++;
  }

  if(layered)
  {
    // Release bindless residency of the old textures before they are replaced.
    if(has_GL_NV_bindless_texture && textures.scene_color)
    {
      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color));
      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));
    }

    nvgl::newTexture(textures.scene_color, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY);
    nvgl::newTexture(textures.scene_depthstencil, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY);

    // Two layers: layer 0 backs fbos.scene, layer 1 backs fbos.scene2.
    if(m_tweak.msaa)
    {
      glTextureStorage3DMultisample(textures.scene_color, m_tweak.msaa, GL_RGBA8, width, height, 2, GL_TRUE);
      glTextureStorage3DMultisample(textures.scene_depthstencil, m_tweak.msaa, GL_DEPTH24_STENCIL8, width, height, 2, GL_TRUE);
    }
    else
    {
      glTextureStorage3D(textures.scene_color, 1, GL_RGBA8, width, height, 2);
      glTextureStorage3D(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height, 2);
    }

    glNamedFramebufferTextureLayer(fbos.scene, GL_COLOR_ATTACHMENT0, textures.scene_color, 0, 0);
    glNamedFramebufferTextureLayer(fbos.scene, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0, 0);

    glNamedFramebufferTextureLayer(fbos.scene2, GL_COLOR_ATTACHMENT0, textures.scene_color, 0, 1);
    glNamedFramebufferTextureLayer(fbos.scene2, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0, 1);

    if(has_GL_NV_bindless_texture)
    {
      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color));
      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));
    }
  }
  else
  {
    // Release bindless residency of the old textures before they are replaced.
    if(has_GL_NV_bindless_texture && textures.scene_color)
    {
      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color));
      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));
      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color2));
      glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil2));
    }

    // First target.
    nvgl::newTexture(textures.scene_color, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);
    nvgl::newTexture(textures.scene_depthstencil, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);

    if(m_tweak.msaa)
    {
      // Use the selected sample count, matching the layered path above.
      glTextureStorage2DMultisample(textures.scene_color, m_tweak.msaa, GL_RGBA8, width, height, GL_TRUE);
      glTextureStorage2DMultisample(textures.scene_depthstencil, m_tweak.msaa, GL_DEPTH24_STENCIL8, width, height, GL_TRUE);
    }
    else
    {
      glTextureStorage2D(textures.scene_color, 1, GL_RGBA8, width, height);
      glTextureStorage2D(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height);
    }

    glNamedFramebufferTexture(fbos.scene, GL_COLOR_ATTACHMENT0, textures.scene_color, 0);
    glNamedFramebufferTexture(fbos.scene, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0);

    // Second target.
    nvgl::newTexture(textures.scene_color2, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);
    nvgl::newTexture(textures.scene_depthstencil2, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D);

    if(m_tweak.msaa)
    {
      glTextureStorage2DMultisample(textures.scene_color2, m_tweak.msaa, GL_RGBA8, width, height, GL_TRUE);
      glTextureStorage2DMultisample(textures.scene_depthstencil2, m_tweak.msaa, GL_DEPTH24_STENCIL8, width, height, GL_TRUE);
    }
    else
    {
      glTextureStorage2D(textures.scene_color2, 1, GL_RGBA8, width, height);
      glTextureStorage2D(textures.scene_depthstencil2, 1, GL_DEPTH24_STENCIL8, width, height);
    }

    glNamedFramebufferTexture(fbos.scene2, GL_COLOR_ATTACHMENT0, textures.scene_color2, 0);
    glNamedFramebufferTexture(fbos.scene2, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil2, 0);

    if(has_GL_NV_bindless_texture)
    {
      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color));
      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil));
      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color2));
      glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil2));
    }
  }

  // Tell the renderers that the framebuffer textures were replaced.
  m_resources.fboTextureChangeID++;

  return true;
}

// Shuts down and destroys the active renderer; does nothing when none exists.
void Sample::deinitRenderer()
{
  if(!m_renderer)
  {
    return;
  }

  m_renderer->deinit();
  delete m_renderer;
  m_renderer = nullptr;
}

// Replaces the active renderer with a new instance of the renderer type at
// position `type` of the priority-sorted list, using the given strategy.
void Sample::initRenderer(int type, Strategy strategy)
{
  deinitRenderer();

  // `type` indexes m_renderersSorted, which in turn indexes the registry.
  auto rendererType = Renderer::getRegistry()[m_renderersSorted[type]];

  rendererType->updatedPrograms(m_progManager);

  m_renderer             = rendererType->create();
  m_renderer->m_strategy = strategy;
  m_renderer->init(&m_scene, m_resources);
}

// One-time setup after the GL context exists: initializes ImGui, programs,
// scene and framebuffers, collects the available renderers, fills the UI
// enums, positions the camera and creates the initial renderer.
//
// Returns false when programs/scene/framebuffers could not be initialized,
// when a renderer fails to load its resources, or when no renderer is
// available at all.
bool Sample::begin()
{
  m_renderer      = NULL;
  m_stateChangeID = 0;

  ImGuiH::Init(m_windowState.m_winSize[0], m_windowState.m_winSize[1], this);
  ImGui::InitGL();

  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
  glEnable(GL_CULL_FACE);
  glEnable(GL_DEPTH_TEST);

#if defined(NDEBUG)
  setVsync(false);
#endif

  Renderer::s_bindless_ubo = !!m_contextWindow.extensionSupported("GL_NV_uniform_buffer_unified_memory");
  LOGI("\nNV_uniform_buffer_unified_memory support: %s\n\n", Renderer::s_bindless_ubo ? "true" : "false");

  bool validated(true);

  // A VAO is created and left bound for the rest of the setup.
  GLuint defaultVAO;
  glGenVertexArrays(1, &defaultVAO);
  glBindVertexArray(defaultVAO);

  validated = validated && initProgram();
  validated = validated && initScene(m_modelFilename.c_str(), 0, 3);
  validated = validated && initFramebuffers(m_windowState.m_winSize[0], m_windowState.m_winSize[1]);

  // Stop here instead of building renderers on top of invalid programs or a
  // scene that failed to load; the function would report failure anyway.
  if(!validated)
  {
    LOGE("Failed to initialize programs, scene or framebuffers\n");
    return false;
  }

  // Collect the available renderer types. The sort key carries the priority
  // in the upper bits and the registry index in the lower 16 bits.
  const Renderer::Registry registry = Renderer::getRegistry();
  for(size_t i = 0; i < registry.size(); i++)
  {
    if(registry[i]->isAvailable())
    {
      if(!registry[i]->loadPrograms(m_progManager))
      {
        LOGE("Failed to load resources for renderer %s\n", registry[i]->name());
        return false;
      }

      uint sortkey = uint(i);
      sortkey |= registry[i]->priority() << 16;
      m_renderersSorted.push_back(sortkey);
    }
  }

  if(m_renderersSorted.empty())
  {
    LOGE("No renderer available\n");
    return false;
  }

  std::sort(m_renderersSorted.begin(), m_renderersSorted.end());

  for(size_t i = 0; i < m_renderersSorted.size(); i++)
  {
    // Strip the priority again, leaving the plain registry index.
    m_renderersSorted[i] &= 0xFFFF;

    m_ui.enumAdd(GUI_RENDERER, int(i), registry[m_renderersSorted[i]]->name());
  }

  {
    m_ui.enumAdd(GUI_STRATEGY, STRATEGY_INDIVIDUAL, "drawcall individual");
    m_ui.enumAdd(GUI_STRATEGY, STRATEGY_JOIN, "drawcall join");
    m_ui.enumAdd(GUI_STRATEGY, STRATEGY_GROUPS, "material groups");

    m_ui.enumAdd(GUI_SHADE, SHADE_SOLID, toString(SHADE_SOLID));
    m_ui.enumAdd(GUI_SHADE, SHADE_SOLIDWIRE, toString(SHADE_SOLIDWIRE));
    m_ui.enumAdd(GUI_SHADE, SHADE_SOLIDWIRE_SPLIT, "solid w edges (split test, only in sorted)");

    m_ui.enumAdd(GUI_MSAA, 0, "none");
    m_ui.enumAdd(GUI_MSAA, 2, "2x");
    m_ui.enumAdd(GUI_MSAA, 4, "4x");
    m_ui.enumAdd(GUI_MSAA, 8, "8x");
  }

  // Apply a renderer requested by name, now that the sorted list exists, and
  // keep the selected index inside the list, since it can come straight from
  // the command line.
  setRendererFromName();
  m_tweak.renderer = std::max(0, std::min(m_tweak.renderer, int(m_renderersSorted.size()) - 1));

  // Orbit around the center of the scene bounding box, looking from a
  // diagonal whose distance scales with the scene size and the zoom setting.
  m_control.m_sceneOrbit     = glm::vec3(m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f;
  m_control.m_sceneDimension = glm::length((m_scene.m_bbox.max - m_scene.m_bbox.min));
  m_control.m_viewMatrix =
      glm::lookAt(m_control.m_sceneOrbit - (-vec3(1, 1, 1) * m_control.m_sceneDimension * 0.5f * (float(m_tweak.zoom) / 100.0f)),
                      m_control.m_sceneOrbit, vec3(0, 1, 0));

  m_sceneUbo.wLightPos   = (m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f + m_control.m_sceneDimension;
  m_sceneUbo.wLightPos.w = 1.0;

  updatedPrograms();

  CullingSystem::Programs cullprogs;
  getCullPrograms(cullprogs);
  Renderer::s_cullsys.init(cullprogs, true);

  ScanSystem::Programs scanprogs;
  getScanPrograms(scanprogs);
  Renderer::s_scansys.init(scanprogs);
  //Renderer::s_scansys.test();

  TransformSystem::Programs xformprogs;
  getTransformPrograms(xformprogs);
  m_transformSystem.init(xformprogs);


  initRenderer(m_tweak.renderer, m_tweak.strategy);

  return validated;
}

void Sample::processUI(double time)
{
  int width  = m_windowState.m_winSize[0];
  int height = m_windowState.m_winSize[1];

  // Update imgui configuration
  auto& imgui_io       = ImGui::GetIO();
  imgui_io.DeltaTime   = static_cast<float>(time - m_uiTime);
  imgui_io.DisplaySize = ImVec2(static_cast<float>(width), static_cast<float>(height));

  m_uiTime = time;

  ImGui::NewFrame();
  ImGui::SetNextWindowSize(ImGuiH::dpiScaled(350, 0), ImGuiCond_FirstUseEver);
  if(ImGui::Begin("NVIDIA " PROJECT_NAME, nullptr))
  {
    m_ui.enumCombobox(GUI_RENDERER, "renderer", &m_tweak.renderer);
    m_ui.enumCombobox(GUI_STRATEGY, "strategy", &m_tweak.strategy);
    m_ui.enumCombobox(GUI_SHADE, "shademode", &m_tweak.shade);
    ImGui::Checkbox("xplode via GPU", &m_tweak.animateActive);
    ImGui::SliderFloat("xplode min", &m_tweak.animateMin, 0, 16.0f);
    ImGui::SliderFloat("xplode delta", &m_tweak.animateDelta, 0, 16.0f);
    ImGuiH::InputIntClamped("clones", &m_tweak.clones, 0, 255, 1, 10, ImGuiInputTextFlags_EnterReturnsTrue);
    ImGui::Checkbox("clone X", &m_tweak.cloneaxisX);
    ImGui::Checkbox("clone Y", &m_tweak.cloneaxisY);
    ImGui::Checkbox("clone Z", &m_tweak.cloneaxisZ);
    m_ui.enumCombobox(GUI_MSAA, "msaa", &m_tweak.msaa);
  }
  if(!m_tweak.cloneaxisX && !m_tweak.cloneaxisY && !m_tweak.cloneaxisZ)
  {
    m_tweak.cloneaxisX = true;
  }

  ImGui::End();
}

// Propagates the current GL program names to the culling, scan and transform
// systems and to m_resources, and re-queries the xplode compute work group size.
// Called after program creation and after a shader reload.
void Sample::updatedPrograms()
{

  CullingSystem::Programs cullprogs;
  getCullPrograms(cullprogs);
  Renderer::s_cullsys.update(cullprogs, true);

  ScanSystem::Programs scanprogs;
  getScanPrograms(scanprogs);
  Renderer::s_scansys.update(scanprogs);

  TransformSystem::Programs xformprogs;
  getTransformPrograms(xformprogs);
  m_transformSystem.update(xformprogs);

  m_resources.programUbo     = m_progManager.get(programs.draw_object);
  m_resources.programUboLine = m_progManager.get(programs.draw_object_line);
  m_resources.programUboTris = m_progManager.get(programs.draw_object_tris);
  m_resources.programIdx     = m_progManager.get(programs.draw_object_indexed);
  m_resources.programIdxLine = m_progManager.get(programs.draw_object_indexed_line);
  m_resources.programIdxTris = m_progManager.get(programs.draw_object_indexed_tris);

  // The query leaves the array untouched when it fails (e.g. the program did
  // not link), so start from a defined value and never store a zero group
  // size: think() divides by it to compute the dispatch size.
  GLint groupsizes[3] = {1, 1, 1};
  glGetProgramiv(m_progManager.get(programs.xplode), GL_COMPUTE_WORK_GROUP_SIZE, groupsizes);
  m_xplodeGroupSize = groupsizes[0] > 0 ? GLuint(groupsizes[0]) : 1;

  m_resources.stateChangeID++;
}

void Sample::think(double time)
{
  NV_PROFILE_GL_SECTION("Frame");

  processUI(time);

  m_control.processActions({m_windowState.m_winSize[0], m_windowState.m_winSize[1]},
                           glm::vec2(m_windowState.m_mouseCurrent[0], m_windowState.m_mouseCurrent[1]),
                           m_windowState.m_mouseButtonFlags, m_windowState.m_mouseWheel);

  if(m_windowState.onPress(KEY_R))
  {
    m_progManager.reloadPrograms();
    Renderer::getRegistry()[m_tweak.renderer]->updatedPrograms(m_progManager);
    updatedPrograms();
  }

  if(m_tweak.msaa != m_lastTweak.msaa)
  {
    initFramebuffers(m_windowState.m_winSize[0], m_windowState.m_winSize[1]);
  }

  if(m_tweak.clones != m_lastTweak.clones || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX
     || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ)
  {
    deinitRenderer();
    initScene(m_modelFilename.c_str(), m_tweak.clones,
              (int(m_tweak.cloneaxisX) << 0) | (int(m_tweak.cloneaxisY) << 1) | (int(m_tweak.cloneaxisZ) << 2));
  }

  if(m_tweak.renderer != m_lastTweak.renderer || m_tweak.strategy != m_lastTweak.strategy
     || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY
     || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ || m_tweak.clones != m_lastTweak.clones)
  {
    initRenderer(m_tweak.renderer, m_tweak.strategy);
  }

  if(!m_tweak.animateActive && m_lastTweak.animateActive)
  {
    m_scene.resetMatrices();
  }

  m_lastTweak = m_tweak;

  int width  = m_windowState.m_winSize[0];
  int height = m_windowState.m_winSize[1];

  {
    // generic state setup
    glViewport(0, 0, width, height);

    if(m_tweak.shade == SHADE_SOLIDWIRE_SPLIT)
    {
      glBindFramebuffer(GL_FRAMEBUFFER, fbos.scene2);
      glClearColor(0.2f, 0.2f, 0.2f, 0.0f);
      glClearDepth(1.0);
      glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
    }

    glBindFramebuffer(GL_FRAMEBUFFER, fbos.scene);
    glClearColor(0.2f, 0.2f, 0.2f, 0.0f);
    glClearDepth(1.0);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);

    glEnable(GL_DEPTH_TEST);

    m_sceneUbo.viewport = ivec2(width, height);

    glm::mat4 projection = glm::perspectiveRH_ZO((45.f), float(width) / float(height),
                                                  m_control.m_sceneDimension * 0.001f, m_control.m_sceneDimension * 10.0f);
    glm::mat4 view       = m_control.m_viewMatrix;

    m_sceneUbo.viewProjMatrix = projection * view;
    m_sceneUbo.viewMatrix     = view;
    m_sceneUbo.viewMatrixIT   = glm::transpose(glm::inverse(view));

    m_sceneUbo.viewPos = glm::row(m_sceneUbo.viewMatrixIT, 3);
    m_sceneUbo.viewDir = -glm::row(view,2);

    m_sceneUbo.wLightPos   = glm::row(m_sceneUbo.viewMatrixIT, 3);
    m_sceneUbo.wLightPos.w = 1.0;

    m_sceneUbo.tboMatrices = uvec2(m_scene.m_matricesTexGLADDR & 0xFFFFFFFF, m_scene.m_matricesTexGLADDR >> 32);

    glNamedBufferSubData(buffers.scene_ubo, 0, sizeof(SceneData), &m_sceneUbo);

    glDisable(GL_CULL_FACE);
  }

  if(m_tweak.animateActive)
  {
    {
      NV_PROFILE_GL_SECTION("Xplode");

      float  speed      = 0.5;
      float  scale      = m_tweak.animateMin + (cosf(float(time) * speed) * 0.5f + 0.5f) * (m_tweak.animateDelta);
      GLuint totalNodes = GLuint(m_scene.m_matrices.size());
      GLuint groupsize  = m_xplodeGroupSize;

      glUseProgram(m_progManager.get(programs.xplode));
      glUniform1f(0, scale);
      glUniform1i(1, totalNodes);

      nvgl::bindMultiTexture(GL_TEXTURE0, GL_TEXTURE_BUFFER, m_scene.m_matricesOrigTexGL);
      glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_scene.m_matricesGL);

      glDispatchCompute((totalNodes + groupsize - 1) / groupsize, 1, 1);
      glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

      nvgl::bindMultiTexture(GL_TEXTURE0, GL_TEXTURE_BUFFER, 0);
      glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
      glUseProgram(0);
    }

    {
      NV_PROFILE_GL_SECTION("Tree");
      TransformSystem::Buffer ids;
      TransformSystem::Buffer world;
      TransformSystem::Buffer object;

      ids.buffer = m_scene.m_parentIDsGL;
      ids.offset = 0;
      ids.size   = sizeof(GLuint) * m_scene.m_matrices.size();

      world.buffer = m_scene.m_matricesGL;
      world.offset = 0;
      world.size   = sizeof(CadScene::MatrixNode) * m_scene.m_matrices.size();

      object.buffer = m_scene.m_matricesGL;
      object.offset = 0;
      object.size   = sizeof(CadScene::MatrixNode) * m_scene.m_matrices.size();

      m_transformSystem.process(m_scene.m_nodeTree, ids, object, world);
    }
  }

  {
    NV_PROFILE_GL_SECTION("Render");

    m_resources.cullView.viewPos        = glm::value_ptr(m_sceneUbo.viewPos);
    m_resources.cullView.viewDir        = glm::value_ptr(m_sceneUbo.viewDir);
    m_resources.cullView.viewProjMatrix = glm::value_ptr(m_sceneUbo.viewProjMatrix);

    m_renderer->draw(m_tweak.shade, m_resources, m_profiler, m_progManager);
  }


  {
    NV_PROFILE_GL_SECTION("Blit");


    if(m_tweak.shade == SHADE_SOLIDWIRE_SPLIT)
    {
      glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);

      int wh = width / 2;
      int hh = height / 2;

      glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene);
      glBlitFramebuffer(0, 0, wh, hh, 0, 0, wh, hh, GL_COLOR_BUFFER_BIT, GL_NEAREST);
      glBlitFramebuffer(wh, hh, width, height, wh, hh, width, height, GL_COLOR_BUFFER_BIT, GL_NEAREST);

      glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene2);
      glBlitFramebuffer(wh, 0, width, hh, wh, 0, width, hh, GL_COLOR_BUFFER_BIT, GL_NEAREST);
      glBlitFramebuffer(0, hh, wh, height, 0, hh, wh, height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
    }
    else
    {
      // blit to background
      glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene);
      glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
      glBlitFramebuffer(0, 0, width, height, 0, 0, width, height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
    }
  }

  if(!m_tweak.noUI)
  {
    NV_PROFILE_GL_SECTION("GUI");
    ImGui::Render();
    ImGui::RenderDrawDataGL(ImGui::GetDrawData());
  }

  ImGui::EndFrame();

  m_lastTweak = m_tweak;
}

// Window resize callback: recreates the offscreen render targets at the new size.
void Sample::resize(int width, int height)
{
  // Texture storage cannot be allocated with a zero or negative extent (e.g.
  // while the window is minimized); keep the current targets in that case.
  if(width <= 0 || height <= 0)
  {
    return;
  }

  initFramebuffers(width, height);
}

// Selects the renderer whose registered name equals m_rendererName, if any.
// When several entries match, the last one in the sorted list wins; when the
// name is empty or unknown, m_tweak.renderer is left untouched.
void Sample::setRendererFromName()
{
  if(m_rendererName.empty())
  {
    return;
  }

  const Renderer::Registry& registry = Renderer::getRegistry();
  const char* const         wanted   = m_rendererName.c_str();

  for(size_t slot = 0; slot < m_renderersSorted.size(); ++slot)
  {
    const char* const candidate = registry[m_renderersSorted[slot]]->name();
    if(strcmp(wanted, candidate) == 0)
    {
      m_tweak.renderer = int(slot);
    }
  }
}

// Returns `filename` unchanged when it is treated as absolute (contains ':' on
// Windows, starts with '/' elsewhere); otherwise returns it prefixed with
// `defaultPath` and a '/' separator.
static std::string addPath(std::string const& defaultPath, std::string const& filename)
{
#ifdef _WIN32
  const bool isAbsolute = filename.find(':') != std::string::npos;
#else
  const bool isAbsolute = !filename.empty() && filename[0] == '/';
#endif

  if(isAbsolute)
  {
    return filename;
  }

  return defaultPath + "/" + filename;
}

// Returns true when `s` ends with the suffix `end` (an empty suffix always matches).
static bool endsWith(std::string const& s, std::string const& end)
{
  const std::string::size_type suffixLen = end.length();

  // A string shorter than the suffix can never end with it.
  if(s.length() < suffixLen)
  {
    return false;
  }

  return s.compare(s.length() - suffixLen, suffixLen, end) == 0;
}

// Registers the command-line / config parameters of the sample with
// m_parameterList and binds them to the members they modify.
// Called from the constructor.
void Sample::setupConfigParameters()
{
  // Arguments with one of these file extensions are stored as the model file name.
  m_parameterList.addFilename(".csf", &m_modelFilename);
  m_parameterList.addFilename(".csf.gz", &m_modelFilename);
  m_parameterList.addFilename(".gltf", &m_modelFilename);

  m_parameterList.add("noui", &m_tweak.noUI, false);

  // The int/enum tweak members are registered through uint32_t pointers.
  m_parameterList.add("renderer", (uint32_t*)&m_tweak.renderer);
  m_parameterList.add("renderernamed", &m_rendererName);
  m_parameterList.add("strategy", (uint32_t*)&m_tweak.strategy);
  m_parameterList.add("shademode", (uint32_t*)&m_tweak.shade);
  m_parameterList.add("msaa", &m_tweak.msaa);
  m_parameterList.add("clones", &m_tweak.clones);
  m_parameterList.add("xplode", &m_tweak.animateActive);
  m_parameterList.add("zoom", &m_tweak.zoom);
}


// Checks that a model file was specified. When it is missing, prints a usage
// hint together with the parameter list and returns false.
bool Sample::validateConfig()
{
  const bool haveModel = !m_modelFilename.empty();

  if(!haveModel)
  {
    LOGI("no .csf model file specified\n");
    LOGI("exe <filename.csf/cfg> parameters...\n");
    m_parameterList.print();
  }

  return haveModel;
}

}  // namespace csfviewer

using namespace csfviewer;

int main(int argc, const char** argv)
{
  NVPSystem system(PROJECT_NAME);

  Sample sample;

  {
    std::vector<std::string> directories;
    directories.push_back(NVPSystem::exePath());
    directories.push_back(NVPSystem::exePath() + "/media");
    directories.push_back(NVPSystem::exePath() + std::string(PROJECT_DOWNLOAD_RELDIRECTORY));
    sample.m_modelFilename = nvh::findFile(std::string("geforce.csf.gz"), directories);
  }

  return sample.run(PROJECT_NAME, argc, argv, SAMPLE_SIZE_WIDTH, SAMPLE_SIZE_HEIGHT);
}


================================================
FILE: cull-bitpack.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 330
/**/

// Temporal modes selectable through the TEMPORAL define:
//   0             : output the packed bits of the current test unchanged
//   TEMPORAL_LAST : keep only bits that were also set in `last`
//   TEMPORAL_NEW  : keep only bits that were NOT set in `last`
#define TEMPORAL_LAST 1
#define TEMPORAL_NEW  2

#ifndef TEMPORAL
#define TEMPORAL 0
#endif

#extension GL_ARB_explicit_attrib_location : require
#extension GL_ARB_shader_storage_buffer_object : enable

// 8 attributes x 4 components = 32 unsigned visibility values per vertex;
// main() packs the lowest bit of each into one 32-bit word
layout(location=0) in uvec4 instream[8];

#if TEMPORAL
// previously stored bit word for the same 32 objects (only in temporal modes)
layout(location=9) in uint last;
#endif

// The packed word is either written to an SSBO (when the extension is
// available) or exposed as a varying named "outstream".
#if GL_ARB_shader_storage_buffer_object
layout(std430,binding=0)  writeonly buffer outputBuffer {
  uint outstream[];
};

// SSBO path: one word per vertex, indexed by gl_VertexID
void storeOutput(uint value)
{
  outstream[gl_VertexID] = value;
}

#else
flat out uint outstream;

// fallback path: write the word to the "outstream" varying
void storeOutput(uint value)
{
  outstream= value;
}
#endif

// Packs the lowest bit of the 32 incoming visibility values into a single
// 32-bit mask (value k ends up in bit k), optionally combines it with the
// mask of the previous frame, and stores the result.
void main ()
{
  uint mask = 0u;

  // slot k lives in component (k % 4) of attribute (k / 4)
  for (int slot = 0; slot < 32; slot++){
    uint flag = instream[slot / 4][slot % 4] & 1u;
    mask |= flag << slot;
  }

#if TEMPORAL == TEMPORAL_LAST
  // render what was visible in last frame and passes current test
  mask &= last;
#elif TEMPORAL == TEMPORAL_NEW
  // render what was not visible in last frame (already rendered), but is now visible
  mask &= (~last);
#endif

  storeOutput(mask);
}


================================================
FILE: cull-downsample.frag.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 330 
/**/

// Builds one level of a "max depth" mip chain: every output fragment receives
// the largest depth found in the matching region of mip level `depthLod`.
uniform sampler2D depthTex;   // depth texture that is being down-sampled
uniform int       depthLod;   // mip level to read from
uniform bool      evenLod;    // true: source level has even dimensions (exact 2x2 footprint)

in vec2 uv;

// Fetches the depth of one source texel, clamping the coordinate to the level.
float fetchDepth(ivec2 texel, ivec2 lodSize)
{
  return texelFetch(depthTex, clamp(texel, ivec2(0), lodSize - ivec2(1)), depthLod).r;
}

void main()
{
  ivec2 lodSize  = textureSize(depthTex,depthLod);
  float farthest = 0.0;

  if (evenLod){
    // exact 2x2 footprint below this fragment
    ivec2 base = ivec2(gl_FragCoord.xy) * 2;
    for (int y = 0; y < 2; y++){
      for (int x = 0; x < 2; x++){
        farthest = max(farthest, fetchDepth(base + ivec2(x,y), lodSize));
      }
    }
  }
  else{
    // need this to handle non-power of two
    // very conservative: 3x3 neighbourhood around the fragment's uv
    vec2 texel = 1.0/(vec2(lodSize));
    for (int y = -1; y <= 1; y++){
      for (int x = -1; x <= 1; x++){
        vec2 pos = uv + vec2(x,y) * texel;
        farthest = max(farthest, fetchDepth(ivec2(pos * lodSize), lodSize));
      }
    }
  }

  gl_FragDepth = farthest;
}


================================================
FILE: cull-downsample.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 330
/**/

// Attribute-less full-screen pass: vertex ids 0,1,2 produce the corners
// (-1,-1), (3,-1), (-1,3) of one oversized triangle; `uv` is the same
// position remapped from [-1,1] to [0,1].
out vec2 uv;

void main()
{
  // bit 0 of the vertex id selects x, bit 1 selects y
  float x = float( gl_VertexID       & 1) * 4.0 - 1.0;
  float y = float((gl_VertexID >> 1) & 1) * 4.0 - 1.0;

  uv          = vec2(x, y) * 0.5 + 0.5;
  gl_Position = vec4(x, y, 0, 1.0);
}


================================================
FILE: cull-raster.frag.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 430
/**/

// Run depth/stencil tests before the shader executes, so the buffer write
// below only happens for fragments that pass them.
layout(early_fragment_tests) in;

// one int per object; set to 1 as soon as any fragment of its box survives
layout(std430,binding=0) buffer visibleBuffer {
  int visibles[];
};

layout(location=0,index=0) out vec4 out_Color;

// object id forwarded by the geometry shader
flat in int objid;

void main (){
  // mark the object as visible
  visibles[objid] = 1;
  
  // object id encoded as color
  // (color writes are masked off by the host unless DEBUG_VISIBLEBOXES is set)
  out_Color = unpackUnorm4x8(uint(objid));
}


================================================
FILE: cull-raster.geo.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 430
/**/

// Layout of the matrix buffer: MATRICES mat4 entries per object, of which
// entry MATRIX_WORLD is the world matrix and MATRIX_WORLD_IT its
// inverse-transpose. All values can be overridden by the host.
#ifndef MATRIX_WORLD
#define MATRIX_WORLD    0
#endif

#ifndef MATRIX_WORLD_IT
#define MATRIX_WORLD_IT 1
#endif

#ifndef MATRICES
#define MATRICES        2
#endif

// FLIPWIND: negates the facing sign and swaps the emission order of the two
// middle strip vertices (see main)
#ifndef FLIPWIND
#define FLIPWIND        1
#endif

// PERSPECTIVE: 1 = facing test uses the vector from viewPos to the face,
//              0 = facing test uses the constant viewDir
#ifndef PERSPECTIVE
#define PERSPECTIVE     1
#endif

// render the 3 visible sides based on view direction and box normal
layout(points,invocations=3) in;  

// one side each invocation
layout(triangle_strip,max_vertices=4) out;

// per-box data produced by the vertex stage
in VertexOut{
  vec3 bboxCtr;          // box center
  vec3 bboxDim;          // half extent per axis
  flat int matrixIndex;  // index of the object's matrix set
  flat int objid;        // object id passed on to the fragment stage
} IN[1];

flat out int objid;

uniform vec3 viewPos;
uniform vec3 viewDir;
uniform mat4 viewProjTM;
uniform samplerBuffer matricesTex;  // matrices stored as 4 RGBA texels each

// Emits one strip vertex located at worldCtr + offsetFromCtr.
// objid is re-assigned every time because outputs must be written again
// after each EmitVertex().
void emitBoxVertex(vec3 worldCtr, vec3 offsetFromCtr)
{
  objid = IN[0].objid;
  gl_Position = viewProjTM * vec4(worldCtr + offsetFromCtr,1);
  EmitVertex();
}

// Each of the 3 invocations handles one box axis and emits, as a 4-vertex
// strip, the face along that axis on the side selected by the facing sign.
void main()
{
  int  matindex = (IN[0].matrixIndex*MATRICES + MATRIX_WORLD)*4;
  mat4 worldTM = mat4(
    texelFetch(matricesTex,matindex + 0),
    texelFetch(matricesTex,matindex + 1),
    texelFetch(matricesTex,matindex + 2),
    texelFetch(matricesTex,matindex + 3));
  mat3 worldRot = mat3(worldTM);

  // invocation 0/1/2 -> face normal along x/y/z; the two edge directions are
  // the remaining axes in cyclic order
  int axisN = gl_InvocationID;
  int axisU = (axisN + 1) % 3;
  int axisV = (axisN + 2) % 3;

  vec3 faceNormal = vec3(0);
  vec3 edgeBasis0 = vec3(0);
  vec3 edgeBasis1 = vec3(0);
  faceNormal[axisN] = IN[0].bboxDim[axisN];
  edgeBasis0[axisU] = IN[0].bboxDim[axisU];
  edgeBasis1[axisV] = IN[0].bboxDim[axisV];

  vec3 worldCtr    = (worldTM * vec4(IN[0].bboxCtr, 1)).xyz;
  vec3 worldNormal = worldRot * faceNormal;

  // sign that decides which of the two opposite faces gets emitted
#if PERSPECTIVE
  vec3  worldPos = worldCtr + worldNormal;
  float proj     = sign(dot(worldPos - viewPos.xyz, worldNormal));
#else
  float proj     = sign(dot(viewDir,worldNormal));
#endif

#if FLIPWIND
  proj = -proj;
#endif

  vec3 n = worldNormal * proj;
  vec3 u = worldRot * edgeBasis0;
  vec3 v = worldRot * edgeBasis1 * proj;

  // the two variants differ only in the order of the two middle vertices
#if FLIPWIND
  emitBoxVertex(worldCtr, n - u - v);
  emitBoxVertex(worldCtr, n + u - v);
  emitBoxVertex(worldCtr, n - u + v);
  emitBoxVertex(worldCtr, n + u + v);
#else
  emitBoxVertex(worldCtr, n - u - v);
  emitBoxVertex(worldCtr, n - u + v);
  emitBoxVertex(worldCtr, n + u - v);
  emitBoxVertex(worldCtr, n + u + v);
#endif
}


================================================
FILE: cull-raster.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 430
/**/

// Layout of the matrix buffer: MATRICES mat4 entries per object, of which
// entry MATRIX_WORLD is the world matrix and MATRIX_WORLD_IT its
// inverse-transpose. All values can be overridden by the host.
#ifndef MATRIX_WORLD
#define MATRIX_WORLD    0
#endif

#ifndef MATRIX_WORLD_IT
#define MATRIX_WORLD_IT 1
#endif

#ifndef MATRICES
#define MATRICES        2
#endif

// one int per object; main() sets it to 1 when the camera is inside the box
layout(std430,binding=0) buffer visibleBuffer {
  int visibles[];
};

uniform samplerBuffer matricesTex;  // matrices stored as 4 RGBA texels each

// DUALINDEX: the vertex stream carries an index into a bbox buffer instead of
// the box itself; min/max are then fetched from two consecutive texels.
#ifdef DUALINDEX
layout(location=0) in int  bboxIndex;
layout(location=2) in int  matrixIndex;
uniform samplerBuffer     bboxesTex;

// NOTE(review): these globals use non-constant initializers; confirm that all
// targeted GLSL compilers accept this.
vec4 bboxMin = texelFetch(bboxesTex, bboxIndex*2+0);
vec4 bboxMax = texelFetch(bboxesTex, bboxIndex*2+1);
#else
layout(location=0) in vec4 bboxMin;
layout(location=1) in vec4 bboxMax;
layout(location=2) in int  matrixIndex;
#endif

uniform vec3 viewPos;

// per-box data handed to the geometry stage
out VertexOut{
  vec3 bboxCtr;          // box center
  vec3 bboxDim;          // half extent per axis
  flat int matrixIndex;  // index of the object's matrix set
  flat int objid;        // gl_VertexID of the box
} OUT;

// Forwards the box (center, half extent, matrix index, id) to the geometry
// stage and directly flags the object as visible when the camera position
// lies inside the box.
void main()
{
  vec3 boxCenter  = (bboxMin.xyz + bboxMax.xyz) * 0.5;
  vec3 boxHalfDim = (bboxMax.xyz - bboxMin.xyz) * 0.5;

  OUT.bboxCtr     = boxCenter;
  OUT.bboxDim     = boxHalfDim;
  OUT.matrixIndex = matrixIndex;
  OUT.objid       = gl_VertexID;

  // if camera is inside the bbox then none of our
  // side faces will be visible, must treat object as
  // visible
  int  base = (matrixIndex * MATRICES + MATRIX_WORLD_IT)*4;
  mat4 worldInvTransTM = mat4(
    texelFetch(matricesTex,base + 0),
    texelFetch(matricesTex,base + 1),
    texelFetch(matricesTex,base + 2),
    texelFetch(matricesTex,base + 3));

  // row-vector times inverse-transpose == inverse times column-vector,
  // i.e. the camera position expressed in the box's local space
  vec3 eyeLocal = (vec4(viewPos,1) * worldInvTransTM).xyz - boxCenter;

  if (all(lessThan(abs(eyeLocal), boxHalfDim))){
    // inside bbox
    visibles[gl_VertexID] = 1;
  }
}


================================================
FILE: cull-tokencmds.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 440
/**/

// number of elements per scan batch; scan values restart per batch and are
// completed with cullScanOffsets[] (see getOffset)
#define SCAN_BATCHSIZE 2048

// per-command inputs (one vertex per command)
layout(location=0) in uint  cmdOffset;    // position of the command in incmds[]
layout(location=1) in uint  cmdCullSize;  // number of uints to copy, 0 = nothing is copied
layout(location=2) in uint  cmdCullScan;  // scan value of this command's cull size

// description of the sequence this draw processes
uniform uint startOffset;   // output position at which the sequence begins
uniform int  startID;       // id of the first command of the sequence
uniform uint endOffset;     // output position at which the unculled sequence would end
uniform int  endID;         // id of the last command of the sequence
uniform uint terminateCmd;  // value written after the last kept command when something was removed

// compacted command stream
layout(std430,binding=0)  writeonly buffer outputBuffer {
  uint outcmds[];
};

// original command stream
layout(std430,binding=1)  readonly buffer commandBuffer {
  uint incmds[];
};

// per-command cull sizes (same data as the cmdCullSize attribute, random access)
layout(std430,binding=2)  readonly buffer cullSizesBuffer {
  uint cullSizes[];
};

// per-command scan values (same data as the cmdCullScan attribute, random access)
layout(std430,binding=3)  readonly buffer cullScanBuffer {
  uint cullScan[];
};

// per-batch offsets added to the scan values of all later batches
layout(std430,binding=4)  readonly buffer cullScanOffsetBuffer {
  uint cullScanOffsets[];
};

// Turns a per-batch scan value into a global offset.
//   id        : element index, selects the scan batch (id / SCAN_BATCHSIZE)
//   scan      : scan value of the element within its batch
//   size      : size of the element itself
//   exclusive : true  -> offset at which the element starts (scan minus own size)
//               false -> offset directly behind the element
uint getOffset( int id, uint scan, uint size, bool exclusive)
{
  // every batch but the first is shifted by the offset stored for its predecessor
  int  batch     = id / SCAN_BATCHSIZE;
  uint batchBase = 0u;
  if (batch > 0){
    batchBase = cullScanOffsets[batch-1];
  }

  uint inclusiveEnd = scan + batchBase;
  return exclusive ? inclusiveEnd - size : inclusiveEnd;
}

// Convenience overload: reads scan value and size of element `id` from the
// cullScan / cullSizes buffers.
uint getOffset( int id, bool exclusive)
{
  uint scan = cullScan[id];
  uint size = cullSizes[id];
  return getOffset(id, scan, size, exclusive);
}

// Converts a global cull offset into a position in the output stream:
// the distance to the start of the current sequence is preserved, but
// counting begins at startOffset.
uint rebaseOffset(uint cullOffset)
{
  // global offset at which the current sequence starts
  uint sequenceBegin = getOffset(startID, true);

  return startOffset + (cullOffset - sequenceBegin);
}

// DEBUG 1 replaces the command copy by writing (outOffset, cmdOffset) pairs
// per vertex, so the computed offsets can be inspected.
#define DEBUG 0

// One invocation per command: commands with a non-zero cull size are copied
// to their compacted position; the invocation of the sequence's first command
// additionally writes the terminator when the sequence got shorter.
void main ()
{
  if (cmdCullSize > 0)
  {
    // cullOffset goes across "stateobject" sequences
    uint cullOffset = getOffset(gl_VertexID,cmdCullScan,cmdCullSize,true);
  
    // position relative to the start of this sequence in the output
    uint outOffset  = rebaseOffset(cullOffset);
    
  #if DEBUG
    outcmds[(gl_VertexID)*2+0] = outOffset;
    outcmds[(gl_VertexID)*2+1] = cmdOffset;
  #else
    // copy the command uint by uint
    for (uint i = 0; i < cmdCullSize; i++){
      outcmds[outOffset+i] = incmds[cmdOffset+i];
    }
  #endif
  }
#if DEBUG
  else {
    // nothing copied for this command: mark with all bits set
    outcmds[(gl_VertexID)*2+0] = ~0;
    outcmds[(gl_VertexID)*2+1] = cmdOffset;
  }
#endif

  // executed once per sequence, by the invocation of its first command
  if (gl_VertexID == startID)
  {
    // add terminator if sequence not original
    // (lastOffset = output position directly behind the last kept command)
    uint lastOffset = rebaseOffset( getOffset(endID, false) );
    if (lastOffset != endOffset) {
#if !DEBUG
      outcmds[lastOffset] = terminateCmd;
#endif
    }
    
#if DEBUG && 0
    outcmds[(startID)*2+0] = lastOffset;
    outcmds[(startID)*2+1] = endOffset;
#endif
  }
}


================================================
FILE: cull-tokensizes.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 440
/**/

// Computes the effective size of every command: commands bound to an object
// keep their size only if the object's visibility bit is set, commands
// without an object (negative id) always keep it.
layout(location=0) in uint  cmdSize;    // original size of the command
layout(location=1) in int   cmdObject;  // object the command belongs to, negative = none

layout(std430,binding=0)  writeonly buffer outputBuffer {
  uint outsizes[];
};

// visibility bits, 32 objects per int
layout(std430,binding=1)  readonly buffer visibleBuffer {
  int visibles[];
};

// set to true to bypass culling and keep every command
#define DEBUG false

void main ()
{
  uint effectiveSize = cmdSize;

  if (!DEBUG && cmdObject >= 0){
    int word = visibles[cmdObject/32];
    int bit  = 1<<(cmdObject%32);
    if ((word & bit) == 0){
      // object not visible: drop the command
      effectiveSize = 0u;
    }
  }

  outsizes[gl_VertexID] = effectiveSize;
}


================================================
FILE: cull-xfb.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#version 330
/**/

// Layout of the matrix buffer: MATRICES mat4 entries per object, of which
// entry MATRIX_WORLD is the world matrix and MATRIX_WORLD_IT its
// inverse-transpose. All values can be overridden by the host.
#ifndef MATRIX_WORLD
#define MATRIX_WORLD    0
#endif

#ifndef MATRIX_WORLD_IT
#define MATRIX_WORLD_IT 1
#endif

#ifndef MATRICES
#define MATRICES        2
#endif

#extension GL_ARB_explicit_attrib_location : require
#extension GL_ARB_shader_storage_buffer_object : enable


// when defined, main() additionally tests the box against the depth mip chain
//#define OCCLUSION

// DUALINDEX: the vertex stream carries an index into a bbox buffer instead of
// the box itself; min/max are then fetched from two consecutive texels.
#ifdef DUALINDEX
layout(location=0) in int  bboxIndex;
layout(location=2) in int  matrixIndex;

uniform samplerBuffer     bboxesTex;
// NOTE(review): these globals use non-constant initializers; confirm that all
// targeted GLSL compilers accept this.
vec4 bboxMin = texelFetch(bboxesTex, bboxIndex*2+0);
vec4 bboxMax = texelFetch(bboxesTex, bboxIndex*2+1);
#else
layout(location=0) in vec4 bboxMin;
layout(location=1) in vec4 bboxMax;
layout(location=2) in int  matrixIndex;
#endif

// The visibility flag is either written to an SSBO (when the extension is
// available) or exposed as a varying named "outstream".
#if GL_ARB_shader_storage_buffer_object
layout(std430,binding=0)  writeonly buffer outputBuffer {
  int outstream[];
};

// SSBO path: one int per object, indexed by gl_VertexID
void storeOutput(int value)
{
  outstream[gl_VertexID] = value;
}

#else
flat out int outstream;

// fallback path: write the flag to the "outstream" varying
void storeOutput(int value)
{
  outstream = value;
}
#endif

uniform mat4              viewProjTM;
uniform samplerBuffer     matricesTex;  // matrices stored as 4 RGBA texels each

#ifdef OCCLUSION
uniform sampler2D         depthTex;     // depth texture sampled with explicit mip levels
#endif

// Returns corner `n` (0..7) of the bounding box as a point (w = 1).
// Bits 0/1/2 of n choose between bboxMin and bboxMax for x/y/z.
vec4 getBoxCorner(int n)
{
  vec3 corner = bboxMin.xyz;

  if ((n & 1) != 0){
    corner.x = bboxMax.x;
  }
  if ((n & 2) != 0){
    corner.y = bboxMax.y;
  }
  if ((n & 4) != 0){
    corner.z = bboxMax.z;
  }

  return vec4(corner,1);
}

// Transforms `pos` by `a` and applies the homogeneous divide.
// The result is undefined when the transformed w is zero.
vec3 projected(mat4 a, vec4 pos) {
  vec4 clip = a * pos;
  float invW = 1.0;  // placeholder to keep the divide explicit below
  return clip.xyz / (clip.w * invW);
}

// Frustum (and optionally hi-z occlusion) test of one bounding box.
//
// The 8 box corners are transformed to clip space. Corners with w <= 0 lie on
// or behind the eye plane; dividing them by w would mirror them and can make
// a visible box look as if it were outside of the frustum. They are therefore
// handled separately:
//   - all corners behind   : the box cannot be seen            -> invisible
//   - some corners behind  : the box crosses the eye plane     -> visible
//                            (conservative, no further test)
//   - no corner behind     : regular NDC bounds test, followed by the
//                            occlusion test when OCCLUSION is defined
// The result (0 or 1) is written through storeOutput().
void main (){
  int isvisible = 0;
  int matindex = (matrixIndex*MATRICES + MATRIX_WORLD)*4;
  mat4 worldTM = mat4(
    texelFetch(matricesTex,matindex + 0),
    texelFetch(matricesTex,matindex + 1),
    texelFetch(matricesTex,matindex + 2),
    texelFetch(matricesTex,matindex + 3));
    
  mat4 worldViewProjTM = (viewProjTM * worldTM);
  
  // NDC bounds of all corners in front of the eye plane
  vec3 clipmin  = vec3(0);
  vec3 clipmax  = vec3(0);
  int  numFront = 0;

  for (int n = 0; n < 8; n++){
    vec4 hpos = worldViewProjTM * getBoxCorner(n);
    if (hpos.w > 0.0){
      vec3 ab = hpos.xyz / hpos.w;
      if (numFront == 0){
        clipmin = ab;
        clipmax = ab;
      }
      else{
        clipmin = min(clipmin,ab);
        clipmax = max(clipmax,ab);
      }
      numFront++;
    }
  }

  if (numFront == 8){
    // box entirely in front of the eye plane: the projected bounds are valid
    isvisible = (
      clipmin.x <= 1 &&
      clipmin.y <= 1 &&
      clipmin.z <= 1 &&
      clipmax.x >= -1 &&
      clipmax.y >= -1 &&
      clipmax.z >= -1) ? 1 : 0;

#ifdef OCCLUSION
    if (isvisible != 0){
      // NDC [-1,1] -> texture space [0,1]
      clipmin = clipmin * 0.5 + 0.5;
      clipmax = clipmax * 0.5 + 0.5;
      vec2 size = (clipmax.xy - clipmin.xy);
      ivec2 texsize = textureSize(depthTex,0);
      // mip level chosen from the larger screen extent of the box in texels
      float maxsize = max(size.x, size.y) * float(max(texsize.x,texsize.y));
      float miplevel = ceil(log2(maxsize));
    
      // largest stored depth at the four corners of the screen rectangle
      float depth = 0;
      float a = textureLod(depthTex,clipmin.xy,miplevel).r;
      float b = textureLod(depthTex,vec2(clipmax.x,clipmin.y),miplevel).r;
      float c = textureLod(depthTex,clipmax.xy,miplevel).r;
      float d = textureLod(depthTex,vec2(clipmin.x,clipmax.y),miplevel).r;
      depth = max(depth,max(max(max(a,b),c),d));

      // visible if the closest point of the box is not behind that depth
      isvisible =  clipmin.z <= depth ? 1 : 0;
    }
#endif
  }
  else if (numFront > 0){
    // box crosses the eye plane: projected bounds are meaningless, keep it
    isvisible = 1;
  }
  // numFront == 0: everything is behind the eye plane, isvisible stays 0

  storeOutput(isvisible);
}


================================================
FILE: cullingsystem.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#include "cullingsystem.hpp"
#include <assert.h>
#include <string.h>

#define DEBUG_VISIBLEBOXES  0

// Returns the number of `alignment`-sized groups needed to hold `val`
// elements, i.e. val / alignment rounded up.
// `alignment` must not be 0.
inline unsigned int minDivide(unsigned int val, unsigned int alignment)
{
  // quotient + remainder form: unlike (val + alignment - 1) / alignment it
  // cannot wrap around for values close to the maximum of unsigned int
  return val / alignment + ((val % alignment) != 0u ? 1u : 0u);
}

// One-time setup: applies the program set via update() and creates the GL
// objects owned by the system (one framebuffer, two buffer textures).
// Counterpart of deinit().
void CullingSystem::init( const Programs &programs, bool dualindex )
{
  update(programs,dualindex);
  glGenFramebuffers(1,&m_fbo);
  // m_tbo[0] is used for the matrices, m_tbo[1] for the bboxes (see testBboxes)
  glCreateTextures(GL_TEXTURE_BUFFER,2,m_tbo);
}

// Takes over a (new) set of culling programs and prepares them for use:
//  - stores the programs and the dualindex mode
//  - decides between the SSBO and the transform-feedback output path
//  - without SSBO: declares "outstream" as transform-feedback varying and
//    re-links the affected programs
//  - assigns the fixed sampler units and caches the uniform locations
//
//   programs  : program handles used by the culling system
//   dualindex : true when the shaders fetch bboxes through "bboxesTex"
void CullingSystem::update( const Programs &programs, bool dualindex )
{
  m_programs = programs;
  m_dualindex = dualindex;
  // NOTE(review): shader storage buffers are core since OpenGL 4.3 and the
  // comment in bitsFromOutput() also speaks of 4.3 - confirm that testing
  // for 4.2 here is intended.
  m_useSSBO = has_GL_VERSION_4_2 != 0;
  m_useRepesentativeTest = !!has_GL_NV_representative_fragment_test;

  if (!m_useSSBO)
  {
    // transform-feedback varyings only take effect after (re-)linking
    const char* xfbstreams[] = {"outstream"};
    glTransformFeedbackVaryings(programs.bit_regular,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);
    glLinkProgram(programs.bit_regular);

    glTransformFeedbackVaryings(programs.bit_temporallast,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);
    glLinkProgram(programs.bit_temporallast);

    glTransformFeedbackVaryings(programs.bit_temporalnew,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);
    glLinkProgram(programs.bit_temporalnew);

    glTransformFeedbackVaryings(programs.object_frustum,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);
    glLinkProgram(programs.object_frustum);

    glTransformFeedbackVaryings(programs.object_hiz,1,xfbstreams,GL_INTERLEAVED_ATTRIBS);
    glLinkProgram(programs.object_hiz);
  }

  // sampler units: 0 = matrices (depth texture for depth_mips),
  //                1 = bboxes (dualindex only), 2 = depth with mipmaps (hiz)

  glUseProgram(programs.depth_mips);
  glUniform1i(glGetUniformLocation(programs.depth_mips,"depthTex"),0);
  m_uniforms.depth_lod = glGetUniformLocation(programs.depth_mips,"depthLod");
  m_uniforms.depth_even = glGetUniformLocation(programs.depth_mips,"evenLod");

  glUseProgram(programs.object_frustum);
  glUniform1i(glGetUniformLocation(programs.object_frustum,"matricesTex"),0);
  if (dualindex){
    glUniform1i(glGetUniformLocation(programs.object_frustum,"bboxesTex"),1);
  }
  m_uniforms.frustum_viewProj = glGetUniformLocation(programs.object_frustum, "viewProjTM");

  glUseProgram(programs.object_hiz);
  glUniform1i(glGetUniformLocation(programs.object_hiz,"matricesTex"),0);
  if (dualindex){
    // uniform locations are per program: query the program that is in use
    glUniform1i(glGetUniformLocation(programs.object_hiz,"bboxesTex"),1);
  }
  glUniform1i(glGetUniformLocation(programs.object_hiz,"depthTex"),2);
  m_uniforms.hiz_viewProj = glGetUniformLocation(programs.object_hiz, "viewProjTM");
  
  glUseProgram(programs.object_raster);
  glUniform1i(glGetUniformLocation(programs.object_raster,"matricesTex"),0);
  if (dualindex){
    // uniform locations are per program: query the program that is in use
    glUniform1i(glGetUniformLocation(programs.object_raster,"bboxesTex"),1);
  }
  m_uniforms.raster_viewProj = glGetUniformLocation(programs.object_raster, "viewProjTM");
  m_uniforms.raster_viewPos  = glGetUniformLocation(programs.object_raster, "viewPos");
  m_uniforms.raster_viewDir  = glGetUniformLocation(programs.object_raster, "viewDir");

  glUseProgram(0);
}

// Releases the GL objects created in init(): the framebuffer and the two
// buffer textures.
void CullingSystem::deinit()
{
  glDeleteFramebuffers(1,&m_fbo);
  glDeleteTextures(2,m_tbo);
}

// Fills the mip chain of `textureDepth` by rendering every level > 0 with the
// depth_mips program, which reads the previous level.
//   textureDepth  : depth texture whose level 0 holds the scene depth
//   width, height : size of level 0
// On exit the default framebuffer is bound, the viewport is reset to
// width x height and the depth function is set to GL_LEQUAL.
void CullingSystem::buildDepthMipmaps( GLuint textureDepth, int width, int height )
{
  int level = 0;
  // larger dimension; halved per level, the loop ends when it reaches 0
  int dim = width > height ? width : height;
  int twidth  = width;
  int theight = height;
  int wasEven = 0;

  glBindFramebuffer(GL_FRAMEBUFFER,m_fbo);
  // every fragment must write its depth, regardless of what is stored
  glDepthFunc(GL_ALWAYS);
  glUseProgram(m_programs.depth_mips);
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_2D, textureDepth);


  while (dim){
    // level 0 already contains the data, only levels > 0 are rendered
    if (level){
      // the smaller dimension may already have reached 0 for non-square sizes
      twidth  = twidth < 1 ? 1 : twidth;
      theight = theight < 1 ? 1 : theight;
      glViewport(0,0,twidth,theight);
      // render into `level` while the shader reads from `level-1`
      glFramebufferTexture2D(GL_FRAMEBUFFER,GL_DEPTH_STENCIL_ATTACHMENT,GL_TEXTURE_2D, textureDepth, level);
      glUniform1i(m_uniforms.depth_lod, level-1);
      glUniform1i(m_uniforms.depth_even, wasEven);

      glDrawArrays(GL_TRIANGLES,0,3);
    }

    // remembers whether the level just handled (the source of the next
    // iteration) has even width and height
    wasEven = (twidth % 2 == 0) && (theight % 2 == 0);
    
    dim       /=  2;
    twidth    /=  2;
    theight   /=  2;
    level++;
  }

  glUseProgram(0);
  glViewport(0,0,width,height);
  glBindFramebuffer(GL_FRAMEBUFFER,0);
  glBindTexture(GL_TEXTURE_2D, 0);
  glDepthFunc(GL_LEQUAL);
  // NOTE(review): repeats the glViewport call from a few lines above
  glViewport(0,0,width,height);
}



// Draws one point per object with the currently bound culling program.
// Sets up the vertex streams (bbox or bbox index, matrix index), binds the
// matrix / bbox buffers as buffer textures, configures the output path,
// issues the draw and restores the touched state afterwards.
//   job    : provides all buffers and the object count
//   raster : true  -> boxes are rasterized (depth/color writes masked off),
//                     no output buffer is bound in this function
//            false -> rasterization is discarded, results go to
//                     job.m_bufferVisOutput through SSBO or transform feedback
// The caller must have activated the program and set its uniforms.
void CullingSystem::testBboxes( Job &job, bool raster )
{
  // send the scene's bboxes as points stream

  glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferObjectBbox.buffer);
  if (m_dualindex){
    // attribute 0: integer index into the bbox buffer texture
    glVertexAttribIPointer(0, 1, GL_INT, job.m_bufferObjectBbox.stride, (const void*) job.m_bufferObjectBbox.offset);
    glVertexAttribDivisor(0, 0);
    glEnableVertexAttribArray(0);
  }
  else{
    // attributes 0/1: bbox min and max as two consecutive vec4;
    // a stride of 0 in the job means tightly packed min/max pairs
    GLsizei stride = job.m_bufferObjectBbox.stride ? job.m_bufferObjectBbox.stride : sizeof(float)*4*2;
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, stride, (const void*)job.m_bufferObjectBbox.offset);
    glVertexAttribDivisor(0, 0);
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, stride, (const void*)(sizeof(float)*4 + job.m_bufferObjectBbox.offset));
    glVertexAttribDivisor(1, 0);
    glEnableVertexAttribArray(1);
  }
  
  // attribute 2: integer matrix index per object
  glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferObjectMatrix.buffer);
  glVertexAttribIPointer(2, 1, GL_INT, job.m_bufferObjectMatrix.stride, (const void*) job.m_bufferObjectMatrix.offset);
  glVertexAttribDivisor(2, 0);
  glEnableVertexAttribArray(2);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  
  // texture unit 0: matrices as RGBA32F buffer texture
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_BUFFER, m_tbo[0]);
  job.m_bufferMatrices.TexBuffer(GL_TEXTURE_BUFFER,GL_RGBA32F);

  if (m_dualindex){
    // texture unit 1: bboxes as RGBA32F buffer texture
    // (unit 1 stays active until the cleanup below)
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_BUFFER, m_tbo[1]);
    job.m_bufferBboxes.TexBuffer(GL_TEXTURE_BUFFER,GL_RGBA32F);
  }

  if (raster){
    if (m_useRepesentativeTest) {
      glEnable( GL_REPRESENTATIVE_FRAGMENT_TEST_NV );
    }
#if !DEBUG_VISIBLEBOXES
    // the boxes must not modify the framebuffer
    glDepthMask(GL_FALSE);
    glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE);
#endif
  }
  else if (m_useSSBO){
    glEnable(GL_RASTERIZER_DISCARD);
    job.m_bufferVisOutput.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);
  }
  else{
    glEnable(GL_RASTERIZER_DISCARD);
    // setup transform feedback
    job.m_bufferVisOutput.BindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,0);
    glBeginTransformFeedback(GL_POINTS);
  }

  glDrawArrays(GL_POINTS,0,job.m_numObjects);

  // undo the output setup in the same three variants
  if (raster){
    if (m_useRepesentativeTest) {
      glDisable( GL_REPRESENTATIVE_FRAGMENT_TEST_NV );
    }
#if !DEBUG_VISIBLEBOXES
    glDepthMask(GL_TRUE);
    glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
#endif
  }
  else if (m_useSSBO){
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0);
    glDisable(GL_RASTERIZER_DISCARD);
  }
  else{
    glEndTransformFeedback();
    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER,0,0);
    glDisable(GL_RASTERIZER_DISCARD);
  }

  if (m_dualindex){
    // unit 1 is still active here: unbind the bbox texture, then go back to unit 0
    glBindTexture(GL_TEXTURE_BUFFER, 0);
    glActiveTexture(GL_TEXTURE0);
  }
  glBindTexture(GL_TEXTURE_BUFFER, 0);
  
  // attribute 1 is disabled unconditionally, even if dualindex never enabled it
  glDisableVertexAttribArray(0);
  glDisableVertexAttribArray(1);
  glDisableVertexAttribArray(2);
  
}

// Packs the per-object visibility ints of job.m_bufferVisOutput into bits
// (32 objects per word) and stores them in job.m_bufferVisBitsCurrent.
//   type : BITS_CURRENT          -> bits of the current test only
//          BITS_CURRENT_AND_LAST -> combined with job.m_bufferVisBitsLast by
//                                   the bit_temporallast program
//          otherwise             -> combined by the bit_temporalnew program
void CullingSystem::bitsFromOutput( Job &job, BitType type)
{
  // for GL 3.3 compatibility we use xfb
  // in GL 4.3 SSBO is used
  // 
  // using compute instead of "invisible" point drawing
  // would be better if we had really huge thread counts

  glEnable(GL_RASTERIZER_DISCARD);

  // each vertex reads 32 consecutive ints of the output buffer:
  // 8 attributes x 4 components, with a vertex stride of 32 ints
  glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferVisOutput.buffer);
  for (int i = 0; i < 8; i++){
    glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(int)*32, (const void*)(i*sizeof(int)*4 + job.m_bufferVisOutput.offset));
    glVertexAttribDivisor(i, 0);
    glEnableVertexAttribArray(i);
  }
  
  if (type == BITS_CURRENT){
    glUseProgram(m_programs.bit_regular);
  }
  else{
    glUseProgram(type == BITS_CURRENT_AND_LAST ? m_programs.bit_temporallast : m_programs.bit_temporalnew);

    // attribute 9: one word of last frame's bits per vertex
    glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferVisBitsLast.buffer);
    glVertexAttribIPointer(9, 1, GL_UNSIGNED_INT, sizeof(int), (const void*)job.m_bufferVisBitsLast.offset);
    glEnableVertexAttribArray(9);
  }

  if (m_useSSBO){
    job.m_bufferVisBitsCurrent.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);
    // the attribute data read below was written by shaders beforehand
    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
  }
  else{
    job.m_bufferVisBitsCurrent.BindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,0);
    glBeginTransformFeedback(GL_POINTS);
  }

  // one point per 32 objects (rounded up)
  glDrawArrays(GL_POINTS,0, minDivide(job.m_numObjects,32));

  if (m_useSSBO){
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
    // binding 1 is not set by this function; it is reset here as well
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
  }
  else{
    glEndTransformFeedback();
    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0);
  }
  
  // attribute 9 is disabled unconditionally, also when it was not enabled above
  glDisableVertexAttribArray(9);
  for (int i = 0; i < 8; i++){
    glDisableVertexAttribArray(i);
  }

  glDisable(GL_RASTERIZER_DISCARD);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
}

// Lets the job build its result from the current visibility bits: forwards
// job.m_bufferVisBitsCurrent to the job's own resultFromBits() implementation.
void CullingSystem::resultFromBits( Job &job )
{
  job.resultFromBits(job.m_bufferVisBitsCurrent);
}

// Forwards to the job's resultClient() implementation.
// NOTE(review): presumably performs the client (CPU) side part of fetching the
// result - confirm against the Job implementations.
void CullingSystem::resultClient(Job &job)
{
  job.resultClient();
}

void CullingSystem::buildOutput( MethodType method, Job &job, const View& view )
{
  // Runs the bounding box test of the chosen method. Any method value
  // other than the three handled below is silently ignored.

  if (method == METHOD_FRUSTUM){
    glUseProgram(m_programs.object_frustum);
    glUniformMatrix4fv(m_uniforms.frustum_viewProj, 1 ,GL_FALSE, view.viewProjMatrix);

    testBboxes(job,false);
  }
  else if (method == METHOD_HIZ){
    glUseProgram(m_programs.object_hiz);
    glUniformMatrix4fv(m_uniforms.hiz_viewProj, 1, GL_FALSE, view.viewProjMatrix);

    // depth mip chain is sampled from texture unit 2
    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D,job.m_textureDepthWithMipmaps);

    testBboxes(job,false);

    // unbind again and leave unit 0 active
    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D,0);
    glActiveTexture(GL_TEXTURE0);
  }
  else if (method == METHOD_RASTER){
    // clear visibles
    job.m_bufferVisOutput.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0);
    glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,0);

    glUseProgram(m_programs.object_raster);
    glUniformMatrix4fv(m_uniforms.raster_viewProj, 1, GL_FALSE, view.viewProjMatrix);
    glUniform3fv(m_uniforms.raster_viewPos, 1, view.viewPos);
    glUniform3fv(m_uniforms.raster_viewDir, 1, view.viewDir);

    // boxes are drawn with a negative polygon offset
    glEnable( GL_POLYGON_OFFSET_FILL );
    glPolygonOffset(-1,-1);
    testBboxes(job,true);
    glPolygonOffset(0,0);
    glDisable( GL_POLYGON_OFFSET_FILL );

    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

    glBindBufferBase (GL_SHADER_STORAGE_BUFFER,0,0);
  }
}


void CullingSystem::swapBits( Job &job )
{
  // exchange the "current" and "last" bit buffer descriptions,
  // only the Buffer structs are swapped, no GL data is copied
  const Buffer previousCurrent = job.m_bufferVisBitsCurrent;

  job.m_bufferVisBitsCurrent = job.m_bufferVisBitsLast;
  job.m_bufferVisBitsLast    = previousCurrent;
}


// Runs m_program_indirect_compact as a rasterizer-discarded point draw with
// one point per object. The counter range and the result range are cleared
// (null data pointer) before the draw.
// NOTE(review): the program presumably compacts the indirect commands of
// visible objects into m_bufferIndirectResult - confirm against the shader.
void CullingSystem::JobIndirectUnordered::resultFromBits( const Buffer& bufferVisBitsCurrent )
{
  glEnable(GL_RASTERIZER_DISCARD);

  glUseProgram(m_program_indirect_compact);

  // the clear operates on whatever is bound to the target, so it has to
  // directly follow the bind of the buffer it is meant for
  m_bufferIndirectCounter.BindBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0);
  m_bufferIndirectCounter.ClearBufferSubData (GL_ATOMIC_COUNTER_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);

  // m_bufferIndirectResult is bound last on purpose: the following clear
  // addresses the buffer most recently bound to GL_SHADER_STORAGE_BUFFER
  bufferVisBitsCurrent.   BindBufferRange(GL_SHADER_STORAGE_BUFFER, 2);
  m_bufferObjectIndirects.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 1);
  m_bufferIndirectResult. BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0);
  m_bufferIndirectResult. ClearBufferSubData(GL_SHADER_STORAGE_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);

  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
  glDrawArrays(GL_POINTS,0,m_numObjects);

  // unbind everything that was bound above
  glDisable(GL_RASTERIZER_DISCARD);
  glBindBufferBase  (GL_ATOMIC_COUNTER_BUFFER, 0, 0);
  glBindBufferBase  (GL_SHADER_STORAGE_BUFFER, 2, 0);
  glBindBufferBase  (GL_SHADER_STORAGE_BUFFER, 1, 0);
  glBindBufferBase  (GL_SHADER_STORAGE_BUFFER, 0, 0);
}

void CullingSystem::JobReadback::resultFromBits( const Buffer& bufferVisBitsCurrent )
{
  GLsizeiptr size = sizeof(int) * minDivide(m_numObjects,32);
  glBindBuffer(GL_COPY_READ_BUFFER, bufferVisBitsCurrent.buffer );
  glBindBuffer(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.buffer );
  glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, bufferVisBitsCurrent.offset, m_bufferVisBitsReadback.offset, size);
  glBindBuffer( GL_COPY_READ_BUFFER, 0 );
  glBindBuffer( GL_COPY_WRITE_BUFFER, 0 );
}

void CullingSystem::JobReadback::resultClient()
{
  glBindBuffer(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.buffer);
  glGetBufferSubData(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.offset, m_bufferVisBitsReadback.size, m_hostVisBits);
  glBindBuffer( GL_COPY_WRITE_BUFFER, 0);
}

void CullingSystem::JobReadbackPersistent::resultFromBits(const Buffer& bufferVisBitsCurrent)
{
  GLsizeiptr size = sizeof( int ) * minDivide( m_numObjects, 32 );
  glCopyNamedBufferSubData( bufferVisBitsCurrent.buffer, m_bufferVisBitsReadback.buffer, bufferVisBitsCurrent.offset, m_bufferVisBitsReadback.offset, size);
  if (m_fence) {
    glDeleteSync( m_fence );
  }
  m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}

void CullingSystem::JobReadbackPersistent::resultClient()
{
  if (m_fence) {
    GLsizeiptr size = sizeof( int ) * minDivide( m_numObjects, 32 );
    // as some samples read-back within same frame (not recommended) we use the flush here, normally one wouldnt use it
    glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
    glDeleteSync(m_fence);
    m_fence = NULL;
    memcpy( m_hostVisBits, ((uint8_t*)m_bufferVisBitsMapping) + m_bufferVisBitsReadback.offset, size );
  }
}


================================================
FILE: cullingsystem.hpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#ifndef CULLINGSYSTEM_H__
#define CULLINGSYSTEM_H__

#include <cstddef>
#include <cstdint>
#include <nvgl/extensions_gl.hpp>


// GPU based visibility culling of objects via their bounding boxes.
// The caller provides the GL programs and per-job buffers; results are
// delivered through the Job subclasses below.
class CullingSystem {
public:
  // GL program names used by the system, provided by the caller.
  struct Programs {
    GLuint  object_frustum;
    GLuint  object_hiz;
    GLuint  object_raster;

    GLuint  bit_temporallast;
    GLuint  bit_temporalnew;
    GLuint  bit_regular;
    GLuint  depth_mips;
  };

  // Test used by buildOutput().
  enum MethodType {
    METHOD_FRUSTUM,
    METHOD_HIZ,
    METHOD_RASTER,
    NUM_METHODS,
  };

  // Selects the bit packing program used by bitsFromOutput().
  enum BitType {
    BITS_CURRENT,
    BITS_CURRENT_AND_LAST,
    BITS_CURRENT_AND_NOT_LAST,
    NUM_BITS,
  };

  // A (sub)range of a GL buffer object.
  struct Buffer {
    GLuint      buffer;
    GLsizei     stride;
    GLintptr    offset;
    GLsizeiptr  size;

    // Creates a new buffer object with immutable storage of sizei bytes
    // (optionally initialized from data) and makes this struct cover it
    // completely.
    void create( size_t sizei, const void* data, GLbitfield flags )
    {
      size = sizei;
      offset = 0;
      stride = 0;
      glCreateBuffers( 1, &buffer );
      glNamedBufferStorage( buffer, size, data, flags );
    }

    // Wraps an existing buffer object. With sizei == 0 the size is
    // queried from GL, otherwise sizei is taken as is.
    // Initializers are listed in declaration order and size starts out
    // as 0, so it holds a defined value even if the GL query fails.
    Buffer( GLuint buffer, size_t sizei = 0 )
      : buffer( buffer )
      , stride( 0 )
      , offset( 0 )
      , size( 0 )
    {
      if (!sizei) {
        // pick the query that matches the width of GLsizeiptr
        if (sizeof( GLsizeiptr ) > 4)
          glGetNamedBufferParameteri64v( buffer, GL_BUFFER_SIZE, (GLint64*)&size );
        else
          glGetNamedBufferParameteriv( buffer, GL_BUFFER_SIZE, (GLint*)&size );
      }
      else {
        size = sizei;
      }
    }

    // Empty range, refers to no buffer object.
    Buffer()
      : buffer(0)
      , stride(0)
      , offset(0)
      , size(0)
    {

    }

    // Binds the range to an indexed binding point.
    inline void BindBufferRange(GLenum target, GLuint index) const {
      glBindBufferRange(target, index, buffer, offset, size);
    }
    // Attaches the range to the texture buffer bound to target.
    inline void TexBuffer(GLenum target, GLenum internalformat) const {
      glTexBufferRange(target, internalformat, buffer, offset, size);
    }
    // Clears the range [offset, offset+size) of the buffer currently bound
    // to target; the caller must have bound this buffer there before.
    inline void ClearBufferSubData(GLenum target,GLenum internalformat,GLenum format,GLenum type,const GLvoid* data) const {
      glClearBufferSubData(target,internalformat,offset,size,format,type,data);
    }

  };
  
  // Describes one culling task. Subclasses decide what happens with
  // the resulting visibility bits.
  class Job {
  public:
    // jobs are polymorphic, allow safe destruction through a base pointer
    virtual ~Job() {}

    int     m_numObjects = 0;
      // world-space matrices {mat4 world, mat4 worldInverseTranspose}
    Buffer  m_bufferMatrices;
    Buffer  m_bufferBboxes; // only used in dualindex mode (2 x vec4)
      // 1 32-bit integer per object (index)
    Buffer  m_bufferObjectMatrix;
      // object-space bounding box (2 x vec4)
      // or 1 32-bit integer per object (dualindex mode)
    Buffer  m_bufferObjectBbox;
    
      // 1 32-bit integer per object
    Buffer  m_bufferVisOutput;
    
      // 1 32-bit integer per 32 objects (1 bit per object)
    Buffer  m_bufferVisBitsCurrent;
    Buffer  m_bufferVisBitsLast;
    
      // for HiZ
    GLuint  m_textureDepthWithMipmaps = 0;

    // derive from this class and implement this function how you want to
    // deal with the results that are provided in the buffer
    virtual void resultFromBits( const Buffer& bufferVisBitsCurrent ) = 0;
    // for readback methods we need to wait for a result
    virtual void resultClient() {}

  };

  class JobReadback : public Job {
  public:
    // 1 32-bit integer per 32 objects (1 bit per object)
    Buffer      m_bufferVisBitsReadback;
    uint32_t*   m_hostVisBits = nullptr;

    // Do not use this Job class unless you have to. Persistent 
    // mapped buffers are preferred.

    // Copies result into readback buffer
    void resultFromBits( const Buffer& bufferVisBitsCurrent ) override;

    // getBufferData into hostVisBits (blocking!)
    void resultClient() override;
  };

  class JobReadbackPersistent : public Job {
  public:
    // 1 32-bit integer per 32 objects (1 bit per object)
    Buffer      m_bufferVisBitsReadback;
    void*       m_bufferVisBitsMapping = nullptr;
    uint32_t*   m_hostVisBits = nullptr;
    // must start out null: resultFromBits/resultClient test it to find out
    // whether a fence is pending
    GLsync      m_fence = nullptr;

    // Copies result into readback buffer and records
    // a fence.
    void resultFromBits(const Buffer& bufferVisBitsCurrent) override;

    // waits on fence and copies mapping into hostVisBits
    void resultClient() override;
  };

  // multidrawindirect based
  class JobIndirectUnordered : public Job {
  public:
    GLuint  m_program_indirect_compact = 0;
    // 1 indirectSize per object, 
    Buffer  m_bufferObjectIndirects;
    Buffer  m_bufferIndirectResult;
    // 1 integer
    Buffer  m_bufferIndirectCounter;

    void resultFromBits( const Buffer& bufferVisBitsCurrent ) override;
  };
  
  // Camera description, the pointers are passed to glUniform*fv
  // (viewProjMatrix as one mat4, viewDir/viewPos as one vec3 each).
  struct View {
    const float*  viewProjMatrix;
    const float*  viewDir;
    const float*  viewPos;
  };
  
  void init( const Programs &programs, bool dualindex );
  void deinit();
  void update( const Programs &programs, bool dualindex );
  
  // helper function for HiZ method, leaves fbo bound to 0
  void buildDepthMipmaps(GLuint textureDepth, int width, int height);
  
  // assumes relevant fbo bound for raster method
  void buildOutput( MethodType  method, Job &job, const View& view );

  void bitsFromOutput ( Job &job, BitType type );
  void resultFromBits ( Job &job );
  void resultClient   ( Job &job );

  // swaps the Current/Last bit array (for temporal coherent techniques)
  void swapBits       ( Job &job );

private:

  // uniform locations of the programs above
  struct Uniforms {
    GLint   depth_lod;
    GLint   depth_even;
    GLint   frustum_viewProj;
    GLint   hiz_viewProj;
    GLint   raster_viewProj;
    GLint   raster_viewDir;
    GLint   raster_viewPos;
  };

  void testBboxes( Job &job, bool raster);
  
  Programs  m_programs;
  Uniforms  m_uniforms;
  GLuint    m_fbo;
  GLuint    m_tbo[2];
  bool      m_dualindex;
  bool      m_useSSBO;
  bool      m_useRepesentativeTest;
};

#endif


================================================
FILE: nodetree.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#include "nodetree.hpp"
#include <assert.h>

//////////////////////////////////////////////////////////////////////////


// Puts a node into the "unlinked, not part of the tree" state.
static inline void clearNode(NodeTree::Node &node)
{
  // hierarchy links
  node.parentidx  = NodeTree::INVALID;
  node.childidx   = NodeTree::INVALID;
  node.siblingidx = NodeTree::INVALID;

  // placement inside the level lists
  node.levelidx   = NodeTree::INVALID;
  node.leafidx    = NodeTree::INVALID;
  node.level      = -1;
}

// Creates an empty tree that consists of the root only.
NodeTree::NodeTree()
  : m_treeCompactChangeID(0)
  , m_nodesActive(0)
  , m_levelsUsed(0)
{
  // the root counts as "in the tree" (valid levelidx) and sits one
  // level above the first real level
  clearNode(m_root);
  m_root.level    = -1;
  m_root.levelidx =  0;
}

// Returns the given level, or nullptr when the index lies outside of
// [0, m_levelsUsed).
const NodeTree::Level* NodeTree::getUsedLevel( int level ) const
{
  const bool outOfRange = (level < 0) || (level >= m_levelsUsed);
  if (outOfRange){
    return nullptr;
  }
  return &m_levels[level];
}

// Returns m_treeCompactChangeID, a counter that updateLevelNode() increments
// every time it writes an entry of the compact node array. Callers can compare
// it against a cached value to detect changes.
unsigned int NodeTree::getTreeParentChangeID() const
{
  return m_treeCompactChangeID;
}

// Read-only access to the compact {level, parent} entries; the array is
// indexed by nodeID and has one entry per created node.
const std::vector<NodeTree::compactID>& NodeTree::getTreeCompactNodes() const
{
  return m_treeCompactNodes;
}

// Hands out a cleared node, recycling the slot of a deleted node when one
// is available and growing the node arrays otherwise.
NodeTree::nodeID NodeTree::createNode()
{
  nodeID id;

  if (m_unusedNodes.empty()){
    // grow both arrays in lockstep so they stay indexable by nodeID
    m_nodes.push_back(Node());
    m_treeCompactNodes.push_back(compactID());
    id = (nodeID)(m_nodes.size()-1);
  }
  else{
    id = m_unusedNodes.back();
    m_unusedNodes.pop_back();
  }

  clearNode(getNode(id));

  return id;
}

// Detaches a node from its children and its parent and queues the slot
// for reuse by createNode(). The root cannot be deleted.
void NodeTree::deleteNode( nodeID nodeidx )
{
  assert (isValid(nodeidx) && nodeidx != ROOT);

  const Node &self = getNode(nodeidx);

  // orphan the children one by one; unparenting the first child
  // advances self.childidx to the next sibling
  for (;;){
    const nodeID firstChild = self.childidx;
    if (!isValid(firstChild)){
      break;
    }
    setNodeParent(firstChild,INVALID);
  }

  // take ourselves out of the parent's child list
  setNodeParent(nodeidx,INVALID);

  m_unusedNodes.push_back(nodeidx);
}

// Re-parents a node.
//   nodeidx   - node to move, must be valid and must not be ROOT
//   parentidx - new parent (ROOT is allowed) or INVALID to only unlink
//
// The node is removed from the child list of its old parent, pushed to the
// front of the child list of the new parent, and - if it is currently part
// of the tree - moved (together with its children) to the level that matches
// the new parent. When the new parent is not in the tree the node is removed
// from the tree.
void NodeTree::setNodeParent( nodeID nodeidx, nodeID parentidx )
{
  assert (isValid(nodeidx) && nodeidx != ROOT);

  Node &node = getNode(nodeidx);
  if (node.parentidx == parentidx)
    return;

  if (isValid(node.parentidx)){
    // unlink from old
    const nodeID oldparentidx = node.parentidx;
    Node& parent = getNode(oldparentidx);
    bool found = false;
    
    if (parent.childidx == nodeidx){
      parent.childidx = node.siblingidx;
      found = true;
    }
    else if (isValid(parent.childidx)){
      nodeID child = parent.childidx;
      while(isValid(getNode(child).siblingidx)){
        if (getNode(child).siblingidx == nodeidx){
          getNode(child).siblingidx = node.siblingidx;
          found = true;
          break;
        }
        child = getNode(child).siblingidx;
      }
    }

    assert(found && "node was not a child of parent");
    (void)found; // only read by the assert
    node.siblingidx = INVALID;

    // The old parent may just have lost its last child.
    // ROOT lives at level -1 and is not stored in any Level, so it never
    // takes part in the leaf bookkeeping.
    if (oldparentidx != ROOT){
      updateLeafNode(oldparentidx);
    }
  }

  if (isValid(parentidx)){
    // link to new
    Node& parent = getNode(parentidx);
    node.siblingidx = parent.childidx;
    parent.childidx = nodeidx;

    // The NEW parent just gained a child. node.parentidx still holds the
    // old parent at this point, so it must not be used here.
    if (parentidx != ROOT){
      updateLeafNode(parentidx);
    }
  }

  if (isNodeInTree(nodeidx)){
    updateLevelNode(nodeidx, isNodeInTree(parentidx) ? parentidx : INVALID);
  }

  // updateLevelNode relies on parentidx being updated last
  node.parentidx = parentidx;
}

// Activates a node (and, through updateLevelNode, its children) below its
// current parent. The parent has to be part of the tree already.
void NodeTree::addToTree( nodeID nodeidx )
{
  assert (isValid(nodeidx) && nodeidx != ROOT);

  const nodeID parentidx = getNode(nodeidx).parentidx;
  assert (!isNodeInTree(nodeidx)        && "must not be already added to tree");
  assert ( isNodeInTree(parentidx) && "parent must be already added to tree");

  updateLevelNode(nodeidx,parentidx);
}

// Deactivates a node and, through updateLevelNode, its children.
// The parent/child links themselves are kept, only the level bookkeeping
// is removed.
void NodeTree::removeFromTree( nodeID nodeidx )
{
  assert (isValid(nodeidx) && nodeidx != ROOT);
  assert (isNodeInTree(nodeidx) && "must be already added to tree");

  updateLevelNode(nodeidx,INVALID);
}

// Appends the node to the level directly below its parent's level and
// registers it as a leaf when it has no children.
void NodeTree::addToLevel( nodeID nodeidx, nodeID parentidx )
{
  const int newLevel = getNode(parentidx).level + 1;

  Node&  node  = getNode(nodeidx);
  Level& level = getLevel(newLevel);

  level.changeID++;

  // the node takes the next free slot of the level
  node.level    = newLevel;
  node.levelidx = (lvlID)level.nodes.size();
  level.nodes.push_back(nodeidx);

  const bool childless = !isValid(node.childidx);
  if (childless){
    addLeafNode(nodeidx);
  }

  // grow the number of used levels if we opened a new one
  if (node.level+1 > m_levelsUsed){
    m_levelsUsed = node.level+1;
  }

  m_nodesActive++;
}

// Takes the node out of its level (swap with last, then pop), drops its
// leaf entry if it has one and marks the node as inactive.
void NodeTree::removeFromLevel( nodeID nodeidx )
{
  Node&   node  = getNode(nodeidx);
  Level&  level = getLevel(node.level);

  level.changeID++;

  // fill the hole with the last node of the level and fix up its index
  const lvlID  slot  = node.levelidx;
  const nodeID moved = level.nodes.back();
  level.nodes[slot]      = moved;
  getNode(moved).levelidx = slot;
  level.nodes.pop_back();

  if (isValid(node.leafidx)){
    removeLeafNode(nodeidx);
  }

  // the topmost used level became empty
  const bool wasLastUsedLevel = (node.level+1 == m_levelsUsed);
  if (wasLastUsedLevel && level.nodes.empty()){
    m_levelsUsed--;
  }

  node.leafidx  = INVALID;
  node.levelidx = INVALID;
  node.level    = -1;

  m_nodesActive--;
}

// Removes the node from the leaf list of its level (swap with last, then
// pop) and invalidates the node's leafidx.
// The node must be in the tree and must currently be listed as a leaf.
void NodeTree::removeLeafNode( nodeID nodeidx )
{
  assert(isNodeInTree(nodeidx));
  Node& node    = getNode(nodeidx);
  Level& level  = getLevel(node.level);
  assert(isValid(node.leafidx) && node.leafidx < level.leaves.size());

  // remove
  const lvlID slot = node.leafidx;
  level.leaves[slot] = level.leaves[level.leaves.size()-1];
  getNode(level.leaves[slot]).leafidx = slot;
  level.leaves.pop_back();

  // Without this the node would keep pointing at a slot that is now owned by
  // another leaf (or no longer exists); leafidx is what updateLeafNode and
  // removeFromLevel use to decide whether the node is listed.
  // Done last, so it is also correct when the node was the last entry.
  node.leafidx = INVALID;
}

// Appends the node to the leaf list of its level and remembers the slot.
void NodeTree::addLeafNode( nodeID nodeidx )
{
  assert(isNodeInTree(nodeidx));

  Node& node = getNode(nodeidx);
  std::vector<nodeID>& leaves = getLevel(node.level).leaves;

  // add
  node.leafidx = (lvlID)leaves.size();
  leaves.push_back(nodeidx);
}

// Re-evaluates whether a node belongs into the leaf list of its level after
// its child list changed. A leaf is a node without children (see addToLevel).
// Nodes outside of the tree and ROOT are ignored: ROOT sits at level -1 and
// is not stored in any Level, so it has no leaf list to be part of.
void NodeTree::updateLeafNode( nodeID nodeidx )
{
  if (nodeidx == ROOT || !isNodeInTree(nodeidx))
    return;

  Node& node    = getNode(nodeidx);
  const bool hasChildren = isValid(node.childidx);
  const bool isListed    = isValid(node.leafidx);

  if (hasChildren && isListed){
    // gained its first child, no longer a leaf
    removeLeafNode(nodeidx);
    // make sure the slot is not considered valid on the next update
    node.leafidx = INVALID;
  }
  else if (!hasChildren && !isListed){
    // lost its last child, became a leaf
    addLeafNode(nodeidx);
  }
}

// Brings the level bookkeeping of a node (and recursively of its children)
// in line with a new tree parent. INVALID as parent removes the subtree
// from the levels.
void NodeTree::updateLevelNode( nodeID nodeidx, nodeID parentidx )
{
  // node.parentidx still holds the old value while this runs
  Node &node = getNode(nodeidx);

  const bool wasActive = isValid(node.levelidx);
  const bool hasParent = isValid(parentidx);

  // the compact parent array always reflects the latest state
  m_treeCompactNodes[nodeidx].parent = parentidx;
  m_treeCompactChangeID++;

  if (wasActive && hasParent){
    // same level as before: only the parent changed, nothing to move
    // (this also leaves the children untouched)
    if (node.level == getNode(parentidx).level + 1){
      return;
    }

    removeFromLevel(nodeidx);
    addToLevel(nodeidx,parentidx);
  }
  else if (wasActive){
    removeFromLevel(nodeidx);
  }
  else if (hasParent){
    // was inactive, add to level
    addToLevel(nodeidx,parentidx);
  }

  m_treeCompactNodes[nodeidx].level  = node.level;

  // children follow: they stay attached to this node if it is active,
  // otherwise they are removed as well
  const nodeID childParent = hasParent ? nodeidx : INVALID;
  for (nodeID child = node.childidx; isValid(child); child = getNode(child).siblingidx){
    updateLevelNode(child, childParent);
  }
}

// Reserves capacity for numNodes entries in the node array and in the
// compact node array; no nodes are created.
void NodeTree::reserve( int numNodes )
{
  m_nodes.reserve( numNodes );
  m_treeCompactNodes.reserve( numNodes );
}

void NodeTree::create( int numNodes )
{
  Node node;
  clearNode(node);

  m_nodes.resize( numNodes, node );
  m_treeCompactNodes.resize( numNodes, compactID() );
}

void NodeTree::clear()
{
  m_nodesActive = 0;
  m_levelsUsed  = 0;
  m_treeCompactChangeID = 0;
  m_levels.clear();
  m_nodes.clear();
  m_treeCompactNodes.clear();
}



================================================
FILE: nodetree.hpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#pragma once

#include <vector>

// Hierarchy of nodes that additionally keeps, for all nodes that were added
// to the tree, flat per-level node lists and a compact {level, parent} array.
// Nodes are addressed by nodeID; ROOT is a built-in node that is always part
// of the tree.
class NodeTree {
public:
  enum Flags {
    INVALID = 0xFFFFFFFF,     // "no node" / "no index"
    ROOT = 0x7FFFFFFF,        // id of the built-in root node
    LEVELBITS = 8,            // bits of compactID::level
    PARENTBITS = 32 - LEVELBITS
  };

  // all-ones values of the two compactID bit fields
  static constexpr unsigned INVALID_LEVEL = (1 << LEVELBITS) - 1;
  static constexpr unsigned INVALID_PARENT = (1 << PARENTBITS) - 1;

  // level and parent of a node packed into 32 bits,
  // default constructed with both fields invalid
  struct compactID {
    unsigned level : LEVELBITS;
    unsigned parent : PARENTBITS;

    compactID(){
      level = INVALID_LEVEL;
      parent = INVALID_PARENT;
    }
  };
  typedef unsigned int nodeID;
  typedef unsigned int lvlID;


  // all active nodes of one depth level
  struct Level {
    unsigned int          changeID;   // incremented when nodes are added to / removed from the level
    std::vector<nodeID>   nodes;      // every active node of the level
    std::vector<nodeID>   leaves;     // the subset tracked as leaves

    Level(){
      changeID = 0;
    }
  };

  struct Node {
    nodeID                parentidx;
    lvlID                 levelidx;   // slot in Level::nodes, INVALID when not in the tree
    lvlID                 leafidx;    // slot in Level::leaves, INVALID when not listed
    int                   level;      // -1 when not in the tree (and for the root)
    nodeID                childidx;   // first child
    nodeID                siblingidx; // next child of the same parent
  };

private:

  Node                              m_root;

  // general nodes
  std::vector<Node>                 m_nodes;
  std::vector<nodeID>               m_unusedNodes;

  // actual nodes added to tree
  std::vector<compactID>            m_treeCompactNodes;
  std::vector<Level>                m_levels;
  unsigned int                      m_treeCompactChangeID;
  int                               m_nodesActive;
  int                               m_levelsUsed;

public:
  NodeTree();

  // nullptr when level is outside of [0, getNumUsedLevel())
  const Level*  getUsedLevel(int level) const;
  inline int getNumUsedLevel() const 
  {
    return m_levelsUsed;
  }

  unsigned int getTreeParentChangeID() const;
  const std::vector<compactID>& getTreeCompactNodes() const;

  inline nodeID getTreeRoot() const
  {
    return ROOT;
  }

  // nodeidx must be ROOT or the id of a created node, it is not range checked
  inline const Node& getNode(nodeID nodeidx) const
  {
    if (nodeidx == ROOT) return m_root;
    else                 return m_nodes[nodeidx];
  }

  // true for everything but INVALID (works for node and level indices)
  inline bool  isValid(unsigned int id) const
  {
    return id != INVALID;
  }

  // true when the node currently occupies a slot in a level; always true for ROOT
  inline bool  isNodeInTree(nodeID nodeidx) const
  {
    return isValid(nodeidx) && isValid(getNode(nodeidx).levelidx);
  }

  inline nodeID  getParentNode(nodeID nodeidx) const
  {
    return getNode(nodeidx).parentidx;
  }

  // returns an unlinked node that is not part of the tree yet
  nodeID  createNode();

  // unlinks the node from parent and children and recycles its id
  void    deleteNode(nodeID nodeidx);

  // parentidx may be ROOT, or INVALID to only unlink
  void    setNodeParent(nodeID nodeidx, nodeID parentidx);

  // the node's parent must already be in the tree
  void    addToTree(nodeID nodeidx);

  void    removeFromTree(nodeID nodeidx);

  void    reserve(int numNodes);

  void    create(int numNodes);

  void    clear();

  int     getNumActiveNodes() const {
    return m_nodesActive;
  }

private:

  // returns the level, growing m_levels on demand; level must be >= 0
  inline Level& getLevel(int level)
  {
    if ((int)m_levels.size() < level+1){
      m_levels.resize(level+1);
    }
    return m_levels[level];
  }

  inline Node& getNode(nodeID nodeidx)
  {
    if (nodeidx == ROOT) return m_root;
    else                 return m_nodes[nodeidx];
  }

  void addToLevel(nodeID nodeidx, nodeID parentidx);

  void removeFromLevel(nodeID nodeidx);

  void removeLeafNode(nodeID nodeidx);

  void addLeafNode(nodeID nodeidx);

  void updateLeafNode(nodeID nodeidx);

  void updateLevelNode(nodeID nodeidx, nodeID parentidx);

};






================================================
FILE: nvtoken.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#include "nvtoken.hpp"

namespace nvtoken
{

  //////////////////////////////////////////////////////////////////////////
  // generic

  // Lookup tables filled by nvtokenInitInternals():
  // encoded header value per token type (from the driver or software encoded)
  GLuint   s_nvcmdlist_header[NVTOKEN_TYPES] = {0};
  // size in bytes of each token struct, indexed by token type
  GLuint   s_nvcmdlist_headerSizes[NVTOKEN_TYPES] = {0};
  // shader stage index per NVTOKEN_STAGE_* value
  GLushort s_nvcmdlist_stages[NVTOKEN_STAGES] = {0};
  // whether address tokens are interpreted as bindless addresses by the emulation
  bool     s_nvcmdlist_bindless  = false;
  
  // Software header encoding: token type in the low 16 bits,
  // token size in the bits above.
  static inline GLuint nvtokenHeaderSW(GLuint type, GLuint size){
    const GLuint sizeBits = size << 16;
    return sizeBits | type;
  }
  
  // Extracts the token type (low 16 bits) from a software encoded header.
  static inline GLenum nvtokenHeaderCommandSW(GLuint header)
  {
    const GLuint typeMask = 0xFFFF;
    return header & typeMask;
  }

  // Extracts the token size (bits 16 and up) from a software encoded header.
  static inline GLuint nvtokenHeaderSizeSW(GLuint header)
  {
    const GLuint size = header >> 16;
    return size;
  }

  // Maps an encoded header back to its token type by searching the header
  // table. Asserts on unknown headers and then returns -1 (converted to GLenum).
  static inline GLenum nvtokenHeaderCommand(GLuint header)
  {
    int type = 0;
    while (type < NVTOKEN_TYPES){
      if (s_nvcmdlist_header[type] == header){
        return type;
      }
      ++type;
    }

    assert(0 && "can't find header");
    return -1;
  }

  // Stores sizeof(T) in the size table slot given by T::ID.
  // T is expected to be one of the NVToken* structs.
  template <class T>
  static void nvtokenRegisterSize()
  {
    s_nvcmdlist_headerSizes[T::ID] = sizeof(T);
  }

  void nvtokenInitInternals( bool hwsupport, bool bindlessSupport)
  {
    assert( !hwsupport || (hwsupport && bindlessSupport) );

    nvtokenRegisterSize<NVTokenTerminate>();
    nvtokenRegisterSize<NVTokenNop>();
    nvtokenRegisterSize<NVTokenDrawElems>();
    nvtokenRegisterSize<NVTokenDrawArrays>();
    nvtokenRegisterSize<NVTokenDrawElemsStrip>();
    nvtokenRegisterSize<NVTokenDrawArraysStrip>();
    nvtokenRegisterSize<NVTokenDrawElemsInstanced>();
    nvtokenRegisterSize<NVTokenDrawArraysInstanced>();
    nvtokenRegisterSize<NVTokenVbo>();
    nvtokenRegisterSize<NVTokenIbo>();
    nvtokenRegisterSize<NVTokenUbo>();
    nvtokenRegisterSize<NVTokenLineWidth>();
    nvtokenRegisterSize<NVTokenPolygonOffset>();
    nvtokenRegisterSize<NVTokenScissor>();
    nvtokenRegisterSize<NVTokenBlendColor>();
    nvtokenRegisterSize<NVTokenViewport>();
    nvtokenRegisterSize<NVTokenAlphaRef>();
    nvtokenRegisterSize<NVTokenStencilRef>();
    nvtokenRegisterSize<NVTokenFrontFace>();
    
    for (int i = 0; i < NVTOKEN_TYPES; i++){
      GLuint sz = s_nvcmdlist_headerSizes[i];
      assert(sz);
    }
    
    s_nvcmdlist_bindless  = bindlessSupport;
    
    if (hwsupport){
      for (int i = 0; i < NVTOKEN_TYPES; i++){
        s_nvcmdlist_header[i] = glGetCommandHeaderNV(i,s_nvcmdlist_headerSizes[i]);
      }
      s_nvcmdlist_stages[NVTOKEN_STAGE_VERTEX] = glGetStageIndexNV(GL_VERTEX_SHADER);
      s_nvcmdlist_stages[NVTOKEN_STAGE_TESS_CONTROL] = glGetStageIndexNV(GL_TESS_CONTROL_SHADER);
      s_nvcmdlist_stages[NVTOKEN_STAGE_TESS_EVALUATION] = glGetStageIndexNV(GL_TESS_EVALUATION_SHADER);
      s_nvcmdlist_stages[NVTOKEN_STAGE_GEOMETRY] = glGetStageIndexNV(GL_GEOMETRY_SHADER);
      s_nvcmdlist_stages[NVTOKEN_STAGE_FRAGMENT] = glGetStageIndexNV(GL_FRAGMENT_SHADER);
    }
    else{
      for (int i = 0; i < NVTOKEN_TYPES; i++){
        s_nvcmdlist_header[i] = nvtokenHeaderSW(i,s_nvcmdlist_headerSizes[i]);
      }
      for (int i = 0; i < NVTOKEN_STAGES; i++){
        s_nvcmdlist_stages[i] = i;
      }
    }
  }

// expands to a case label that returns the enumerant's own name
#define TOSTRING(a)  case a: return #a;
  // Returns the name of a command token enumerant as a string literal,
  // or NULL when the value is not a known command token.
  const char* nvtokenCommandToString(GLenum type){
    switch  (type){
      TOSTRING(GL_NOP_COMMAND_NV                   );
      TOSTRING(GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV);
      TOSTRING(GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV  );
      TOSTRING(GL_ELEMENT_ADDRESS_COMMAND_NV       );
      TOSTRING(GL_ATTRIBUTE_ADDRESS_COMMAND_NV     );
      TOSTRING(GL_UNIFORM_ADDRESS_COMMAND_NV       );
      TOSTRING(GL_BLEND_COLOR_COMMAND_NV           );
      TOSTRING(GL_STENCIL_REF_COMMAND_NV           );
      TOSTRING(GL_TERMINATE_SEQUENCE_COMMAND_NV    );
      TOSTRING(GL_LINE_WIDTH_COMMAND_NV            );
      TOSTRING(GL_POLYGON_OFFSET_COMMAND_NV        );
      TOSTRING(GL_ALPHA_REF_COMMAND_NV             );
      TOSTRING(GL_VIEWPORT_COMMAND_NV              );
      TOSTRING(GL_SCISSOR_COMMAND_NV               );
      TOSTRING(GL_DRAW_ELEMENTS_COMMAND_NV         );
      TOSTRING(GL_DRAW_ARRAYS_COMMAND_NV           );
      TOSTRING(GL_DRAW_ELEMENTS_STRIP_COMMAND_NV   );
      TOSTRING(GL_DRAW_ARRAYS_STRIP_COMMAND_NV     );
      // front face tokens are registered and emulated in this file as well,
      // so they need a name too
      TOSTRING(GL_FRONT_FACE_COMMAND_NV            );
    }
    return NULL;
  }

  //////////////////////////////////////////////////////////////////////////


  void nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSize, int stats[NVTOKEN_TYPES] )
  {
    const GLubyte* NV_RESTRICT current = (GLubyte*)stream;
    const GLubyte* streamEnd = current + streamSize;

    while (current < streamEnd){
      const GLuint*             header  = (const GLuint*)current;

      GLenum type = nvtokenHeaderCommand(*header);
      stats[type]++;

      current += s_nvcmdlist_headerSizes[type];
    }
  }


  // Emulation related

  static inline GLenum nvtokenDrawCommandSequenceSW( const void* NV_RESTRICT stream, size_t streamSize, GLenum mode, GLenum type, const StateSystem::State& state )
  {
    const GLubyte* NV_RESTRICT current = (GLubyte*)stream;
    const GLubyte* streamEnd = current + streamSize;

    GLenum modeStrip;
    if      (mode == GL_LINES)                modeStrip = GL_LINE_STRIP;
    else if (mode == GL_TRIANGLES)            modeStrip = GL_TRIANGLE_STRIP;
    /*else if (mode == GL_QUADS)                modeStrip = GL_QUAD_STRIP;*/
    else if (mode == GL_LINES_ADJACENCY)      modeStrip = GL_LINE_STRIP_ADJACENCY;
    else if (mode == GL_TRIANGLES_ADJACENCY)  modeStrip = GL_TRIANGLE_STRIP_ADJACENCY;
    else    modeStrip = mode;

    GLenum modeSpecial;
    if      (mode == GL_LINES)      modeSpecial = GL_LINE_LOOP;
    else if (mode == GL_TRIANGLES)  modeSpecial = GL_TRIANGLE_FAN;
    else    modeSpecial = mode;

    while (current < streamEnd){
      const GLuint*             header  = (const GLuint*)current;

      GLenum cmdtype = nvtokenHeaderCommand(*header);
      // if you always use emulation on non-native tokens you can use 
      // cmdtype = nvtokenHeaderCommandSW(header->encoded)
      switch(cmdtype){
      case GL_TERMINATE_SEQUENCE_COMMAND_NV:
        {
          return type;
        }
        break;
      case GL_NOP_COMMAND_NV:
        {
        }
        break;
      case GL_DRAW_ELEMENTS_COMMAND_NV:
        {
          const DrawElementsCommandNV* cmd = (const DrawElementsCommandNV*)current;
          glDrawElementsBaseVertex(mode, cmd->count, type, (const GLvoid*)(cmd->firstIndex * sizeof(GLuint)), cmd->baseVertex);
        }
        break;
      case GL_DRAW_ARRAYS_COMMAND_NV:
        {
          const DrawArraysCommandNV* cmd = (const DrawArraysCommandNV*)current;
          glDrawArrays(mode, cmd->first, cmd->count);
        }
        break;
      case GL_DRAW_ELEMENTS_STRIP_COMMAND_NV:
        {
          const DrawElementsCommandNV* cmd = (const DrawElementsCommandNV*)current;
          glDrawElementsBaseVertex(modeStrip, cmd->count, type, (const GLvoid*)(cmd->firstIndex * sizeof(GLuint)), cmd->baseVertex);
        }
        break;
      case GL_DRAW_ARRAYS_STRIP_COMMAND_NV:
        {
          const DrawArraysCommandNV* cmd = (const DrawArraysCommandNV*)current;
          glDrawArrays(modeStrip, cmd->first, cmd->count);
        }
        break;
      case GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV:
        {
          const DrawElementsInstancedCommandNV* cmd = (const DrawElementsInstancedCommandNV*)current;

          assert (cmd->mode == mode || cmd->mode == modeStrip || cmd->mode == modeSpecial);

          glDrawElementsIndirect(cmd->mode, type, &cmd->count);
        }
        break;
      case GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV:
        {
          const DrawArraysInstancedCommandNV* cmd = (const DrawArraysInstancedCommandNV*)current;

          assert (cmd->mode == mode || cmd->mode == modeStrip || cmd->mode == modeSpecial);

          glDrawArraysIndirect(cmd->mode, &cmd->count);
        }
        break;
      case GL_ELEMENT_ADDRESS_COMMAND_NV:
        {
          const ElementAddressCommandNV* cmd = (const ElementAddressCommandNV*)current;
          type = cmd->typeSizeInByte == 4 ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT;
          if (s_nvcmdlist_bindless){
            glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x7FFFFFFF);
          }
          else{
            const ElementAddressCommandEMU* cmd = (const ElementAddressCommandEMU*)current;
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, cmd->buffer);
          }
        }
        break;
      case GL_ATTRIBUTE_ADDRESS_COMMAND_NV:
        {
          if (s_nvcmdlist_bindless){
            const AttributeAddressCommandNV* cmd = (const AttributeAddressCommandNV*)current;
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, cmd->index, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x7FFFFFFF);
          }
          else{
            const AttributeAddressCommandEMU* cmd = (const AttributeAddressCommandEMU*)current;
            glBindVertexBuffer(cmd->index, cmd->buffer, cmd->offset, state.vertexformat.bindings[cmd->index].stride);
          }
        }
        break;
      case GL_UNIFORM_ADDRESS_COMMAND_NV:
        {
           if (s_nvcmdlist_bindless){
            const UniformAddressCommandNV* cmd = (const UniformAddressCommandNV*)current;
            glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, cmd->index, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x10000);
          }
          else{
            const UniformAddressCommandEMU* cmd = (const UniformAddressCommandEMU*)current;
            glBindBufferRange(GL_UNIFORM_BUFFER,cmd->index, cmd->buffer, cmd->offset256 * 256, cmd->size4*4);
          }
        }
        break;
      case GL_BLEND_COLOR_COMMAND_NV:
        {
          const BlendColorCommandNV* cmd = (const BlendColorCommandNV*)current;
          glBlendColor(cmd->red,cmd->green,cmd->blue,cmd->alpha);
        }
        break;
      case GL_STENCIL_REF_COMMAND_NV:
        {
          const StencilRefCommandNV* cmd = (const StencilRefCommandNV*)current;
          glStencilFuncSeparate(GL_FRONT, state.stencil.funcs[StateSystem::FACE_FRONT].func, cmd->frontStencilRef, state.stencil.funcs[StateSystem::FACE_FRONT].mask);
          glStencilFuncSeparate(GL_BACK,  state.stencil.funcs[StateSystem::FACE_BACK ].func, cmd->backStencilRef,  state.stencil.funcs[StateSystem::FACE_BACK ].mask);
        }
        break;

      case GL_LINE_WIDTH_COMMAND_NV:
        {
          const LineWidthCommandNV* cmd = (const LineWidthCommandNV*)current;
          glLineWidth(cmd->lineWidth);
        }
        break;
      case GL_POLYGON_OFFSET_COMMAND_NV:
        {
          const PolygonOffsetCommandNV* cmd = (const PolygonOffsetCommandNV*)current;
          glPolygonOffset(cmd->scale,cmd->bias);
        }
        break;
      case GL_ALPHA_REF_COMMAND_NV:
        {/*
          const AlphaRefCommandNV* cmd = (const AlphaRefCommandNV*)current;
          glAlphaFunc(state.alpha.mode, cmd->alphaRef);
          */
        }
        break;
      case GL_VIEWPORT_COMMAND_NV:
        {
          const ViewportCommandNV* cmd = (const ViewportCommandNV*)current;
          glViewport(cmd->x, cmd->y, cmd->width, cmd->height);
        }
        break;
      case GL_SCISSOR_COMMAND_NV:
        {
          const ScissorCommandNV* cmd = (const ScissorCommandNV*)current;
          glScissor(cmd->x,cmd->y,cmd->width,cmd->height);
        }
        break;
      case GL_FRONT_FACE_COMMAND_NV:
        {
          FrontFaceCommandNV* cmd = (FrontFaceCommandNV*)current;
          glFrontFace(cmd->frontFace?GL_CW:GL_CCW);
        }
        break;
      }


      GLuint tokenSize = s_nvcmdlist_headerSizes[cmdtype];
      assert(tokenSize);

      current += tokenSize;

    }
    return type;
  }

  // Emulates `count` token sequences of one stream with a fixed base
  // primitive mode. offsets[i] / sizes[i] give the byte range of sequence i.
  // The active index type starts as GL_UNSIGNED_SHORT and is carried over
  // from one sequence to the next.
  void nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream, size_t streamSize, 
    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, 
    GLuint count, 
    StateSystem::State &state)
  {
    const char* NV_RESTRICT tokens = (const char*)stream;

    GLenum indexType = GL_UNSIGNED_SHORT;
    GLuint sequence  = 0;
    while (sequence < count)
    {
      const size_t offset = offsets[sequence];
      const size_t size   = sizes[sequence];

      assert(size + offset <= streamSize);

      indexType = nvtokenDrawCommandSequenceSW(tokens + offset, size, mode, indexType, state);
      ++sequence;
    }
  }

#if NVTOKEN_STATESYSTEM
  void nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_t streamSize, 
    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, 
    const GLuint* NV_RESTRICT states, const GLuint* NV_RESTRICT fbos, GLuint count, 
    StateSystem &stateSystem)
  {
    int lastFbo = ~0;
    const char* NV_RESTRICT tokens = (const char*)stream;

    StateSystem::StateID lastID;

    GLenum type = GL_UNSIGNED_SHORT;
    for (GLuint i = 0; i < count; i++)
    {
      GLuint fbo;

      StateSystem::StateID curID = states[i];
      const StateSystem::State&  state = stateSystem.get(curID);

      if (fbos[i]){
        fbo = fbos[i];
      }
      else{
        fbo = state.fbo.fboDraw;
      }

      if (fbo != lastFbo){
        glBindFramebuffer(GL_FRAMEBUFFER, fbo);
        lastFbo = fbo;
      }

      if (i == 0){
        stateSystem.applyGL( curID, true ); // quite costly
      }
      else {
        stateSystem.applyGL( curID, lastID, true );
      }
      lastID = curID;

      size_t offset = offsets[i];
      size_t size   = sizes[i];

      GLenum mode = state.basePrimitiveMode;

      assert(size + offset <= streamSize);

      type = nvtokenDrawCommandSequenceSW(&tokens[offset], size, mode, type, state);
    }
  }
#endif
}


================================================
FILE: nvtoken.hpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */


#include <assert.h>
#include <string>
#include <vector>

#define NVTOKEN_STATESYSTEM 1

#include "platform.h"
#include <nvgl/extensions_gl.hpp>
#if NVTOKEN_STATESYSTEM
// not needed if emulation is not used, or implemented differently
#include "statesystem.hpp"
#else
// Stand-in used only when NVTOKEN_STATESYSTEM is 0 (statesystem.hpp is not
// included in that case). It declares a reduced State with vertex binding
// strides, the alpha mode and per-face stencil func/mask — presumably the
// subset the software token emulation reads; verify against nvtoken.cpp.
namespace StateSystem {
  // Minimal emulation layer
  enum Faces {
    FACE_FRONT,
    FACE_BACK,
    MAX_FACES,  // count of faces; sizes the per-face stencil array below
  };
  struct State {
    // per vertex-buffer-binding data (16 binding slots)
    struct {
      struct {
        GLsizei stride;
      }bindings[16];
    }vertexformat;

    // alpha test mode
    struct {
      GLenum mode;
    }alpha;

    // stencil compare function and mask, indexed by Faces
    struct {
      struct {
        GLenum func;
        GLuint mask;
      }funcs[MAX_FACES];
    }stencil;
  };
}
#endif


namespace nvtoken
{

  //////////////////////////////////////////////////////////////////////////
  // generic

  // not the cleanest way
  #define NVTOKEN_TYPES (GL_FRONT_FACE_COMMAND_NV+1)

  // Shader stage selector used by NVTokenUbo::setBinding; the value indexes
  // s_nvcmdlist_stages to obtain the stage id written into the token.
  enum NVTokenShaderStage {
    NVTOKEN_STAGE_VERTEX,
    NVTOKEN_STAGE_TESS_CONTROL,
    NVTOKEN_STAGE_TESS_EVALUATION,
    NVTOKEN_STAGE_GEOMETRY,
    NVTOKEN_STAGE_FRAGMENT,
    NVTOKEN_STAGES, // number of stages; sizes s_nvcmdlist_stages
  };

  // Shared lookup tables, defined outside this header.
  // s_nvcmdlist_bindless    : when true the address tokens encode GPU addresses,
  //                           otherwise they encode buffer ids via the *EMU layouts.
  // s_nvcmdlist_header      : token header value, indexed by token type id.
  // s_nvcmdlist_headerSizes : token size used to advance through a stream, indexed by token type id.
  // s_nvcmdlist_stages      : stage id written into uniform tokens, indexed by NVTokenShaderStage.
  extern bool     s_nvcmdlist_bindless;
  extern GLuint   s_nvcmdlist_header[NVTOKEN_TYPES];
  extern GLuint   s_nvcmdlist_headerSizes[NVTOKEN_TYPES];
  extern GLushort s_nvcmdlist_stages[NVTOKEN_STAGES];
  
  // Write cursor over caller-owned memory. The class never allocates or frees;
  // init() must be called before any other member is used, since there is no
  // constructor that sets the pointers.
  class NVPointerStream {
  public:
    size_t          m_max;    // capacity in bytes, as passed to init()
    unsigned char*  m_begin;  // first byte of the storage
    unsigned char*  m_end;    // one past the last byte of the storage
    unsigned char* NV_RESTRICT m_cur;  // next write position

    // Attaches the stream to `size` bytes starting at `data` and rewinds it.
    void init(void* data, size_t size)
    {
      unsigned char* first = (unsigned char*)data;

      m_max   = size;
      m_begin = first;
      m_cur   = first;
      m_end   = first + size;
    }

    // Number of bytes written so far.
    size_t size() const
    {
      return m_cur - m_begin;
    }

    // Total number of bytes the storage can hold.
    size_t  capacity() const
    {
      return m_max;
    }
  };

  // Parallel arrays describing the sequences inside a token stream; entry i of
  // each vector belongs to the same sequence. The element types match the
  // offsets/sizes/states/fbos pointer parameters of nvtokenDrawCommandsStatesSW.
  struct NVTokenSequence {
    std::vector<GLintptr>  offsets;  // start of each sequence within the stream
    std::vector<GLsizei>   sizes;    // size of each sequence
    std::vector<GLuint>    states;   // state id per sequence
    std::vector<GLuint>    fbos;     // framebuffer per sequence; 0 falls back to the state's fbo in the SW path
  };

#pragma pack(push,1)

  // Emulation layout that NVTokenIbo overlays (via union) on
  // ElementAddressCommandNV when bindless addresses are not used:
  // a buffer id is stored in place of the GPU address.
  typedef struct {
    GLuint   header;
    GLuint   buffer;          // GL buffer object id
    GLuint   _pad;            // written as 0 by NVTokenIbo::setBuffer
    GLuint   typeSizeInByte;  // index size in bytes
  } ElementAddressCommandEMU;

  // Emulation layout that NVTokenVbo overlays (via union) on
  // AttributeAddressCommandNV when bindless addresses are not used:
  // buffer id and offset are stored in place of the GPU address.
  typedef struct {
    GLuint   header;
    GLuint   index;   // vertex binding index
    GLuint   buffer;  // GL buffer object id
    GLuint   offset;  // offset passed to NVTokenVbo::setBuffer
  } AttributeAddressCommandEMU;

  // Emulation layout that NVTokenUbo overlays (via union) on
  // UniformAddressCommandNV when bindless addresses are not used.
  // Offset and size are stored scaled down so they fit 16-bit fields.
  typedef struct {
    GLuint      header;
    GLushort    index;      // uniform binding index
    GLushort    stage;      // stage id taken from s_nvcmdlist_stages
    GLuint      buffer;     // GL buffer object id
    GLushort    offset256;  // byte offset divided by 256
    GLushort    size4;      // byte size divided by 4
  } UniformAddressCommandEMU;


  // No-op token. Construction stamps the header registered for
  // GL_NOP_COMMAND_NV; nvtokenMakeNop uses it to overwrite other tokens.
  struct NVTokenNop {
    static const GLenum   ID = GL_NOP_COMMAND_NV;

    NOPCommandNV      cmd;

    NVTokenNop() {
      // header comes from the shared table, indexed by this token's type id
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Terminate-sequence token. Construction stamps the header registered for
  // GL_TERMINATE_SEQUENCE_COMMAND_NV; the token carries no other data set here.
  struct NVTokenTerminate {
    static const GLenum   ID = GL_TERMINATE_SEQUENCE_COMMAND_NV;

    TerminateSequenceCommandNV      cmd;

    NVTokenTerminate() {
      // header comes from the shared table, indexed by this token's type id
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Instanced indexed draw token. Unlike the non-instanced draw tokens, the
  // primitive mode is stored inside the token itself.
  struct NVTokenDrawElemsInstanced {
    static const GLenum   ID = GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV;

    DrawElementsInstancedCommandNV   cmd;

    // Defaults: GL_TRIANGLES, zero indices, one instance, all bases zero.
    NVTokenDrawElemsInstanced() {
      cmd.header = s_nvcmdlist_header[ID];

      setMode(GL_TRIANGLES);
      setParams(0);
      setInstances(1);
    }

    // Primitive mode used for this draw.
    void setMode(GLenum primmode) {
      cmd.mode = primmode;
    }

    // Index count plus optional first index and base vertex.
    void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)
    {
      cmd.baseVertex = baseVertex;
      cmd.firstIndex = firstIndex;
      cmd.count      = count;
    }

    // Instance count plus optional base instance.
    void setInstances(GLuint count, GLuint baseInstance=0){
      cmd.instanceCount = count;
      cmd.baseInstance  = baseInstance;
    }
  };

  // Instanced non-indexed draw token. The primitive mode is stored inside the
  // token itself.
  struct NVTokenDrawArraysInstanced {
    static const GLenum   ID = GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV;

    DrawArraysInstancedCommandNV          cmd;

    // Defaults: GL_TRIANGLES, zero vertices starting at 0, one instance, base instance 0.
    NVTokenDrawArraysInstanced() {
      cmd.header = s_nvcmdlist_header[ID];

      setMode(GL_TRIANGLES);
      setParams(0);
      setInstances(1);
    }

    // Primitive mode used for this draw.
    void setMode(GLenum primmode) {
      cmd.mode = primmode;
    }

    // Vertex count plus optional first vertex.
    void setParams(GLuint count, GLuint first=0)
    {
      cmd.first = first;
      cmd.count = count;
    }

    // Instance count plus optional base instance.
    void setInstances(GLuint count, GLuint baseInstance=0){
      cmd.instanceCount = count;
      cmd.baseInstance  = baseInstance;
    }
  };

  // Indexed draw token. The token has no mode field; setMode() only picks
  // between the list and the strip header variant.
  struct NVTokenDrawElems {
    static const GLenum   ID = GL_DRAW_ELEMENTS_COMMAND_NV;

    DrawElementsCommandNV   cmd;

    // Defaults: list-style header, zero indices, first index and base vertex 0.
    NVTokenDrawElems() {
      cmd.header = s_nvcmdlist_header[ID];
      setParams(0);
    }

    // Index count plus optional first index and base vertex.
    void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)
    {
      cmd.baseVertex = baseVertex;
      cmd.firstIndex = firstIndex;
      cmd.count      = count;
    }

    // Selects the header matching the primitive topology. Fans and line loops
    // are rejected by the assert. GL_POLYGON and GL_QUAD_STRIP are deliberately
    // not tested here, same as before.
    void setMode(GLenum primmode) {
      assert(primmode != GL_TRIANGLE_FAN && primmode != GL_LINE_LOOP);

      bool isStrip = false;
      switch (primmode){
      case GL_LINE_STRIP:
      case GL_TRIANGLE_STRIP:
      case GL_LINE_STRIP_ADJACENCY:
      case GL_TRIANGLE_STRIP_ADJACENCY:
        isStrip = true;
        break;
      default:
        break;
      }

      cmd.header = isStrip ? s_nvcmdlist_header[GL_DRAW_ELEMENTS_STRIP_COMMAND_NV]
                           : s_nvcmdlist_header[GL_DRAW_ELEMENTS_COMMAND_NV];
    }
  };

  // Non-indexed draw token. The token has no mode field; setMode() only picks
  // between the list and the strip header variant.
  struct NVTokenDrawArrays {
    static const GLenum   ID = GL_DRAW_ARRAYS_COMMAND_NV;

    DrawArraysCommandNV   cmd;

    // Defaults: list-style header, zero vertices starting at 0.
    NVTokenDrawArrays() {
      cmd.header = s_nvcmdlist_header[ID];
      setParams(0);
    }

    // Vertex count plus optional first vertex.
    void setParams(GLuint count, GLuint first=0)
    {
      cmd.first = first;
      cmd.count = count;
    }

    // Selects the header matching the primitive topology. Fans and line loops
    // are rejected by the assert. GL_POLYGON and GL_QUAD_STRIP are deliberately
    // not tested here, same as before.
    void setMode(GLenum primmode) {
      assert(primmode != GL_TRIANGLE_FAN && primmode != GL_LINE_LOOP);

      bool isStrip = false;
      switch (primmode){
      case GL_LINE_STRIP:
      case GL_TRIANGLE_STRIP:
      case GL_LINE_STRIP_ADJACENCY:
      case GL_TRIANGLE_STRIP_ADJACENCY:
        isStrip = true;
        break;
      default:
        break;
      }

      cmd.header = isStrip ? s_nvcmdlist_header[GL_DRAW_ARRAYS_STRIP_COMMAND_NV]
                           : s_nvcmdlist_header[GL_DRAW_ARRAYS_COMMAND_NV];
    }
  };

  // Indexed draw token that always carries the strip header.
  struct NVTokenDrawElemsStrip {
    static const GLenum   ID = GL_DRAW_ELEMENTS_STRIP_COMMAND_NV;

    DrawElementsCommandNV   cmd;

    // Defaults: zero indices, first index and base vertex 0.
    NVTokenDrawElemsStrip() {
      cmd.header = s_nvcmdlist_header[ID];
      setParams(0);
    }

    // Index count plus optional first index and base vertex.
    void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)
    {
      cmd.baseVertex = baseVertex;
      cmd.firstIndex = firstIndex;
      cmd.count      = count;
    }
  };

  // Non-indexed draw token that always carries the strip header.
  struct NVTokenDrawArraysStrip {
    static const GLenum   ID = GL_DRAW_ARRAYS_STRIP_COMMAND_NV;

    DrawArraysCommandNV   cmd;

    // Defaults: zero vertices starting at 0.
    NVTokenDrawArraysStrip() {
      cmd.header = s_nvcmdlist_header[ID];
      setParams(0);
    }

    // Vertex count plus optional first vertex.
    void setParams(GLuint count, GLuint first=0)
    {
      cmd.first = first;
      cmd.count = count;
    }
  };

  struct NVTokenVbo {
    static const GLenum   ID = GL_ATTRIBUTE_ADDRESS_COMMAND_NV;

    union {
      AttributeAddressCommandNV   cmd;
      AttributeAddressCommandEMU  cmdEMU;
    };

    void setBinding(GLuint idx){
      cmd.index = idx;
    }

    void setBuffer(GLuint buffer, GLuint64 address, GLuint offset)
    {
      if (s_nvcmdlist_bindless){
        address += offset;
        cmd.addressLo = GLuint(address & 0xFFFFFFFF);
        cmd.addressHi = GLuint(address >> 32);
      }
      else{
        cmdEMU.buffer = buffer;
        cmdEMU.offset = offset;
      }
    }

    NVTokenVbo() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Index buffer token. The payload is either a GPU address (bindless) or a
  // buffer id (emulation); both views share storage.
  struct NVTokenIbo {
    static const GLenum   ID = GL_ELEMENT_ADDRESS_COMMAND_NV;

    union{
      ElementAddressCommandNV     cmd;
      ElementAddressCommandEMU    cmdEMU;
    };

    // Only the header is set; type and buffer must be filled by the caller.
    NVTokenIbo() {
      cmd.header  = s_nvcmdlist_header[ID];
    }

    // Translates a GL index type into its size in bytes. Any other type trips
    // the assert and leaves the stored size untouched.
    void setType(GLenum type){
      switch (type){
      case GL_UNSIGNED_BYTE:
        cmd.typeSizeInByte = 1;
        break;
      case GL_UNSIGNED_SHORT:
        cmd.typeSizeInByte = 2;
        break;
      case GL_UNSIGNED_INT:
        cmd.typeSizeInByte = 4;
        break;
      default:
        assert(0 && "illegal type");
        break;
      }
    }

    // Stores the buffer location: `address` in bindless mode, `buffer` otherwise.
    void setBuffer(GLuint buffer, GLuint64 address)
    {
      if (!s_nvcmdlist_bindless){
        cmdEMU.buffer = buffer;
        cmdEMU._pad   = 0;
        return;
      }

      cmd.addressLo = GLuint(address & 0xFFFFFFFF);
      cmd.addressHi = GLuint(address >> 32);
    }
  };

  struct NVTokenUbo {
    static const GLenum   ID = GL_UNIFORM_ADDRESS_COMMAND_NV;

    union{
      UniformAddressCommandNV   cmd;
      UniformAddressCommandEMU  cmdEMU;
    };

    void setBuffer(GLuint buffer, GLuint64 address, GLuint offset, GLuint size)
    {
      assert(size % 4 == 0 && offset % 256 == 0);
      if (s_nvcmdlist_bindless){
        address += offset;
        cmd.addressLo = GLuint(address & 0xFFFFFFFF);
        cmd.addressHi = GLuint(address >> 32);
      }
      else{
        cmdEMU.buffer = buffer;
        cmdEMU.offset256 = offset / 256;
        cmdEMU.size4     = size / 4;
      }
    }

    void setBinding(GLuint idx, NVTokenShaderStage stage){
      cmd.index = idx;
      cmd.stage = s_nvcmdlist_stages[stage];
    }
    
    NVTokenUbo() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Blend color token. Only the header is initialized here; the payload of
  // `cmd` must be written by the caller.
  struct NVTokenBlendColor{
    static const GLenum   ID = GL_BLEND_COLOR_COMMAND_NV;

    BlendColorCommandNV     cmd;

    NVTokenBlendColor() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Stencil reference token. Only the header is initialized here; the
  // reference values in `cmd` must be written by the caller.
  struct NVTokenStencilRef{
    static const GLenum   ID = GL_STENCIL_REF_COMMAND_NV;

    StencilRefCommandNV cmd;

    NVTokenStencilRef() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  } ;

  // Line width token. Only the header is initialized here; `cmd.lineWidth`
  // must be written by the caller.
  struct NVTokenLineWidth{
    static const GLenum   ID = GL_LINE_WIDTH_COMMAND_NV;

    LineWidthCommandNV  cmd;

    NVTokenLineWidth() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Polygon offset token. Only the header is initialized here; `cmd.scale`
  // and `cmd.bias` must be written by the caller.
  struct NVTokenPolygonOffset{
    static const GLenum   ID = GL_POLYGON_OFFSET_COMMAND_NV;

    PolygonOffsetCommandNV  cmd;

    NVTokenPolygonOffset() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Alpha reference token. Only the header is initialized here; the payload
  // of `cmd` must be written by the caller.
  // NOTE(review): the software replay of GL_ALPHA_REF_COMMAND_NV in nvtoken.cpp
  // is commented out, so this token has no effect on that path.
  struct NVTokenAlphaRef{
    static const GLenum   ID = GL_ALPHA_REF_COMMAND_NV;

    AlphaRefCommandNV cmd;

    NVTokenAlphaRef() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Viewport token. Only the header is initialized here; x, y, width and
  // height of `cmd` must be written by the caller.
  struct NVTokenViewport{
    static const GLenum   ID = GL_VIEWPORT_COMMAND_NV;

    ViewportCommandNV cmd;

    NVTokenViewport() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Scissor token. Only the header is initialized here; x, y, width and
  // height of `cmd` must be written by the caller.
  struct NVTokenScissor {
    static const GLenum   ID = GL_SCISSOR_COMMAND_NV;

    ScissorCommandNV  cmd;

    NVTokenScissor() {
      cmd.header  = s_nvcmdlist_header[ID];
    }
  };

  // Front face winding token. Only the header is initialized by the
  // constructor; call setFrontFace() to fill the payload.
  struct NVTokenFrontFace {
    static const GLenum   ID = GL_FRONT_FACE_COMMAND_NV;

    FrontFaceCommandNV  cmd;

    NVTokenFrontFace() {
      cmd.header  = s_nvcmdlist_header[ID];
    }

    // Encodes the winding as 1 for GL_CCW and 0 for anything else.
    // NOTE(review): the software replay in nvtoken.cpp decodes a non-zero
    // frontFace as GL_CW, which looks inverted relative to this encoding —
    // confirm against the NV_command_list specification.
    void setFrontFace(GLenum winding){
      cmd.frontFace = (winding == GL_CCW) ? 1 : 0;
    }
  };

#pragma pack(pop)

  template <class T>
  void nvtokenMakeNop(T & token){
    NVTokenNop *nop = (NVTokenNop*)&token;
    for (size_t i = 0; i < (sizeof(T))/4; i++){
      nop[i] = NVTokenNop();
    }
  }

  // Appends the raw bytes of `data` to a string-backed token stream.
  // Returns the byte offset at which the token was written.
  template <class T>
  size_t nvtokenEnqueue(std::string& queue, T& data)
  {
    const size_t writePos = queue.size();

    // copy the object representation straight to the end of the stream
    queue.append((const char*)&data, sizeof(T));

    return writePos;
  }

  // Copies the raw bytes of `data` to the current write position of a
  // pointer-backed stream and advances the cursor by sizeof(T).
  // Returns the byte offset at which the token was written. Running out of
  // space is only caught by the assert.
  template <class T>
  size_t nvtokenEnqueue(NVPointerStream& queue, T& data)
  {
    assert(queue.m_cur + sizeof(T) <= queue.m_end);

    unsigned char* dst = queue.m_cur;
    const size_t writePos = dst - queue.m_begin;

    memcpy(dst,&data,sizeof(T));
    queue.m_cur = dst + sizeof(T);

    return writePos;
  }
  
  //////////////////////////////////////////////////////////
  
  // Implemented in nvtoken.cpp.
  // NOTE(review): presumably fills the s_nvcmdlist_* tables according to the
  // available hardware / bindless support — confirm in the implementation.
  void        nvtokenInitInternals( bool hwsupport, bool bindlessSupport);
  // Name of a token type id — presumably for logging; confirm in the implementation.
  const char* nvtokenCommandToString( GLenum type );
  // Fills `stats`, which has one slot per token type, from the given stream —
  // presumably a per-type token count; confirm in the implementation.
  void        nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSize, int stats[NVTOKEN_TYPES]);

  // Software replay of `count` sequences from `stream`, all with the same
  // primitive mode and state. Sequence i is the byte range
  // [offsets[i], offsets[i] + sizes[i]).
  void nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream, size_t streamSize, 
    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, 
    GLuint count, 
    StateSystem::State &state);

#if NVTOKEN_STATESYSTEM
  // Software replay where every sequence has its own state id and framebuffer;
  // state changes are applied through `stateSystem` before each sequence.
  void nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_t streamSize, 
    const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, 
    const GLuint* NV_RESTRICT states, const GLuint* NV_RESTRICT fbos, GLuint count, 
    StateSystem &stateSystem);
#endif
}


================================================
FILE: renderer.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#include <assert.h>
#include <algorithm>
#include "renderer.hpp"

#include "common.h"

#pragma pack(1)


namespace csfviewer
{

  //////////////////////////////////////////////////////////////////////////

  bool Renderer::s_bindless_ubo = false;

  CullingSystem   Renderer::s_cullsys;
  ScanSystem      Renderer::s_scansys;

  // Display name of a shade type; NULL for any value without a name
  // (including NUM_SHADES).
  const char* toString( enum ShadeType st )
  {
    if (st == SHADE_SOLID)            return "solid";
    if (st == SHADE_SOLIDWIRE)        return "solid w edges";
    if (st == SHADE_SOLIDWIRE_SPLIT)  return "solid w edges (split)";

    return NULL;
  }


  // Emits one draw item per range stored in the object's precomputed draw
  // cache (solid or wire). The cache groups its ranges by state: state s owns
  // stateCount[s] consecutive entries of offsets/counts. `geo` is not used.
  static void FillCache( std::vector<Renderer::DrawItem>& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo, bool solid, int objectIndex )
  {
    const CadScene::DrawRangeCache &cache = solid ? obj.cacheSolid : obj.cacheWire;

    // index of the first range belonging to the current state
    int rangeBase = 0;

    for (size_t s = 0; s < cache.state.size(); s++)
    {
      const CadScene::DrawStateInfo &info = cache.state[s];

      // everything except the range is shared by all items of this state
      Renderer::DrawItem di;
      di.solid         = solid;
      di.objectIndex   = objectIndex;
      di.geometryIndex = obj.geometryIndex;
      di.materialIndex = info.materialIndex;
      di.matrixIndex   = info.matrixIndex;

      for (int d = 0; d < cache.stateCount[s]; d++){
        di.range.offset = cache.offsets[rangeBase + d];
        di.range.count  = cache.counts [rangeBase + d];

        drawItems.push_back(di);
      }

      rangeBase += cache.stateCount[s];
    }
  }

  static void FillJoin( std::vector<Renderer::DrawItem>& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo,  bool solid, int objectIndex ) 
  {
    CadScene::DrawRange range;

    int lastMaterial = -1;
    int lastMatrix   = -1;

    for (size_t p = 0; p < obj.parts.size(); p++){
      const CadScene::ObjectPart&   part = obj.parts[p];
      const CadScene::GeometryPart& mesh = geo.parts[p];

      if (!part.active) continue;

      if (part.materialIndex != lastMaterial || part.matrixIndex != lastMatrix){

        if (range.count){
          // evict
          Renderer::DrawItem di;
          di.geometryIndex = obj.geometryIndex;
          di.matrixIndex   = lastMatrix;
          di.materialIndex = lastMaterial;
          di.objectIndex   = objectIndex;

          di.solid = solid;
          di.range = range;

          drawItems.push_back(di);
        }

        range = CadScene::DrawRange();

        lastMaterial = part.materialIndex;
        lastMatrix   = part.matrixIndex;
      }

      if (!range.count){
        range.offset = solid ? mesh.indexSolid.offset : mesh.indexWire.offset;
      }

      range.count += solid ? mesh.indexSolid.count : mesh.indexWire.count;
    }

    // evict
    Renderer::DrawItem di;
    di.geometryIndex = obj.geometryIndex;
    di.matrixIndex   = lastMatrix;
    di.materialIndex = lastMaterial;
    di.objectIndex   = objectIndex;

    di.solid = solid;
    di.range = range;

    drawItems.push_back(di);
  }

  // Emits exactly one draw item per active part of the object, using the
  // part's own matrix and material and the matching solid or wire index range.
  static void FillIndividual( std::vector<Renderer::DrawItem>& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo, bool solid, int objectIndex )
  {
    for (size_t p = 0; p < obj.parts.size(); p++){
      const CadScene::ObjectPart&   part = obj.parts[p];
      const CadScene::GeometryPart& mesh = geo.parts[p];

      if (part.active){
        Renderer::DrawItem di;
        di.solid         = solid;
        di.objectIndex   = objectIndex;
        di.geometryIndex = obj.geometryIndex;
        di.materialIndex = part.materialIndex;
        di.matrixIndex   = part.matrixIndex;
        di.range         = solid ? mesh.indexSolid : mesh.indexWire;

        drawItems.push_back(di);
      }
    }
  }


  void Renderer::fillDrawItems( std::vector<DrawItem>& drawItems, size_t from, size_t to, bool solid, bool wire )
  {
    const CadScene* NV_RESTRICT scene = m_scene;
    for (size_t i = from; i < scene->m_objects.size() && i < to; i++){
      const CadScene::Object& obj = scene->m_objects[i];
      const CadScene::Geometry& geo = scene->m_geometry[obj.geometryIndex];

      if (m_strategy == STRATEGY_GROUPS){
        if (solid)  FillCache(drawItems, obj, geo, true,  int(i));
        if (wire)   FillCache(drawItems, obj, geo, false, int(i));
      }
      else if (m_strategy == STRATEGY_JOIN) {
        if (solid)  FillJoin(drawItems, obj, geo, true,  int(i));
        if (wire)   FillJoin(drawItems, obj, geo, false, int(i));
      }
      else if (m_strategy == STRATEGY_INDIVIDUAL){
        if (solid)  FillIndividual(drawItems, obj, geo, true,  int(i));
        if (wire)   FillIndividual(drawItems, obj, geo, false, int(i));
      }
    }
  }

}





================================================
FILE: renderer.hpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */



#ifndef RENDERER_H__
#define RENDERER_H__

// bindless UBO
#ifndef GL_UNIFORM_BUFFER_UNIFIED_NV
#define GL_UNIFORM_BUFFER_UNIFIED_NV                        0x936E
#endif
#ifndef GL_UNIFORM_BUFFER_ADDRESS_NV
#define GL_UNIFORM_BUFFER_ADDRESS_NV                        0x936F
#endif
#ifndef GL_UNIFORM_BUFFER_LENGTH_NV
#define GL_UNIFORM_BUFFER_LENGTH_NV                         0x9370
#endif

#include "cadscene.hpp"
#include <NvFoundation.h>
#include <nvgl/programmanager_gl.hpp>
#include <nvgl/base_gl.hpp>
#include <nvh/profiler.hpp>
#include "cullingsystem.hpp"
#include "scansystem.hpp"

namespace csfviewer {
  #define USE_NOFILTER           0  // some renderers support turning off redundancy filter

  #define USE_WIRE_SHADERSWITCH  0  // If set we use two different shaders for tris and lines,
                                    // otherwise we use an immediate mode vertexattrib as pseudo uniform toggle.
                                    // Enable this to stress shader switching in app (becomes primary bottleneck)
  // How Renderer::fillDrawItems turns object parts into draw items.
  enum Strategy {
    STRATEGY_GROUPS,      // one item per range of the object's precomputed draw cache
    STRATEGY_JOIN,        // consecutive parts with equal material and matrix are merged into one range
    STRATEGY_INDIVIDUAL,  // one item per active part
  };

  // Shading variants; toString() provides the display names.
  enum ShadeType {
    SHADE_SOLID,            // "solid"
    SHADE_SOLIDWIRE,        // "solid w edges"
    SHADE_SOLIDWIRE_SPLIT, // this mode is not "sane" it is only meant for performance testing of fbo toggles
    NUM_SHADES,             // number of shade types; used to size per-shade arrays
  };

  const char* toString(enum ShadeType st);

  struct Resources {
    GLuint    sceneUbo;
    GLuint64  sceneAddr;

    GLuint    programUbo;
    GLuint    programUboTris;
    GLuint    programUboLine;

    GLuint    programIdx;
    GLuint    programIdxTris;
    GLuint    programIdxLine;

    GLuint    fbo;
    GLuint    fbo2;

    size_t    stateChangeID;
    size_t    fboTextureChangeID;

    CullingSystem::View cullView;

    // ugly hack
    mutable GLuint programUsed;
    mutable GLuint programUsedTris;
    mutable GLuint programUsedLine;

    void usingUboProgram(bool ubo=true) const
    {
      programUsed     = ubo ? programUbo     : programIdx;
      programUsedTris = ubo ? programUboTris : programIdxTris;
      programUsedLine = ubo ? programUboLine : programIdxLine;
    }

    Resources() {
      stateChangeID = 0;
      fboTextureChangeID = 0;
    }
  };

#if USE_WIRE_SHADERSWITCH
  #define SetWireMode(state) glUseProgram((state) ? resources.programUsedLine : resources.programUsedTris )
#else
  #define SetWireMode(state) glVertexAttribI1i(VERTEX_WIREMODE,(state))
#endif

  // Base class of all render techniques. Concrete renderers override
  // init/deinit/draw and announce themselves through a static Type instance.
  class Renderer {
  public:

    // One draw call worth of work: which indices of which geometry to draw,
    // and with which matrix and material.
    struct DrawItem {
      bool                solid;
      int                 materialIndex;
      int                 geometryIndex;
      int                 matrixIndex;
      int                 objectIndex;
      CadScene::DrawRange range;
    };

    // Strict-weak ordering for std::sort: solid items first, then ascending by
    // material, geometry and matrix index.
    static bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b)
    {
      if (a.solid != b.solid){
        // the solid item of the pair goes first
        return a.solid;
      }
      if (a.materialIndex != b.materialIndex){
        return a.materialIndex < b.materialIndex;
      }
      if (a.geometryIndex != b.geometryIndex){
        return a.geometryIndex < b.geometryIndex;
      }
      return a.matrixIndex < b.matrixIndex;
    }

    // Factory/descriptor of a renderer. Every constructed Type adds itself to
    // the global registry.
    class Type {
    public:
      Type() {
        getRegistry().push_back(this);
      }

      virtual bool loadPrograms( nvgl::ProgramManager &mgr ) { return true; }
      virtual void updatedPrograms( nvgl::ProgramManager &mgr ) { }
      virtual bool isAvailable() const = 0;
      virtual const char* name() const = 0;
      virtual Renderer* create() const = 0;
      virtual unsigned int priority() const { return 0xFF; }
    };

    typedef std::vector<Type*> Registry;

    // Registry of all Type instances. It is a function-local static, so it
    // exists by the time the first Type constructor calls this.
    static Registry& getRegistry()
    {
      static Registry s_registry;
      return s_registry;
    }

    static bool            s_bindless_ubo;
    static CullingSystem   s_cullsys;
    static ScanSystem      s_scansys;

  public:
    virtual ~Renderer() {}

    virtual void init(const CadScene* NV_RESTRICT scene, const Resources& resources) {}
    virtual void deinit() {}
    virtual void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager ) {}

    // Appends draw items for objects [from, to) of m_scene according to m_strategy.
    void fillDrawItems( std::vector<DrawItem>& drawItems, size_t from, size_t to, bool solid, bool wire);

    // Neither member is initialized by this class; both must be assigned
    // before fillDrawItems() is called.
    Strategy                    m_strategy;
    const CadScene* NV_RESTRICT  m_scene;
  };
}

#endif


================================================
FILE: rendererindexedmdi.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#include <assert.h>
#include <algorithm>
#include "renderer.hpp"

#include "common.h"

#define USE_VERTEX_ASSIGNS  (!USE_BASEINSTANCE)
#define USE_GPU_INDIRECT    1
#define USE_CPU_INDIRECT    (!USE_GPU_INDIRECT)

namespace csfviewer
{
  //////////////////////////////////////////////////////////////////////////

  class RendererIndexedMDI: public Renderer {
  public:
    // Registers the plain variant: unsorted, regular buffer bindings.
    class Type : public Renderer::Type 
    {
      // no extension requirement
      bool isAvailable() const        { return true; }
      const char* name() const        { return "indexedmdi"; }
      unsigned int priority() const   { return 3; }

      Renderer* create() const
      {
        return new RendererIndexedMDI();
      }
    };
    // Registers the bindless variant: unsorted, m_vbum enabled.
    class TypeVbum : public Renderer::Type 
    {
      // requires GL_NV_vertex_buffer_unified_memory
      bool isAvailable() const        { return has_GL_NV_vertex_buffer_unified_memory != 0; }
      const char* name() const        { return "indexedmdi_bindless"; }
      unsigned int priority() const   { return 3; }

      Renderer* create() const
      {
        RendererIndexedMDI* created = new RendererIndexedMDI();
        created->m_vbum = true;
        return created;
      }
    };
    // Registers the sorted variant: m_sort enabled, regular buffer bindings.
    class TypeSort : public Renderer::Type 
    {
      // no extension requirement
      bool isAvailable() const        { return true; }
      const char* name() const        { return "indexedmdi_sorted"; }
      unsigned int priority() const   { return 3; }

      Renderer* create() const
      {
        RendererIndexedMDI* created = new RendererIndexedMDI();
        created->m_sort = true;
        return created;
      }
    };
    // Registers the sorted bindless variant: m_sort and m_vbum enabled.
    class TypeSortVbum : public Renderer::Type 
    {
      // requires GL_NV_vertex_buffer_unified_memory
      bool isAvailable() const        { return has_GL_NV_vertex_buffer_unified_memory != 0; }
      const char* name() const        { return "indexedmdi_sorted_bindless"; }
      unsigned int priority() const   { return 3; }

      Renderer* create() const
      {
        RendererIndexedMDI* created = new RendererIndexedMDI();
        created->m_vbum = true;
        created->m_sort = true;
        return created;
      }
    };

  private:
    struct DrawIndirectGL {
      GLuint count;
      GLuint instanceCount;
      GLuint firstIndex;
      GLint  baseVertex;
      GLuint baseInstance;

      DrawIndirectGL ()
        : count(0)
        , instanceCount(1)
        , firstIndex(0)
        , baseVertex(0)
        , baseInstance(0) {}
    };

    // Element type of the indirect command stream; currently just wraps the
    // indirect draw record. sizeof(IndexedCommand) is what sizes the GL buffer.
    struct IndexedCommand {
      DrawIndirectGL  cmd;
    };

    // Everything generated for one shade type: the indirect command stream,
    // the optional per-draw assignment data, and the batches into which the
    // stream is split. Entry i of sizes/offsets/geometries/solids describes
    // batch i.
    struct ShadeCommand {
      std::vector<IndexedCommand> indirects;  // indirect draw records, in draw order
      std::vector<int>      assigns;          // (matrixIndex, materialIndex) pairs

      std::vector<size_t>   sizes;            // number of commands per batch
      std::vector<size_t>   offsets;          // index of the first command per batch
      std::vector<int>      geometries;       // geometry index per batch
      std::vector<bool>     solids;           // solid (true) or wire (false) per batch

#if USE_GPU_INDIRECT
      GLuint    indirectGL;    // buffer holding `indirects`
      GLuint64  indirectADDR;  // its GPU address, queried only in bindless mode
#endif

#if USE_VERTEX_ASSIGNS
      GLuint    assignGL;      // buffer holding `assigns`
      GLuint64  assignADDR;    // its GPU address, queried only in bindless mode
#endif

      // Handles and addresses start at 0; the addresses stay 0 unless the
      // bindless path queries them.
      ShadeCommand() {
#if USE_GPU_INDIRECT
        indirectGL = 0;
        indirectADDR = 0;
#endif
#if USE_VERTEX_ASSIGNS
        assignGL = 0;
        assignADDR = 0;
#endif
      }
    };

  public:
    // Renderer interface, implemented outside the class body.
    void init(const CadScene* NV_RESTRICT scene, const Resources& resources);
    void deinit();
    void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager);

    // m_vbum: when set, init() queries GPU addresses of its buffers and makes them resident.
    // m_sort: when set, init() sorts the draw items with DrawItem_compare_groups.
    // Both are switched on by the Type variants right after construction.
    bool                        m_vbum;
    bool                        m_sort;


    // Both options are off by default.
    RendererIndexedMDI()
      : m_vbum(false) 
      , m_sort(false)
    {

    }

  private:

    ShadeCommand    m_shades[NUM_SHADES];
    
    // Packs both indices into one 32-bit value: matrixIndex occupies the low
    // 20 bits, materialIndex the 12 bits above.
    // Both indices must be non-negative and fit their bit range; a negative
    // index would convert to all-ones and spill into the other field.
    GLuint packBaseInstance( int matrixIndex, int materialIndex )
    {
      assert( materialIndex >= 0 && materialIndex <= 0xFFF );
      assert( matrixIndex   >= 0 && matrixIndex   <= 0xFFFFF );
      return (GLuint(matrixIndex) | (GLuint(materialIndex) << 20));
    }

    void GenerateIndirects(std::vector<DrawItem>& drawItems, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources )
    {
      int lastMaterial = -1;
      int lastGeometry = -1;
      int lastMatrix   = -1;
      bool lastSolid   = true;

      ShadeCommand& sc = m_shades[shade];
      sc.assigns.clear();
      sc.indirects.clear();

      sc.sizes.clear();
      sc.offsets.clear();
      sc.solids.clear();
      sc.geometries.clear();

      std::vector<int>& assigns = sc.assigns;
      std::vector<IndexedCommand>& indirectStream = sc.indirects;

      size_t begin = 0;

      int numAssigns = 0;

      for (int i = 0; i < drawItems.size(); i++){
        const DrawItem& di = drawItems[i];

        if (shade == SHADE_SOLID && !di.solid){
          if (m_sort) break;
          continue;
        }

        if (lastGeometry != di.geometryIndex || (shade == SHADE_SOLIDWIRE && di.solid != lastSolid)){
          sc.offsets.push_back( begin );
          sc.sizes.  push_back( GLsizei((indirectStream.size()-begin)) );
          sc.solids. push_back( lastSolid );
          sc.geometries.push_back( lastGeometry );

          begin = indirectStream.size();
        }

#if USE_VERTEX_ASSIGNS
        if (lastMatrix != di.matrixIndex || lastMaterial != di.materialIndex)
        {
          // push indices
          assigns.push_back(di.matrixIndex);
          assigns.push_back(di.materialIndex);
          numAssigns++;

          lastMatrix    = di.matrixIndex;
          lastMaterial  = di.materialIndex;
        }
#endif

        IndexedCommand drawelems;
        drawelems.cmd.count = di.range.count;
        drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint));
#if USE_VERTEX_ASSIGNS
        drawelems.cmd.baseInstance = numAssigns - 1;
#else
        drawelems.cmd.baseInstance = packBaseInstance(di.matrixIndex, di.materialIndex);
#endif
        indirectStream.push_back(drawelems);

        lastGeometry = di.geometryIndex;
        lastSolid = di.solid;
      }

      sc.offsets.push_back( begin );
      sc.sizes.  push_back( GLsizei((indirectStream.size()-begin)) );
      sc.solids. push_back( lastSolid );
      sc.geometries.push_back( lastGeometry );
    }

  };

  // File-scope instances of the four RendererIndexedMDI type descriptors
  // (Type, TypeVbum, TypeSort, TypeSortVbum).
  // NOTE(review): presumably constructing a Renderer::Type adds it to the
  // renderer registry, which would make the declaration order significant --
  // confirm in renderer.hpp.
  static RendererIndexedMDI::Type s_indexed;
  static RendererIndexedMDI::TypeVbum s_indexed_vbum;
  static RendererIndexedMDI::TypeSort s_indexedsort;
  static RendererIndexedMDI::TypeSortVbum s_indexedsort_vbum;

  // Prepares the renderer for a scene.
  //
  // Collects the draw items of all scene objects, optionally sorts them
  // (m_sort), generates the command lists for SHADE_SOLID and SHADE_SOLIDWIRE
  // and uploads them into GL buffers. When m_vbum is set, the GPU addresses of
  // those buffers are queried and the buffers are made resident.
  // SHADE_SOLIDWIRE_SPLIT receives a copy of the SHADE_SOLIDWIRE entry, so both
  // entries refer to the same GL buffer names.
  //
  // scene     - scene to render; the pointer is stored in m_scene
  // resources - shared resources; usingUboProgram(false) is called on it
  void RendererIndexedMDI::init( const CadScene* NV_RESTRICT scene, const Resources& resources )
  {
    m_scene = scene;
    resources.usingUboProgram(false);

    std::vector<DrawItem> drawItems;

    fillDrawItems(drawItems,0,scene->m_objects.size(), true, true);

    if (m_sort){
      std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups);
    }

    // build the CPU-side command lists

    GenerateIndirects(drawItems, SHADE_SOLID, scene, resources);
    GenerateIndirects(drawItems, SHADE_SOLIDWIRE, scene, resources);

    // upload; the bound must match the one used in deinit()
    for (size_t i = 0; i <= SHADE_SOLIDWIRE; i++){
      ShadeCommand& sc = m_shades[i];
#if USE_GPU_INDIRECT
      glCreateBuffers(1,&sc.indirectGL);
      // data() rather than &v[0]: subscripting an empty vector is undefined
      // behavior (and asserts in checked STL builds), data() is always valid.
      glNamedBufferStorage( sc.indirectGL, sizeof(IndexedCommand) * sc.indirects.size(), sc.indirects.data(), 0 );
      if (m_vbum){
        glGetNamedBufferParameterui64vNV(sc.indirectGL, GL_BUFFER_GPU_ADDRESS_NV, &sc.indirectADDR);
        glMakeNamedBufferResidentNV(sc.indirectGL, GL_READ_ONLY);
      }
#endif
#if USE_VERTEX_ASSIGNS
      glCreateBuffers(1,&sc.assignGL);
      glNamedBufferStorage( sc.assignGL, sizeof(int) * sc.assigns.size(), sc.assigns.data(), 0 );
      if (m_vbum){
        glGetNamedBufferParameterui64vNV(sc.assignGL, GL_BUFFER_GPU_ADDRESS_NV, &sc.assignADDR);
        glMakeNamedBufferResidentNV(sc.assignGL, GL_READ_ONLY);
      }
#endif
    }

    // the split mode reuses the solid+wire data (including its GL buffer names)
    m_shades[SHADE_SOLIDWIRE_SPLIT] = m_shades[SHADE_SOLIDWIRE];

  }

  // Releases the GL buffers created by init().
  //
  // init() creates buffers for every shade entry up to and including
  // SHADE_SOLIDWIRE, so the loop here must use the same inclusive bound;
  // stopping one short leaves the SHADE_SOLIDWIRE buffers resident and
  // undeleted. SHADE_SOLIDWIRE_SPLIT is only a copy of SHADE_SOLIDWIRE
  // and shares its buffer names, so it must not be released separately.
  void RendererIndexedMDI::deinit()
  {
    for (size_t i = 0; i <= SHADE_SOLIDWIRE; i++){
      ShadeCommand& sc = m_shades[i];
      // residency was only requested when m_vbum is set
      if (m_vbum){
#if USE_GPU_INDIRECT
        glMakeNamedBufferNonResidentNV(sc.indirectGL);
#endif
#if USE_VERTEX_ASSIGNS
        glMakeNamedBufferNonResidentNV(sc.assignGL);
#endif
      }
#if USE_GPU_INDIRECT
      glDeleteBuffers(1,&sc.indirectGL);
#endif
#if USE_VERTEX_ASSIGNS
      glDeleteBuffers(1,&sc.assignGL);
#endif
    }
  }

  void RendererIndexedMDI::draw( ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager )
  {
    const CadScene* NV_RESTRICT scene = m_scene;
    bool vbum = m_vbum;

    scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL);

    glUseProgram(resources.programIdx);

    if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){
      glEnable(GL_POLYGON_OFFSET_FILL);
      glPolygonOffset(1,1);
    }

    SetWireMode(GL_FALSE);

#if USE_VERTEX_ASSIGNS
    glVertexAttribIFormat(VERTEX_ASSIGNS,2,GL_INT,0);
    glVertexAttribBinding(VERTEX_ASSIGNS,1);
    glEnableVertexAttribArray(VERTEX_ASSIGNS);
    glBindVertexBuffer(1,0,0,sizeof(GLint)*2);
    glVertexBindingDivisor(1,1);
#endif
    if (vbum){
      glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
      glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
#if USE_GPU_INDIRECT
      glEnableClientState(GL_DRAW_INDIRECT_UNIFIED_NV);
#endif
    }
    if (vbum && s_bindless_ubo){
      glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV);
      glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, UBO_MATERIAL, scene->m_materialsADDR, sizeof(CadScene::Material) * scene->m_materials.size() );
      glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, UBO_SCENE,resources.sceneAddr,sizeof(SceneData));
    }
    else{
      glBindBufferBase(GL_UNIFORM_BUFFER, UBO_SCENE, resources.sceneUbo);
      glBindBufferBase(GL_UNIFORM_BUFFER, UBO_MATERIAL, scene->m_materialsGL);
    }

    nvgl::bindMultiTexture(GL_TEXTURE0 + TEX_MATRICES, GL_TEXTURE_BUFFER, scene->m_matricesTexGL);
    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);

    {
      ShadeCommand& sc = m_shades[shadetype];
      if (vbum){
  #if USE_GPU_INDIRECT
        glBufferAddressRangeNV(GL_DRAW_INDIRECT_ADDRESS_NV, 0,       sc.indirectADDR, sc.indirects.size() * sizeof(IndexedCommand) );
  #endif
  #if USE_VERTEX_ASSIGNS
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 1, sc.assignADDR, sc.assigns.size() * sizeof(GLint));
  #endif
      }
      else{
  #if USE_GPU_INDIRECT
        glBindBuffer(GL_DRAW_INDIRECT_BUFFER, sc.indirectGL);
  #endif
  #if USE_VERTEX_ASSIGNS
        glBindVertexBuffer(1, sc.assignGL, 0, sizeof(GLint)*2);
  #endif
      }
  #if USE_CPU_INDIRECT
      size_t offset = (size_t)&sc.indirects[0];
  #else
      size_t offset = 0;
  #endif

      int lastGeometry = -1;
      bool lastSolid  = true;
      for (size_t i = 0; i < sc.geometries.size(); i++){
        int geometryIndex = sc.geometries[i];

        if (geometryIndex != lastGeometry){
          const CadScene::Geometry& geo = m_scene->m_geometry[ geometryIndex ];
          if (vbum){
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0,  geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex));
            glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_N

Download .txt

gitextract_wdi1bw94/

├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING
├── LICENSE
├── README.md
├── cadscene.cpp
├── cadscene.hpp
├── common.h
├── csf.cpp
├── csfviewer.cpp
├── cull-bitpack.vert.glsl
├── cull-downsample.frag.glsl
├── cull-downsample.vert.glsl
├── cull-raster.frag.glsl
├── cull-raster.geo.glsl
├── cull-raster.vert.glsl
├── cull-tokencmds.vert.glsl
├── cull-tokensizes.vert.glsl
├── cull-xfb.vert.glsl
├── cullingsystem.cpp
├── cullingsystem.hpp
├── nodetree.cpp
├── nodetree.hpp
├── nvtoken.cpp
├── nvtoken.hpp
├── renderer.cpp
├── renderer.hpp
├── rendererindexedmdi.cpp
├── renderertoken.cpp
├── renderertokensortcull.cpp
├── renderertokenstream.cpp
├── rendereruborange.cpp
├── rendererubosub.cpp
├── scan.comp.glsl
├── scansystem.cpp
├── scansystem.hpp
├── scene.frag.glsl
├── scene.vert.glsl
├── statesystem.cpp
├── statesystem.hpp
├── tokenbase.cpp
├── tokenbase.hpp
├── transform-leaves.comp.glsl
├── transform-level.comp.glsl
├── transformsystem.cpp
├── transformsystem.hpp
└── xplode-animation.comp.glsl

Download .txt

SYMBOL INDEX (404 symbols across 24 files)

FILE: cadscene.cpp
  function randomVector (line 34) | glm::vec4 randomVector(float from, float to)
  function recursiveHierarchy (line 45) | static void recursiveHierarchy(NodeTree& tree, CSFile* csf, int idx, int...
  type ListItem (line 460) | struct ListItem
  function ListItem_compare (line 466) | static bool ListItem_compare(const ListItem& a, const ListItem& b)
  function fillCache (line 476) | static void fillCache(CadScene::DrawRangeCache& cache, const std::vector...

FILE: cadscene.hpp
  class CadScene (line 30) | class CadScene {
    type BBox (line 34) | struct BBox {
      method BBox (line 38) | BBox() : min(FLT_MAX), max(-FLT_MAX) {}
      method merge (line 40) | inline void merge( const glm::vec4& point )
      method merge (line 46) | inline void merge( const BBox& bbox )
      method BBox (line 52) | inline BBox transformed ( const glm::mat4 &matrix, int dim=3)
    type MaterialSide (line 88) | struct MaterialSide {
    type Material (line 96) | struct Material {
      method Material (line 102) | Material() {
    type MatrixNode (line 108) | struct MatrixNode {
    type Vertex (line 115) | struct Vertex {
    type DrawRange (line 120) | struct DrawRange {
      method DrawRange (line 124) | DrawRange() : offset(0) , count(0) {}
    type DrawStateInfo (line 127) | struct DrawStateInfo {
    type DrawRangeCache (line 140) | struct DrawRangeCache {
    type GeometryPart (line 148) | struct GeometryPart {
    type Geometry (line 153) | struct Geometry {
    type ObjectPart (line 170) | struct ObjectPart {
    type Object (line 176) | struct Object {

FILE: common.h
  type SceneData (line 45) | struct SceneData {
  function uniform (line 73) | uniform sceneBuffer {
  function uniform (line 78) | uniform matrixBuffer {
  function mat4 (line 99) | mat4 getIndexedMatrix(int idx, int what)

FILE: csfviewer.cpp
  type csfviewer (line 53) | namespace csfviewer {
    class Sample (line 60) | class Sample : public nvgl::AppWindowProfilerGL
      type GuiEnums (line 63) | enum GuiEnums
      type Tweak (line 111) | struct Tweak
      method Sample (line 171) | Sample() { setupConfigParameters(); }
      method end (line 183) | void end() override { ImGui::ShutdownGL(); }
      method mouse_pos (line 185) | bool mouse_pos(int x, int y) override
      method mouse_button (line 191) | bool mouse_button(int button, int action) override
      method mouse_wheel (line 197) | bool mouse_wheel(int wheel) override
      method key_char (line 203) | bool key_char(int button) override
      method key_button (line 209) | bool key_button(int button, int action, int mods) override
    function addPath (line 865) | static std::string addPath(std::string const& defaultPath, std::string...
    function main (line 930) | int main(int argc, const char** argv)

FILE: cullingsystem.cpp
  function minDivide (line 29) | inline unsigned int minDivide(unsigned int val, unsigned int alignment)

FILE: cullingsystem.hpp
  class CullingSystem (line 30) | class CullingSystem {
    type Programs (line 32) | struct Programs {
    type MethodType (line 43) | enum MethodType {
    type BitType (line 50) | enum BitType {
    type Buffer (line 57) | struct Buffer {
      method create (line 63) | void create( size_t sizei, const void* data, GLbitfield flags )
      method Buffer (line 72) | Buffer( GLuint buffer, size_t sizei = 0 )
      method Buffer (line 88) | Buffer()
      method BindBufferRange (line 97) | inline void BindBufferRange(GLenum target, GLuint index) const {
      method TexBuffer (line 100) | inline void TexBuffer(GLenum target, GLenum internalformat) const {
      method ClearBufferSubData (line 103) | inline void ClearBufferSubData(GLenum target,GLenum internalformat,G...
    class Job (line 109) | class Job {
    class JobReadback (line 139) | class JobReadback : public Job {
    class JobReadbackPersistent (line 155) | class JobReadbackPersistent : public Job {
    class JobIndirectUnordered (line 172) | class JobIndirectUnordered : public Job {
    type View (line 184) | struct View {
    type Uniforms (line 209) | struct Uniforms {

FILE: nodetree.cpp
  function clearNode (line 29) | static inline void clearNode(NodeTree::Node &node)

FILE: nodetree.hpp
  class NodeTree (line 27) | class NodeTree {
    type Flags (line 29) | enum Flags {
    type compactID (line 39) | struct compactID {
      method compactID (line 43) | compactID(){
    type Level (line 52) | struct Level {
      method Level (line 57) | Level(){
    type Node (line 62) | struct Node {
    method getNumUsedLevel (line 90) | inline int getNumUsedLevel() const
    method nodeID (line 98) | inline nodeID getTreeRoot()
    method Node (line 103) | inline const Node& getNode(nodeID nodeidx) const
    method isValid (line 109) | inline bool  isValid(unsigned int id)
    method isNodeInTree (line 114) | inline bool  isNodeInTree(nodeID nodeidx)
    method nodeID (line 119) | inline nodeID  getParentNode(nodeID nodeidx) const
    method getNumActiveNodes (line 140) | int     getNumActiveNodes() const {
    method Level (line 146) | inline Level& getLevel(int level)
      method Level (line 57) | Level(){
    method Node (line 154) | inline Node& getNode(nodeID nodeidx)

FILE: nvtoken.cpp
  type nvtoken (line 25) | namespace nvtoken
    function GLuint (line 36) | static inline GLuint nvtokenHeaderSW(GLuint type, GLuint size){
    function GLenum (line 40) | static inline GLenum nvtokenHeaderCommandSW(GLuint header)
    function GLuint (line 45) | static inline GLuint nvtokenHeaderSizeSW(GLuint header)
    function GLenum (line 50) | static inline GLenum nvtokenHeaderCommand(GLuint header)
    function nvtokenRegisterSize (line 61) | static void nvtokenRegisterSize()
    function nvtokenInitInternals (line 66) | void nvtokenInitInternals( bool hwsupport, bool bindlessSupport)
    function nvtokenGetStats (line 145) | void nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSiz...
    function GLenum (line 163) | static inline GLenum nvtokenDrawCommandSequenceSW( const void* NV_REST...
    function nvtokenDrawCommandsSW (line 339) | void nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream...
    function nvtokenDrawCommandsStatesSW (line 359) | void nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_...

FILE: nvtoken.hpp
  type StateSystem (line 36) | namespace StateSystem {
    type Faces (line 38) | enum Faces {
    type State (line 43) | struct State {
  type nvtoken (line 65) | namespace nvtoken
    type NVTokenShaderStage (line 74) | enum NVTokenShaderStage {
    class NVPointerStream (line 88) | class NVPointerStream {
      method init (line 95) | void init(void* data, size_t size)
      method size (line 103) | size_t size() const
      method capacity (line 108) | size_t  capacity() const
    type NVTokenSequence (line 114) | struct NVTokenSequence {
    type NVTokenNop (line 147) | struct NVTokenNop {
      method NVTokenNop (line 152) | NVTokenNop() {
    type NVTokenTerminate (line 157) | struct NVTokenTerminate {
      method NVTokenTerminate (line 162) | NVTokenTerminate() {
    type NVTokenDrawElemsInstanced (line 167) | struct NVTokenDrawElemsInstanced {
      method NVTokenDrawElemsInstanced (line 172) | NVTokenDrawElemsInstanced() {
      method setMode (line 183) | void setMode(GLenum primmode) {
      method setParams (line 187) | void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)
      method setInstances (line 194) | void setInstances(GLuint count, GLuint baseInstance=0){
    type NVTokenDrawArraysInstanced (line 200) | struct NVTokenDrawArraysInstanced {
      method NVTokenDrawArraysInstanced (line 205) | NVTokenDrawArraysInstanced() {
      method setMode (line 215) | void setMode(GLenum primmode) {
      method setParams (line 219) | void setParams(GLuint count, GLuint first=0)
      method setInstances (line 225) | void setInstances(GLuint count, GLuint baseInstance=0){
    type NVTokenDrawElems (line 231) | struct NVTokenDrawElems {
      method NVTokenDrawElems (line 236) | NVTokenDrawElems() {
      method setParams (line 244) | void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)
      method setMode (line 251) | void setMode(GLenum primmode) {
    type NVTokenDrawArrays (line 266) | struct NVTokenDrawArrays {
      method NVTokenDrawArrays (line 271) | NVTokenDrawArrays() {
      method setParams (line 278) | void setParams(GLuint count, GLuint first=0)
      method setMode (line 284) | void setMode(GLenum primmode) {
    type NVTokenDrawElemsStrip (line 299) | struct NVTokenDrawElemsStrip {
      method NVTokenDrawElemsStrip (line 304) | NVTokenDrawElemsStrip() {
      method setParams (line 312) | void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0)
    type NVTokenDrawArraysStrip (line 320) | struct NVTokenDrawArraysStrip {
      method NVTokenDrawArraysStrip (line 325) | NVTokenDrawArraysStrip() {
      method setParams (line 332) | void setParams(GLuint count, GLuint first=0)
    type NVTokenVbo (line 339) | struct NVTokenVbo {
      method setBinding (line 347) | void setBinding(GLuint idx){
      method setBuffer (line 351) | void setBuffer(GLuint buffer, GLuint64 address, GLuint offset)
      method NVTokenVbo (line 364) | NVTokenVbo() {
    type NVTokenIbo (line 369) | struct NVTokenIbo {
      method setType (line 377) | void setType(GLenum type){
      method setBuffer (line 392) | void setBuffer(GLuint buffer, GLuint64 address)
      method NVTokenIbo (line 404) | NVTokenIbo() {
    type NVTokenUbo (line 409) | struct NVTokenUbo {
      method setBuffer (line 417) | void setBuffer(GLuint buffer, GLuint64 address, GLuint offset, GLuin...
      method setBinding (line 432) | void setBinding(GLuint idx, NVTokenShaderStage stage){
      method NVTokenUbo (line 437) | NVTokenUbo() {
    type NVTokenBlendColor (line 442) | struct NVTokenBlendColor{
      method NVTokenBlendColor (line 447) | NVTokenBlendColor() {
    type NVTokenStencilRef (line 452) | struct NVTokenStencilRef{
      method NVTokenStencilRef (line 457) | NVTokenStencilRef() {
    type NVTokenLineWidth (line 462) | struct NVTokenLineWidth{
      method NVTokenLineWidth (line 467) | NVTokenLineWidth() {
    type NVTokenPolygonOffset (line 472) | struct NVTokenPolygonOffset{
      method NVTokenPolygonOffset (line 477) | NVTokenPolygonOffset() {
    type NVTokenAlphaRef (line 482) | struct NVTokenAlphaRef{
      method NVTokenAlphaRef (line 487) | NVTokenAlphaRef() {
    type NVTokenViewport (line 492) | struct NVTokenViewport{
      method NVTokenViewport (line 497) | NVTokenViewport() {
    type NVTokenScissor (line 502) | struct NVTokenScissor {
      method NVTokenScissor (line 507) | NVTokenScissor() {
    type NVTokenFrontFace (line 512) | struct NVTokenFrontFace {
      method NVTokenFrontFace (line 517) | NVTokenFrontFace() {
      method setFrontFace (line 521) | void setFrontFace(GLenum winding){
    function nvtokenMakeNop (line 529) | void nvtokenMakeNop(T & token){
    function nvtokenEnqueue (line 537) | size_t nvtokenEnqueue(std::string& queue, T& data)
    function nvtokenEnqueue (line 548) | size_t nvtokenEnqueue(NVPointerStream& queue, T& data)

FILE: renderer.cpp
  type csfviewer (line 32) | namespace csfviewer
    type ShadeType (line 42) | enum ShadeType
    function FillCache (line 54) | static void FillCache( std::vector<Renderer::DrawItem>& drawItems, con...
    function FillJoin (line 80) | static void FillJoin( std::vector<Renderer::DrawItem>& drawItems, cons...
    function FillIndividual (line 135) | static void FillIndividual( std::vector<Renderer::DrawItem>& drawItems...

FILE: renderer.hpp
  type csfviewer (line 44) | namespace csfviewer {
    type Strategy (line 50) | enum Strategy {
    type ShadeType (line 56) | enum ShadeType {
    type ShadeType (line 63) | enum ShadeType
    type Resources (line 65) | struct Resources {
      method usingUboProgram (line 90) | void usingUboProgram(bool ubo=true) const
      method Resources (line 97) | Resources() {
    class Renderer (line 109) | class Renderer {
      type DrawItem (line 112) | struct DrawItem {
      method DrawItem_compare_groups (line 121) | static bool DrawItem_compare_groups(const DrawItem& a, const DrawIte...
      class Type (line 132) | class Type {
        method Type (line 134) | Type() {
        method loadPrograms (line 139) | virtual bool loadPrograms( nvgl::ProgramManager &mgr ) { return tr...
        method updatedPrograms (line 140) | virtual void updatedPrograms( nvgl::ProgramManager &mgr ) { }
        method priority (line 144) | virtual unsigned int priority() const { return 0xFF; }
      method Registry (line 150) | static Registry& getRegistry()
      method init (line 160) | virtual void init(const CadScene* NV_RESTRICT scene, const Resources...
      method deinit (line 161) | virtual void deinit() {}
      method draw (line 162) | virtual void draw(ShadeType shadetype, const Resources& resources, n...

FILE: rendererindexedmdi.cpp
  type csfviewer (line 33) | namespace csfviewer
    class RendererIndexedMDI (line 37) | class RendererIndexedMDI: public Renderer {
      class Type (line 39) | class Type : public Renderer::Type
        method isAvailable (line 41) | bool isAvailable() const
        method Renderer (line 49) | Renderer* create() const
        method priority (line 54) | unsigned int priority() const
      class TypeVbum (line 59) | class TypeVbum : public Renderer::Type
        method isAvailable (line 61) | bool isAvailable() const
        method Renderer (line 69) | Renderer* create() const
        method priority (line 75) | unsigned int priority() const
      class TypeSort (line 80) | class TypeSort : public Renderer::Type
        method isAvailable (line 82) | bool isAvailable() const
        method Renderer (line 90) | Renderer* create() const
        method priority (line 96) | unsigned int priority() const
      class TypeSortVbum (line 101) | class TypeSortVbum : public Renderer::Type
        method isAvailable (line 103) | bool isAvailable() const
        method Renderer (line 111) | Renderer* create() const
        method priority (line 118) | unsigned int priority() const
      type DrawIndirectGL (line 125) | struct DrawIndirectGL {
        method DrawIndirectGL (line 132) | DrawIndirectGL ()
      type IndexedCommand (line 140) | struct IndexedCommand {
      type ShadeCommand (line 144) | struct ShadeCommand {
        method ShadeCommand (line 163) | ShadeCommand() {
      method RendererIndexedMDI (line 182) | RendererIndexedMDI()
      method GLuint (line 193) | GLuint packBaseInstance( int matrixIndex, int materialIndex )
      method GenerateIndirects (line 200) | void GenerateIndirects(std::vector<DrawItem>& drawItems, ShadeType s...

FILE: renderertoken.cpp
  type csfviewer (line 27) | namespace csfviewer
    class RendererToken (line 31) | class RendererToken: public Renderer, public TokenRendererBase {
      class Type (line 33) | class Type : public Renderer::Type
        method isAvailable (line 35) | bool isAvailable() const
        method Renderer (line 43) | Renderer* create() const
        method priority (line 48) | unsigned int priority() const
      class TypeAddr (line 53) | class TypeAddr : public Renderer::Type
        method isAvailable (line 55) | bool isAvailable() const
        method Renderer (line 63) | Renderer* create() const
        method priority (line 69) | unsigned int priority() const
      class TypeList (line 74) | class TypeList : public Renderer::Type
        method isAvailable (line 76) | bool isAvailable() const
        method Renderer (line 84) | Renderer* create() const
        method priority (line 90) | unsigned int priority() const
      class TypeEmu (line 95) | class TypeEmu : public Renderer::Type
        method isAvailable (line 97) | bool isAvailable() const
        method Renderer (line 105) | Renderer* create() const
        method priority (line 111) | unsigned int priority() const
      class TypeSort (line 117) | class TypeSort : public Renderer::Type
        method isAvailable (line 119) | bool isAvailable() const
        method Renderer (line 127) | Renderer* create() const
        method priority (line 133) | unsigned int priority() const
      class TypeSortAddr (line 138) | class TypeSortAddr : public Renderer::Type
        method isAvailable (line 140) | bool isAvailable() const
        method Renderer (line 148) | Renderer* create() const
        method priority (line 155) | unsigned int priority() const
      class TypeSortList (line 160) | class TypeSortList : public Renderer::Type
        method isAvailable (line 162) | bool isAvailable() const
        method Renderer (line 170) | Renderer* create() const
        method priority (line 177) | unsigned int priority() const
      class TypeSortEmu (line 182) | class TypeSortEmu : public Renderer::Type
        method isAvailable (line 184) | bool isAvailable() const
        method Renderer (line 192) | Renderer* create() const
        method priority (line 199) | unsigned int priority() const
      method GenerateTokens (line 214) | void GenerateTokens(std::vector<DrawItem>& drawItems, ShadeType shad...

FILE: renderertokensortcull.cpp
  type csfviewer (line 28) | namespace csfviewer
    class RendererCullSortToken (line 36) | class RendererCullSortToken : public Renderer, public TokenRendererBase {
      class Shared (line 38) | class Shared {
        method Shared (line 45) | static Shared& get()
        method Shared (line 51) | Shared() : loaded(false) {}
        method load (line 53) | bool load(nvgl::ProgramManager &progManager)
      class Type (line 73) | class Type : public Renderer::Type
        method isAvailable (line 75) | bool isAvailable() const
        method Renderer (line 83) | Renderer* create() const
        method loadPrograms (line 88) | bool loadPrograms( nvgl::ProgramManager &mgr)
        method priority (line 92) | unsigned int priority() const
      class TypeEmu (line 97) | class TypeEmu : public Renderer::Type
        method isAvailable (line 99) | bool isAvailable() const
        method Renderer (line 107) | Renderer* create() const
        method loadPrograms (line 113) | bool loadPrograms( nvgl::ProgramManager &mgr )
        method priority (line 117) | unsigned int priority() const
      method DrawItem_compare_groups (line 131) | static bool DrawItem_compare_groups(const DrawItem& a, const DrawIte...
      type CullSequence (line 145) | struct CullSequence {
      type CullShade (line 152) | struct CullShade {
      class CullJobToken (line 169) | class CullJobToken : public CullingSystem::Job
      method handleToken (line 193) | static void handleToken(std::vector<GLuint> &tokenSizes, std::vector...
      method GenerateTokens (line 200) | void GenerateTokens(std::vector<DrawItem>& drawItems, ShadeType shad...

FILE: renderertokenstream.cpp
  type csfviewer (line 27) | namespace csfviewer
    class RendererTokenStream (line 31) | class RendererTokenStream: public Renderer, public TokenRendererBase {
      class Type (line 33) | class Type : public Renderer::Type
        method isAvailable (line 35) | bool isAvailable() const
        method Renderer (line 43) | Renderer* create() const
        method priority (line 48) | unsigned int priority() const
      class TypeEmu (line 53) | class TypeEmu : public Renderer::Type
        method isAvailable (line 55) | bool isAvailable() const
        method Renderer (line 63) | Renderer* create() const
        method priority (line 69) | unsigned int priority() const
      method GenerateTokens (line 86) | size_t GenerateTokens(NVPointerStream& tokenStream, std::vector<Draw...

FILE: rendereruborange.cpp
  type csfviewer (line 29) | namespace csfviewer
    class RendererUboRange (line 33) | class RendererUboRange: public Renderer {
      class Type (line 35) | class Type : public Renderer::Type
        method isAvailable (line 37) | bool isAvailable() const
        method Renderer (line 45) | Renderer* create() const
        method priority (line 50) | unsigned int priority() const
      class TypeEmu (line 55) | class TypeEmu : public Renderer::Type
        method isAvailable (line 57) | bool isAvailable() const
        method Renderer (line 65) | Renderer* create() const
        method priority (line 71) | unsigned int priority() const
      class TypeSort (line 76) | class TypeSort : public Renderer::Type
        method isAvailable (line 78) | bool isAvailable() const
        method Renderer (line 86) | Renderer* create() const
        method priority (line 92) | unsigned int priority() const
      class TypeSortEmu (line 97) | class TypeSortEmu : public Renderer::Type
        method isAvailable (line 99) | bool isAvailable() const
        method Renderer (line 107) | Renderer* create() const
        method priority (line 114) | unsigned int priority() const
      method RendererUboRange (line 125) | RendererUboRange()

FILE: rendererubosub.cpp
  type csfviewer (line 29) | namespace csfviewer
    class RendererUboSub (line 33) | class RendererUboSub: public Renderer {
      class Type (line 35) | class Type : public Renderer::Type
        method isAvailable (line 37) | bool isAvailable() const
        method Renderer (line 45) | Renderer* create() const
        method priority (line 50) | unsigned int priority() const
      class TypeVbum (line 55) | class TypeVbum : public Renderer::Type
        method isAvailable (line 57) | bool isAvailable() const
        method Renderer (line 65) | Renderer* create() const
        method priority (line 71) | unsigned int priority() const
      class TypeSort (line 76) | class TypeSort : public Renderer::Type
        method isAvailable (line 78) | bool isAvailable() const
        method Renderer (line 86) | Renderer* create() const
        method priority (line 92) | unsigned int priority() const
      class TypeSortVbum (line 97) | class TypeSortVbum : public Renderer::Type
        method isAvailable (line 99) | bool isAvailable() const
        method Renderer (line 107) | Renderer* create() const
        method priority (line 114) | unsigned int priority() const
      method RendererUboSub (line 135) | RendererUboSub()

FILE: scansystem.cpp
  function GLuint (line 26) | inline static GLuint snapdiv(GLuint input, GLuint align)

FILE: scansystem.hpp
  class ScanSystem (line 29) | class ScanSystem {
    type Programs (line 34) | struct Programs {
    type Buffer (line 40) | struct Buffer {
      method create (line 45) | void create(size_t sizei, const void* data, GLbitfield flags)
      method Buffer (line 53) | Buffer(GLuint buffer)
      method Buffer (line 63) | Buffer()
      method BindBufferRange (line 71) | inline void BindBufferRange(GLenum target, GLuint index) const {
      method BindBufferRange (line 74) | inline void BindBufferRange(GLenum target, GLuint index, GLintptr of...
      method GetNamedBufferSubData (line 78) | inline void GetNamedBufferSubData(void* data){

FILE: statesystem.hpp
  class StateSystem (line 31) | class StateSystem {
    method isBitSet (line 34) | static inline bool isBitSet(GLbitfield bits, GLuint key)
    method setBit (line 39) | static inline void setBit(GLbitfield& bits, GLuint key)
    method GLbitfield (line 44) | static GLbitfield getBit(GLuint key)
    method GLboolean (line 49) | static inline GLboolean setBitState(GLbitfield& bits, GLuint key, GLbo...
    type StateBits (line 63) | enum StateBits {
    type StateBitsDepr (line 92) | enum StateBitsDepr {
    type Faces (line 102) | enum Faces {
    type ClipDistanceState (line 110) | struct ClipDistanceState {
      method ClipDistanceState (line 113) | ClipDistanceState()
    type AlphaStateDepr (line 124) | struct AlphaStateDepr {
      method AlphaStateDepr (line 128) | AlphaStateDepr()
    type StencilOp (line 140) | struct StencilOp
    type StencilFunc (line 146) | struct StencilFunc
    type StencilState (line 152) | struct StencilState{
      method StencilState (line 156) | StencilState()
    type BlendMode (line 170) | struct BlendMode{
    type BlendStage (line 175) | struct BlendStage{
    type BlendState (line 179) | struct BlendState{
      method BlendState (line 185) | BlendState() {
    type DepthState (line 201) | struct DepthState {
      method DepthState (line 205) | DepthState() {
    type LogicState (line 214) | struct LogicState {
      method LogicState (line 217) | LogicState() {
    type RasterState (line 226) | struct RasterState {
      method RasterState (line 237) | RasterState() {
    type RasterStateDepr (line 254) | struct RasterStateDepr {
      method RasterStateDepr (line 260) | RasterStateDepr() {
    type PrimitiveState (line 273) | struct PrimitiveState {
      method PrimitiveState (line 278) | PrimitiveState() {
    type SampleState (line 290) | struct SampleState {
      method SampleState (line 295) | SampleState() {
    type Viewport (line 306) | struct Viewport {
    type DepthRange (line 312) | struct DepthRange {
    type Scissor (line 316) | struct Scissor {
    type DepthRangeState (line 343) | struct DepthRangeState {
      method DepthRangeState (line 347) | DepthRangeState() {
    type ScissorEnableState (line 379) | struct ScissorEnableState {
      method ScissorEnableState (line 382) | ScissorEnableState() {
    type MaskState (line 392) | struct MaskState {
      method MaskState (line 398) | MaskState() {
    type FBOState (line 416) | struct FBOState {
      method FBOState (line 423) | FBOState() {
      method setFbo (line 434) | void setFbo(GLuint fbo){
    type VertexEnableState (line 448) | struct VertexEnableState {
      method VertexEnableState (line 451) | VertexEnableState() {
    type VertexModeType (line 459) | enum VertexModeType {
    type VertexFormat (line 466) | struct VertexFormat {
    type VertexBinding (line 478) | struct VertexBinding {
    type VertexFormatState (line 483) | struct VertexFormatState {
      method VertexFormatState (line 487) | VertexFormatState() {
    type VertexData (line 507) | struct VertexData {
    type VertexImmediateState (line 516) | struct VertexImmediateState {
      method VertexImmediateState (line 519) | VertexImmediateState() {
    type ProgramState (line 535) | struct ProgramState {
      method ProgramState (line 540) | ProgramState() {
    type EnableState (line 550) | struct EnableState {
      method EnableState (line 553) | EnableState() {
    type EnableStateDepr (line 562) | struct EnableStateDepr {
      method EnableStateDepr (line 565) | EnableStateDepr() {
    type State (line 576) | struct State {
      method State (line 611) | State()
    type StateDiffKey (line 641) | struct StateDiffKey{
    type StateDiff (line 646) | struct StateDiff {
      type ContentBits (line 648) | enum ContentBits {
    type StateInternal (line 682) | struct StateInternal {
      method StateInternal (line 690) | StateInternal() {

FILE: tokenbase.cpp
  type csfviewer (line 28) | namespace csfviewer

FILE: tokenbase.hpp
  type csfviewer (line 47) | namespace csfviewer
    class TokenRendererBase (line 60) | class TokenRendererBase {
      type StateType (line 62) | enum StateType {
      type ShadeCommand (line 70) | struct ShadeCommand {
      method TokenRendererBase (line 83) | TokenRendererBase()

FILE: transformsystem.hpp
  class TransformSystem (line 32) | class TransformSystem {
    type Programs (line 35) | struct Programs {
    type Buffer (line 40) | struct Buffer {
      method Buffer (line 45) | Buffer(GLuint buffer, size_t sizei=0)
      method Buffer (line 62) | Buffer()
      method BindBufferRange (line 70) | inline void BindBufferRange(GLenum target, GLuint index) const {
      method TexBuffer (line 73) | inline void TexBuffer(GLenum target, GLenum internalformat) const {
    type Textures (line 87) | enum Textures {

Download .json

Condensed preview — 47 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (371K chars).

[
  {
    "path": ".gitignore",
    "chars": 290,
    "preview": ".clang-format\n.editorconfig\n\n#############################\n#Spirv\n#############################\n*.spv\n*.spva\n*.sass\n*.sa"
  },
  {
    "path": "CMakeLists.txt",
    "chars": 3322,
    "preview": "cmake_minimum_required(VERSION 3.5)\nget_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)\nProject(${PROJNAME"
  },
  {
    "path": "CONTRIBUTING",
    "chars": 1400,
    "preview": "https://developercertificate.org/\n\nDeveloper Certificate of Origin\nVersion 1.1\n\nCopyright (C) 2004, 2006 The Linux Found"
  },
  {
    "path": "LICENSE",
    "chars": 10173,
    "preview": "\n                                 Apache License\n                           Version 2.0, January 2004\n                  "
  },
  {
    "path": "README.md",
    "chars": 19758,
    "preview": "# gl cadscene render techniques\n\nThis sample implements several scene rendering techniques that target mostly static dat"
  },
  {
    "path": "cadscene.cpp",
    "chars": 18239,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cadscene.hpp",
    "chars": 5597,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "common.h",
    "chars": 2531,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "csf.cpp",
    "chars": 925,
    "preview": "/*\n * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0"
  },
  {
    "path": "csfviewer.cpp",
    "chars": 31574,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-bitpack.vert.glsl",
    "chars": 1852,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-downsample.frag.glsl",
    "chars": 2084,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-downsample.vert.glsl",
    "chars": 986,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-raster.frag.glsl",
    "chars": 1026,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-raster.geo.glsl",
    "chars": 3859,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-raster.vert.glsl",
    "chars": 2340,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-tokencmds.vert.glsl",
    "chars": 3043,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-tokensizes.vert.glsl",
    "chars": 1226,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cull-xfb.vert.glsl",
    "chars": 4111,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cullingsystem.cpp",
    "chars": 14122,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "cullingsystem.hpp",
    "chars": 6185,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "nodetree.cpp",
    "chars": 7792,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "nodetree.hpp",
    "chars": 3930,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "nvtoken.cpp",
    "chars": 14358,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "nvtoken.hpp",
    "chars": 13447,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "renderer.cpp",
    "chars": 5425,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "renderer.hpp",
    "chars": 5070,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "rendererindexedmdi.cpp",
    "chars": 13580,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "renderertoken.cpp",
    "chars": 13328,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "renderertokensortcull.cpp",
    "chars": 24726,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "renderertokenstream.cpp",
    "chars": 10651,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "rendereruborange.cpp",
    "chars": 8071,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "rendererubosub.cpp",
    "chars": 7642,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "scan.comp.glsl",
    "chars": 6573,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "scansystem.cpp",
    "chars": 6310,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "scansystem.hpp",
    "chars": 2796,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "scene.frag.glsl",
    "chars": 2217,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "scene.vert.glsl",
    "chars": 2041,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "statesystem.cpp",
    "chars": 28553,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "statesystem.hpp",
    "chars": 16212,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "tokenbase.cpp",
    "chars": 9048,
    "preview": "/*\n * Copyright (c) 2014-2023, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "tokenbase.hpp",
    "chars": 3455,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "transform-leaves.comp.glsl",
    "chars": 3110,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "transform-level.comp.glsl",
    "chars": 2760,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "transformsystem.cpp",
    "chars": 5011,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "transformsystem.hpp",
    "chars": 2474,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  },
  {
    "path": "xplode-animation.comp.glsl",
    "chars": 2759,
    "preview": "/*\n * Copyright (c) 2014-2021, NVIDIA CORPORATION.  All rights reserved.\n *\n * Licensed under the Apache License, Versio"
  }
]

About this extraction

This page contains the full source code of the nvpro-samples/gl_cadscene_rendertechniques GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 47 files (347.6 KB), approximately 94.0k tokens, and a symbol index with 404 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo