Repository: nvpro-samples/gl_cadscene_rendertechniques Branch: master Commit: bd7e727c8b03 Files: 47 Total size: 347.6 KB Directory structure: gitextract_wdi1bw94/ ├── .gitignore ├── CMakeLists.txt ├── CONTRIBUTING ├── LICENSE ├── README.md ├── cadscene.cpp ├── cadscene.hpp ├── common.h ├── csf.cpp ├── csfviewer.cpp ├── cull-bitpack.vert.glsl ├── cull-downsample.frag.glsl ├── cull-downsample.vert.glsl ├── cull-raster.frag.glsl ├── cull-raster.geo.glsl ├── cull-raster.vert.glsl ├── cull-tokencmds.vert.glsl ├── cull-tokensizes.vert.glsl ├── cull-xfb.vert.glsl ├── cullingsystem.cpp ├── cullingsystem.hpp ├── nodetree.cpp ├── nodetree.hpp ├── nvtoken.cpp ├── nvtoken.hpp ├── renderer.cpp ├── renderer.hpp ├── rendererindexedmdi.cpp ├── renderertoken.cpp ├── renderertokensortcull.cpp ├── renderertokenstream.cpp ├── rendereruborange.cpp ├── rendererubosub.cpp ├── scan.comp.glsl ├── scansystem.cpp ├── scansystem.hpp ├── scene.frag.glsl ├── scene.vert.glsl ├── statesystem.cpp ├── statesystem.hpp ├── tokenbase.cpp ├── tokenbase.hpp ├── transform-leaves.comp.glsl ├── transform-level.comp.glsl ├── transformsystem.cpp ├── transformsystem.hpp └── xplode-animation.comp.glsl ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .clang-format .editorconfig ############################# #Spirv ############################# *.spv *.spva *.sass *.sassbin *.bat ############################# #specific to the project ############################# cmake_built cmake_build build _install bin_x64 NVPRO_EXTERNAL nvpro_core ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.5) get_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME) Project(${PROJNAME}) Message(STATUS "-------------------------------") Message(STATUS 
"Processing Project ${PROJNAME}:")

#####################################################################################
# look for nvpro_core 1) as a sub-folder 2) at some other locations
# this cannot be put anywhere else since we still didn't find setup.cmake yet
#
if(NOT BASE_DIRECTORY)
  # Search the sample folder, its parent and its grand-parent for nvpro_core.
  # The REQUIRED keyword is intentionally not passed: it only exists in
  # CMake >= 3.18 (this project declares 3.5 as its minimum), and a failed
  # lookup is reported by the EXISTS check below with a more helpful message.
  find_path(BASE_DIRECTORY
    NAMES nvpro_core/cmake/setup.cmake
    PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../..
    DOC "Directory containing nvpro_core"
    )
endif()

# Pull in the shared nvpro_core build helpers, or stop with a hint on how to
# point the build at them. Paths are quoted so that a BASE_DIRECTORY containing
# list separators or being empty cannot change the argument count.
if(EXISTS "${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake")
  include("${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake")
  include("${BASE_DIRECTORY}/nvpro_core/cmake/utilities.cmake")
else()
  message(FATAL_ERROR "could not find base directory, please set BASE_DIRECTORY to folder containing nvpro_core")
endif()

_add_project_definitions(${PROJNAME})

#--------------------------------------------------------------------------------------------------
# Resources
#
download_files(FILENAMES geforce.csf.gz)

#####################################################################################
# additions from packages needed for this sample
# add refs  in LIBRARIES_OPTIMIZED
# add refs  in LIBRARIES_DEBUG
# add files in PACKAGE_SOURCE_FILES
#
_add_package_OpenGL()
_add_package_ImGUI()
_add_package_ZLIB()
# Passes CSF_SUPPORT_ZLIB=1 to the compiler for everything configured from here on.
add_definitions(-DCSF_SUPPORT_ZLIB=1)

#####################################################################################
# process the rest of some cmake code that needs to be done *after* the packages add
_add_nvpro_core_lib()

#####################################################################################
# Source files for this project
#
file(GLOB SOURCE_FILES *.cpp *.hpp *.inl *.h *.c)
file(GLOB GLSL_FILES *.glsl)

#####################################################################################
# Executable
#
if(WIN32)
  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()

# The GLSL files are listed as sources of the executable as well.
add_executable(${PROJNAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${GLSL_FILES})
##################################################################################### # common source code needed for this sample # source_group(common FILES ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ) source_group(shaders FILES ${GLSL_FILES} ) ##################################################################################### # Linkage # target_link_libraries(${PROJNAME} ${PLATFORM_LIBRARIES} nvpro_core) foreach(DEBUGLIB ${LIBRARIES_DEBUG}) target_link_libraries(${PROJNAME} debug ${DEBUGLIB}) endforeach(DEBUGLIB) foreach(RELEASELIB ${LIBRARIES_OPTIMIZED}) target_link_libraries(${PROJNAME} optimized ${RELEASELIB}) endforeach(RELEASELIB) ##################################################################################### # copies binaries that need to be put next to the exe files (ZLib, etc.) # _finalize_target( ${PROJNAME} ) LIST(APPEND GLSL_FILES "common.h") install(FILES ${GLSL_FILES} CONFIGURATIONS Release DESTINATION "bin_${ARCH}/GLSL_${PROJNAME}") install(FILES ${GLSL_FILES} CONFIGURATIONS Debug DESTINATION "bin_${ARCH}_debug/GLSL_${PROJNAME}") ================================================ FILE: CONTRIBUTING ================================================ https://developercertificate.org/ Developer Certificate of Origin Version 1.1 Copyright (C) 2004, 2006 The Linux Foundation and its contributors. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 
Developer's Certificate of Origin 1.1 By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS ================================================ FILE: README.md ================================================ # gl cadscene render techniques This sample implements several scene rendering techniques that target mostly static data, such as often found in CAD or DCC applications. In this context, 'static' means that the vertex and index buffers for the scene's objects rarely change. This can include editing the geometry of a few scene objects, but the matrix and material values are the properties that are modified the most across frames. 
Imagine making edits to the wheel topology of a car, or positioning an engine; the rest of the assembly remains the same. The principal OpenGL mechanisms that are used here are described in the [SIGGRAPH 2014 presentation slides](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf). It is highly recommended to go through the slides first. The sample makes use of multiple OpenGL 4 core features, such as **ARB_multi_draw_indirect**, but also showcases OpenGL 3 style rendering techniques. There are also several techniques built around the **NV_command_list** extension. Please refer to [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) for an introduction to NV_command_list. > Note: This is just a sample to illustrate several techniques and possibilities for how to approach rendering. Its purpose is not to provide production-level, highly optimized implementations. ### Scene Setup The sample loads a cadscene file (csf). This file format is inspired by CAD applications' data organization, but (for simplicity) everything is stored in a single RAW file. The scene is organized into: * Matrices: object transforms as well as concatenated world matrices * TreeNodes: a tree consisting hierarchical information, mapping to Matrix indices * Materials: just classic two-sided OpenGL Blinn-Phong material parameters * Geometries: storing vertex and index information, organized into * GeometryParts, which reference a sub-range within index buffer, for either "wireframe" or "solid" surfaces * Objects, that reference Geometry and have corresponding * ObjectParts, that encode part-level Material and Matrix assignment. Typically, an object uses just one Matrix for all its parts. 
### Shademodes ![sample screenshot](https://github.com/nvpro-samples/gl_cadscene_rendertechniques/blob/master/doc/sample.jpg) - **solid**: only triangles are drawn - **solid with edges**: triangles and edge outlines on top (using PolygonOffset to push triangles back). When no global sorting (see later) is performed, this means we toggle between the two modes for every object. - **solid with edges (split test, only in sorted)**: an artificial mode that also separates triangles and edges into different FBOs, and is available in "sorted" and "token" renderers. The implementation has no real use-case character and is more or less for internal benchmarking of FBO toggles. ### Strategies These influence the number of drawcalls we generate for the hardware and software. Using OpenGL's MultiDraw* functions we can have less software calls than hardware drawcalls, which helps trigger faster paths in the driver as there is less validation overhead. A strategy is applied on a per-object level. Imagine an object whose parts use two materials, red and blue: ``` material: r b b r parts: A B C D ``` - **materialgroups** Here we create a per-object cache of drawcall ranges for MultiDraw* based on the object's material and matrix assignments. We also "grow" drawcalls if subsequent ranges in the index buffer have the same assignments. Our sample object would be drawn using 2 states one glMultiDrawElements each, which are creating 3 hardware drawcalls: red are ranges A, D and blue is B+C joined together as they are next to each other in the indexbuffer. - **drawcall join** As we traverse we combine drawcalls under same state, this means 3 drawcalls for hardware, and 3 for software as well as 3 states: red A, blue B+C, red D. - **drawcall individual** We render each piece individually: red A, blue B, C, red D. Typically we do all rendering with basic state redundancy filtering so we don't setup a matrix/material change if the same is still active. 
To keep things simple for state redundancy filtering, you should not go too fine-grained, otherwise all the tracking causes too much memory hopping. In our case we have 3 indices we track: geometry (handles vertex / index buffer setup), material, and matrix. ### Renderers Most renderers will traverse the scene data every frame. The organization of the data is cache-friendly foremost, everything is stored in arrays, without too much memory hopping. Some renderers may implement additional caching for rendering. #### Variants: - **bindless**: these variants make use of NVIDIA's bindless extensions NV_vertex_buffer_unified_memory and NV_uniform_buffer_unified_memory, which allows a lower-overhead path in the driver for faster drawcall submission. Classic glBindVertexBuffer or glBindBufferRange are replaced with glBufferAddressRangeNV. - **sorted**: indicates we do a global scene sort once, to minimize state changes in subsequent frames. - **cullsorted**: next to global sorting by state, we also apply occlusion culling as presented in [end of the slides](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf) or in the [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) sample. - **emulated**: several of the NV_command_list techniques can be run in emulated mode. #### Techniques: We are mostly looking into accelerating our matrix and material parameter switching performance. - **uborange** All matrices and materials are stored in big buffer objects, which allows us to efficiently bind the required sub-range for a drawcall via glBindBufferRange(GL_UNIFORM_BUFFER, usageSlot, buffer, index * itemSize, itemSize). NVIDIA provides optimized paths if you keep the buffer and itemSize for a usageSlot constant for many glBindBufferRange calls. Be aware of GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, which is 256 bytes for most current NVIDIA hardware (Fermi, Kepler, Maxwell). 
- **ubosub** Not as efficient as the above, but maybe appropriate if you cannot afford to cache parameter data. We make use of one streaming buffer per usage slot and continuously update it via glBufferSubData. NVIDIA's drivers do particularly well if you never bind this buffer as anything but a GL_UNIFORM_BUFFER and keep size and offsets a multiple of 4. - **indexedmdi** Similar to uborange we make use of all data stored in bigger buffers in advance. It doesn't make this data "static"; you can always update the portions you need, but there is a high chance a lot of data is the same frame to frame. This time, we do not bind memory ranges through the OpenGL API, but let the shader do an indirection and only pass the required matrix and material indices. For the matrix data we use GL_TEXTURE_BUFFER as it's particularly performant for high frequency / potentially divergent access. We typically have far more matrices than materials in our scene. For material data, it's a bit "ugly" to use lots of texelFetch instructions decoding all our parameters; it's much easier to write them as structs and store the array either as GL_UNIFORM_BUFFER or GL_SHADER_STORAGE_BUFFER. The latter is only recommended if you have divergent shader access or exceed the 64 KB limit of UBOs. To pass the indices per-drawcall we make use of GL_ARB_multi_draw_indirect and "instanced" vertex attributes as described at [GTC 2013 on slide 27](http://on-demand.gputechconf.com/gtc/2013/presentations/S3032-Advanced-Scenegraph-Rendering-Pipeline.pdf). Therefore this renderer requires two additional buffers: one encoding our object's matrix and material index assignments, and one encoding the scene's drawcalls as GL_DRAW_INDIRECT_BUFFER. A hybrid approach, where the parameter index like "indexedmdi" is used for matrices and uborange bind is used for materials, is not yet implemented, but would be a good compromise. The following renderers make use of the **NV_command_list** extension. 
In principle they **behave as "uborange"**, however all buffer bindings and drawcalls are encoded into binary tokens that are submitted in bulk. In preparation for drawing, the appropriate stateobjects are created and reused when rendering (one for lines and for triangles). While stateobject capturing is not extremely expensive, it is still best to cache it across frames. - **tokenbuffer** Similar to indexedmdi we create a buffer that describes our scene by storing all the relevant token commands. This buffer is filled only once and then later reused. - **tokenlist** Instead of storing the tokens inside a buffer we make use of the commandlist object, and create and compile one for each shademode for later reuse. Every time our state changes (for instance, when resizing FBOs), we have to recreate these lists, which makes it less flexible than buffer but faster when there are lots of statechanges within the list. - **tokenstream** This approach does not reuse the tokens across frames, but instead dynamically creates the tokenstream every frame. By default, the demo fills and submits tokens in chunks of 256 KB; better values may exist depending on the scene. ### Performance All timings are preliminary results for *Timer Draw* on a win7-64, i7-860, Quadro K5000 system. **Important Note About Timer Query Results:** The GPU time reported below is measured via timer queries, those values however can be skewed by CPU bottlenecks. The "begin" timestamp may be part of a different command submission to the GPU than the "end" timestamp. That means a long delay on the CPU side between those submissions will also increase the reported GPU time. That is why in CPU-bottlenecked scenarios with tons of OpenGL commands, the GPU times below are close to the CPU time. 
``` scene statistics: geometries: 110 materials: 66 nodes: 5004 objects: 2497 tokenbuffer/glstream complexities: type: solid materialgroups | drawcall individual commandsize: 347292 | 1301692 statetoggles: 1 | 1 tokens: GL_DRAW_ELEMENTS_COMMAND_NV: 11103 | 68452 GL_ELEMENT_ADDRESS_COMMAND_NV: 807 | 807 GL_ATTRIBUTE_ADDRESS_COMMAND_NV: 807 | 807 GL_UNIFORM_ADDRESS_COMMAND_NV: 8988 | 11289 GL_POLYGON_OFFSET_COMMAND_NV: 1 | 1 type: solid w edges commandsize: 629644 | 2534412 statetoggles: 4994 | 4994 tokens: GL_DRAW_ELEMENTS_COMMAND_NV: 22281 | 136750 GL_ELEMENT_ADDRESS_COMMAND_NV: 807 | 807 GL_ATTRIBUTE_ADDRESS_COMMAND_NV: 807 | 807 GL_UNIFORM_ADDRESS_COMMAND_NV: 15457 | 20036 GL_POLYGON_OFFSET_COMMAND_NV: 1 | 1 ``` As one can see from the statistics the key difference is the number of drawcalls for the hardware: - **materialgroups**: ~ 10 000 drawcalls (inner two columns) - **drawcall individual**: ~ 70 000 drawcalls (rightmost two columns) *shademode: solid* renderer | GPU time | CPU time | GPU time | CPU time (microseconds) ------------ | ------------- | ------------- | ------------- | ------------- **strategy** | **material-** | **-groups** | **drawcall-** | **-individual** ubosub | 1550 | 1870 | 6000 | 7420 uborange | 1010| 1890 | 3720 | 7660 uborange_bindless | 1010 | 1200 | 2560 | 4900 indexedmdi | 1120 | 1200 | 2080 | 1100 tokenstream | 860 | 300 | 1520 | 1400 tokenbuffer | 780 | <10 | 1230 | <10 tokenlist | 780 | <10 | 880 | <10 tokenbuffer_cullsorted | 540 | 120 | 760 | 120 The results are of course very scene dependent; this model was specifically chosen as it is made of many parts with very few triangles. If the complexity per drawcall were higher (say more triangles or complex shading), then the CPU impact would be lower and we would be GPU-bound. However the CPU time recovered by faster submission mechanisms can always be used elsewhere. So even if we are GPU-bound, time should not be wasted. 
We can see that the "token" techniques do very well and are never CPU-bound, and the "indexedmdi" technique is also quite good. This technique is especially useful for very high-frequency parameters, for example when rendering "id-buffers" for selection, but also for matrix indices. For general use-cases, working with uborange binds is recommended. *shademode: solid with edges* Unless "sorted", around 5000 toggles are done between triangles/line rendering. The shader is manipulated through an immediate vertex attribute to toggle between lit/unlit rendering respectively. renderer | GPU time | CPU time | GPU time | CPU time (microseconds) ------------ | ------------- | ------------- | ------------- | ------------- **strategy** | **material-** | **-groups** | **drawcall-** | **-individual** ubosub | 2890 | 3350 | 13000 | 15000 | uborange | 2150 | 3700 | 12500 | 15200 | uborange_bindless | 2150 | 2640 | 8300 | 10000 indexedmdi | 2340 | 2200 | 4050 | 2050 tokenstream | 1860 | 1250 | 3360 | 3200 tokenbuffer | 1750 | 450 | 2650 | 350 tokenlist | 1650 | <10 | 1890 | <10 tokenbuffer_cullsorted | 770 | 120 | 1250 | 120 Compared to the "solid" results, the tokenbuffer and tokenlist techniques show a greater difference in CPU time. ### Model Explosion View The simple viewer allows you to add animation to the scene and artificially increase scene complexity via "clones". ![xplodeclones](https://github.com/nvpro-samples/gl_cadscene_rendertechniques/blob/master/doc/xplodeclones.jpg) To "emulate" typical interaction where users might move objects around or have animated scenes, the sample also implements the matrix transform system sketched on [slide 30](http://on-demand.gputechconf.com/siggraph/2014/presentation/SG4117-OpenGL-Scene-Rendering-Techniques.pdf). The effect works by first moving all object matrices a bit (*xplode-animation.comp.glsl*), and afterwards the transform hierarchy is updated via a system that is implemented in the *transformsystem.cpp / hpp* files. 
The code is not particularly tuned but naively assumes that upper levels of the hierarchy contain fewer nodes than lower levels (pyramid). Therefore it uses leaf-processing (which redundantly calculates matrices) instead of level-wise processing for the first 10 levels, to avoid dependencies (one small compute task waiting for the previous). Later levels are always processed level-wise. A better strategy would be to switch between the two approaches based on the actual number of nodes per level. The shaders for this are *transform-leaves.comp.glsl* and *transform-level.comp.glsl*. The hierarchy is managed by *nodetree.cpp/hpp*, which stores the tree as array of 32bit values. Each value represents a node, and encodes the "level" in the hierarchy in 8 bits and their parent index in the rest of the bits. Which means you can traverse a node up to the root: ``` cpp // sample traversal of "idx" node to root self = array[idx]; while( self.level != 0) { self = array[self.parent]; } // self is now the top root for the idx node ``` The nodetree also stores two node index lists for each level: one storing all nodes of a level, and one for all leaves in this level. We feed these two index lists to the appropriate shader. When leaf processing is used we append the leaves level-wise, which should minimize divergence within a warp (ideally most threads have the same number of levels to ascend in the hierarchy). Many CAD applications tend to use double-precision matrices, and the system could be adjusted for this. For rendering, however, float matrices should be used. To account for large translation values, one could run a concatenation of view-projection (double) and object-world-matrix (double) per-frame and generate the matrices (float) for actual vertex transforms. To improve memory performance, it might be beneficial to use double only for storing translations within the matrices. > Note: Only the GPU matrices are updated. 
CPU techniques such as "ubosub" will not show animations. ### Sample Highlights This sample is a bit more complex than most others as it contains several subsystems. Don't hesitate to contact the author if something is unclear (commenting was not a priority ;) ). #### csfviewer.cpp The principal setup of the sample is in this main file. However, most of the interesting bits happen in the renderers. - Sample::think - prepares the frame and calls the renderer's draw function #### renderer... and tokenbase... Each renderer has its own file and is derived from the **Renderer** class in *renderer.hpp* - Renderer::init - some renderers may allocate extra buffers or create their own data structures for the scene. - Renderer::deinit - Renderer::draw The renderers may have additional functions. The "token" renderers using NV_command_list or "indexedmdi", for instance, must create their own scene representation. #### cadscene... The "csf" (cadscene file) format is a simple binary format that encodes a scene as is typical for CAD. It closely matches the description at the beginning of the readme. It is not very sophisticated, and is meant for demo purposes. > *Note*: The **geforce.csf.gz** assembly binary file that ships with this sample **may NOT be redistributed.** #### nodetree... and transform... Implement the matrix hierarchy updates as described in the "model explosion view" section. #### cull... and scan... For files related to culling, it is best to refer to the [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) sample, as it leverages the same system and focuses on just that topic. *renderertokensortcull.cpp* implements *RendererCullSortToken::CullJobToken::resultFromBits*, which contains the details of how the occlusion results are handled in this sample. The implementation uses the "raster" "temporal" approach. #### statesystem... nvtoken... and nvcommandlist... These files contain helpers when using the NV_command_list extension. 
Please see [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) for a smaller sample. ### Building Ideally, clone this and other interesting [nvpro-samples](https://github.com/nvpro-samples) repositories into a common subdirectory. You will always need [nvpro_core](https://github.com/nvpro-samples/nvpro_core). nvpro_core is searched for either as a subdirectory of the sample, or one directory up. If you are interested in multiple samples, you can use the [build_all](https://github.com/nvpro-samples/build_all) CMake project as the entry point. This will also give you options to enable or disable individual samples when creating the solutions. ### Related Samples [gl commandlist basic](https://github.com/nvpro-samples/gl_commandlist_basic) illustrates the core principle of the NV_command_list extension. [gl occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling) also uses the occlusion system of this sample, but in a simpler usage scenario. When using classic scenegraphs, there is typically a lot of overhead in traversing the scene. For this reason, it is highly recommended to use simpler representations for actual rendering. Consider using flattened hierarchies, arrays, memory-friendly data structures, data-oriented design patterns, and similar techniques. If you are still working with a classic scenegraph, then [nvpro-pipeline](https://github.com/nvpro-pipeline/pipeline) may provide some acceleration strategies to avoid full scenegraph traversal. Some of these strategies are also described in this [GTC 2013 presentation](http://on-demand.gputechconf.com/gtc/2013/presentations/S3032-Advanced-Scenegraph-Rendering-Pipeline.pdf). ================================================ FILE: cadscene.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "cadscene.hpp" #include #include #include #include #include "glm/gtc/type_ptr.hpp" #define USE_CACHECOMBINE 1 glm::vec4 randomVector(float from, float to) { glm::vec4 vec; float width = to - from; for(int i = 0; i < 4; i++) { vec[i] = from + (float(rand()) / float(RAND_MAX)) * width; } return vec; } static void recursiveHierarchy(NodeTree& tree, CSFile* csf, int idx, int cloneoffset) { for(int i = 0; i < csf->nodes[idx].numChildren; i++) { tree.setNodeParent((NodeTree::nodeID)csf->nodes[idx].children[i] + cloneoffset, (NodeTree::nodeID)idx + cloneoffset); } for(int i = 0; i < csf->nodes[idx].numChildren; i++) { recursiveHierarchy(tree, csf, csf->nodes[idx].children[i], cloneoffset); } } bool CadScene::loadCSF(const char* filename, int clones, int cloneaxis) { CSFile* csf; CSFileMemoryPTR mem = CSFileMemory_new(); if(CSFile_loadExt(&csf, filename, mem) != CADSCENEFILE_NOERROR || !(csf->fileFlags & CADSCENEFILE_FLAG_UNIQUENODES)) { CSFileMemory_delete(mem); return false; } int copies = clones + 1; CSFile_transform(csf); srand(234525); // materials m_materials.resize(csf->numMaterials); for(int n = 0; n < csf->numMaterials; n++) { CSFMaterial* csfmaterial = &csf->materials[n]; Material& material = m_materials[n]; for(int i = 0; i < 2; i++) { 
material.sides[i].ambient = randomVector(0.0f, 0.1f); material.sides[i].diffuse = glm::make_vec4(csf->materials[n].color) + randomVector(0.0f, 0.07f); material.sides[i].specular = randomVector(0.25f, 0.55f); material.sides[i].emissive = randomVector(0.0f, 0.05f); } } glCreateBuffers(1, &m_materialsGL); glNamedBufferStorage(m_materialsGL, sizeof(Material) * m_materials.size(), &m_materials[0], 0); //glMapNamedBufferRange(m_materialsGL, 0, sizeof(Material) * m_materials.size(), GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT); // geometry int numGeoms = csf->numGeometries; m_geometry.resize(csf->numGeometries * copies); m_geometryBboxes.resize(csf->numGeometries * copies); for(int n = 0; n < csf->numGeometries; n++) { CSFGeometry* csfgeom = &csf->geometries[n]; Geometry& geom = m_geometry[n]; geom.cloneIdx = -1; geom.numVertices = csfgeom->numVertices; geom.numIndexSolid = csfgeom->numIndexSolid; geom.numIndexWire = csfgeom->numIndexWire; std::vector vertices(csfgeom->numVertices); for(int i = 0; i < csfgeom->numVertices; i++) { vertices[i].position[0] = csfgeom->vertex[3 * i + 0]; vertices[i].position[1] = csfgeom->vertex[3 * i + 1]; vertices[i].position[2] = csfgeom->vertex[3 * i + 2]; vertices[i].position[3] = 1.0f; if(csfgeom->normal) { vertices[i].normal[0] = csfgeom->normal[3 * i + 0]; vertices[i].normal[1] = csfgeom->normal[3 * i + 1]; vertices[i].normal[2] = csfgeom->normal[3 * i + 2]; vertices[i].normal[3] = 0.0f; } else { vertices[i].normal = glm::vec4(normalize(glm::vec3(vertices[i].position)), 0.0f); } m_geometryBboxes[n].merge(vertices[i].position); } geom.vboSize = sizeof(Vertex) * vertices.size(); glCreateBuffers(1, &geom.vboGL); glNamedBufferStorage(geom.vboGL, geom.vboSize, &vertices[0], 0); std::vector indices(csfgeom->numIndexSolid + csfgeom->numIndexWire); memcpy(&indices[0], csfgeom->indexSolid, sizeof(GLuint) * csfgeom->numIndexSolid); if(csfgeom->indexWire) { memcpy(&indices[csfgeom->numIndexSolid], csfgeom->indexWire, sizeof(GLuint) * 
csfgeom->numIndexWire); } geom.iboSize = sizeof(GLuint) * indices.size(); glCreateBuffers(1, &geom.iboGL); glNamedBufferStorage(geom.iboGL, geom.iboSize, &indices[0], 0); if(has_GL_NV_vertex_buffer_unified_memory) { glGetNamedBufferParameterui64vNV(geom.vboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.vboADDR); glMakeNamedBufferResidentNV(geom.vboGL, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(geom.iboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.iboADDR); glMakeNamedBufferResidentNV(geom.iboGL, GL_READ_ONLY); } geom.parts.resize(csfgeom->numParts); size_t offsetSolid = 0; size_t offsetWire = csfgeom->numIndexSolid * sizeof(GLuint); for(int i = 0; i < csfgeom->numParts; i++) { geom.parts[i].indexWire.count = csfgeom->parts[i].numIndexWire; geom.parts[i].indexSolid.count = csfgeom->parts[i].numIndexSolid; geom.parts[i].indexWire.offset = offsetWire; geom.parts[i].indexSolid.offset = offsetSolid; offsetWire += csfgeom->parts[i].numIndexWire * sizeof(GLuint); offsetSolid += csfgeom->parts[i].numIndexSolid * sizeof(GLuint); } } for(int c = 1; c <= clones; c++) { for(int n = 0; n < numGeoms; n++) { m_geometryBboxes[n + numGeoms * c] = m_geometryBboxes[n]; const Geometry& geomorig = m_geometry[n]; Geometry& geom = m_geometry[n + numGeoms * c]; geom = geomorig; #if 1 geom.cloneIdx = n; #else geom.cloneIdx = -1; glCreateBuffers(1, &geom.vboGL); glNamedBufferStorage(geom.vboGL, geom.vboSize, 0, 0); glCreateBuffers(1, &geom.iboGL); glNamedBufferStorage(geom.iboGL, geom.iboSize, 0, 0); if(has_GL_NV_vertex_buffer_unified_memory) { glGetNamedBufferParameterui64vNV(geom.vboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.vboADDR); glMakeNamedBufferResidentNV(geom.vboGL, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(geom.iboGL, GL_BUFFER_GPU_ADDRESS_NV, &geom.iboADDR); glMakeNamedBufferResidentNV(geom.iboGL, GL_READ_ONLY); } glCopyNamedBufferSubData(geomorig.vboGL, geom.vboGL, 0, 0, geom.vboSize); glCopyNamedBufferSubData(geomorig.iboGL, geom.iboGL, 0, 0, geom.iboSize); #endif } } glCreateBuffers(1, 
&m_geometryBboxesGL); glNamedBufferStorage(m_geometryBboxesGL, sizeof(BBox) * m_geometryBboxes.size(), &m_geometryBboxes[0], 0); glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_geometryBboxesTexGL); glTextureBuffer(m_geometryBboxesTexGL, GL_RGBA32F, m_geometryBboxesGL); // nodes int numObjects = 0; m_matrices.resize(csf->numNodes * copies); for(int n = 0; n < csf->numNodes; n++) { CSFNode* csfnode = &csf->nodes[n]; memcpy(glm::value_ptr(m_matrices[n].objectMatrix), csfnode->objectTM, sizeof(float) * 16); memcpy(glm::value_ptr(m_matrices[n].worldMatrix), csfnode->worldTM, sizeof(float) * 16); m_matrices[n].objectMatrixIT = glm::transpose(glm::inverse(m_matrices[n].objectMatrix)); m_matrices[n].worldMatrixIT = glm::transpose(glm::inverse(m_matrices[n].worldMatrix)); if(csfnode->geometryIDX < 0) continue; numObjects++; } // objects m_objects.resize(numObjects * copies); m_objectAssigns.resize(numObjects * copies); numObjects = 0; for(int n = 0; n < csf->numNodes; n++) { CSFNode* csfnode = &csf->nodes[n]; if(csfnode->geometryIDX < 0) continue; Object& object = m_objects[numObjects]; object.matrixIndex = n; object.geometryIndex = csfnode->geometryIDX; m_objectAssigns[numObjects] = glm::ivec2(object.matrixIndex, object.geometryIndex); object.parts.resize(csfnode->numParts); for(int i = 0; i < csfnode->numParts; i++) { object.parts[i].active = 1; object.parts[i].matrixIndex = csfnode->parts[i].nodeIDX < 0 ? object.matrixIndex : csfnode->parts[i].nodeIDX; object.parts[i].materialIndex = csfnode->parts[i].materialIDX; } BBox bbox = m_geometryBboxes[object.geometryIndex].transformed(m_matrices[n].worldMatrix); m_bbox.merge(bbox); updateObjectDrawCache(object); numObjects++; } // compute clone move delta based on m_bbox; glm::vec4 dim = m_bbox.max - m_bbox.min; int sq = 1; int numAxis = 0; for(int i = 0; i < 3; i++) { numAxis += (cloneaxis & (1 << i)) ? 
1 : 0; } assert(numAxis); switch(numAxis) { case 1: sq = copies; break; case 2: while(sq * sq < copies) { sq++; } break; case 3: while(sq * sq * sq < copies) { sq++; } break; } for(int c = 1; c <= clones; c++) { int numNodes = csf->numNodes; glm::vec4 shift = dim * 1.05f; float u = 0; float v = 0; float w = 0; switch(numAxis) { case 1: u = float(c); break; case 2: u = float(c % sq); v = float(c / sq); break; case 3: u = float(c % sq); v = float((c / sq) % sq); w = float(c / (sq * sq)); break; } float use = u; if(cloneaxis & (1 << 0)) { shift.x *= -use; if(numAxis > 1) use = v; } else { shift.x = 0; } if(cloneaxis & (1 << 1)) { shift.y *= use; if(numAxis > 2) use = w; else if(numAxis > 1) use = v; } else { shift.y = 0; } if(cloneaxis & (1 << 2)) { shift.z *= -use; } else { shift.z = 0; } shift.w = 0; // move all world matrices for(int n = 0; n < numNodes; n++) { MatrixNode& node = m_matrices[n + numNodes * c]; MatrixNode& nodeOrig = m_matrices[n]; node = nodeOrig; node.worldMatrix[3] = node.worldMatrix[3] + shift; node.worldMatrixIT = glm::transpose(glm::inverse(node.worldMatrix)); } { // patch object matrix of root MatrixNode& node = m_matrices[csf->rootIDX + numNodes * c]; node.objectMatrix[3] = node.objectMatrix[3] + shift; node.objectMatrixIT = glm::transpose(glm::inverse(node.objectMatrix)); } // clone objects for(int n = 0; n < numObjects; n++) { const Object& objectorig = m_objects[n]; Object& object = m_objects[n + numObjects * c]; object = objectorig; object.geometryIndex += c * numGeoms; object.matrixIndex += c * numNodes; for(size_t i = 0; i < object.parts.size(); i++) { object.parts[i].matrixIndex += c * numNodes; } for(size_t i = 0; i < object.cacheSolid.state.size(); i++) { object.cacheSolid.state[i].matrixIndex += c * numNodes; } for(size_t i = 0; i < object.cacheWire.state.size(); i++) { object.cacheWire.state[i].matrixIndex += c * numNodes; } m_objectAssigns[n + numObjects * c] = glm::ivec2(object.matrixIndex, object.geometryIndex); } } 
glCreateBuffers(1, &m_matricesGL); glNamedBufferStorage(m_matricesGL, sizeof(MatrixNode) * m_matrices.size(), &m_matrices[0], 0); //glMapNamedBufferRange(m_matricesGL, 0, sizeof(MatrixNode) * m_matrices.size(), GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT); glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_matricesTexGL); glTextureBuffer(m_matricesTexGL, GL_RGBA32F, m_matricesGL); glCreateBuffers(1, &m_objectAssignsGL); glNamedBufferStorage(m_objectAssignsGL, sizeof(glm::ivec2) * m_objectAssigns.size(), &m_objectAssigns[0], 0); if(has_GL_NV_vertex_buffer_unified_memory) { glGetNamedBufferParameterui64vNV(m_materialsGL, GL_BUFFER_GPU_ADDRESS_NV, &m_materialsADDR); glMakeNamedBufferResidentNV(m_materialsGL, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(m_matricesGL, GL_BUFFER_GPU_ADDRESS_NV, &m_matricesADDR); glMakeNamedBufferResidentNV(m_matricesGL, GL_READ_ONLY); if(has_GL_ARB_bindless_texture) { m_matricesTexGLADDR = glGetTextureHandleARB(m_matricesTexGL); glMakeTextureHandleResidentARB(m_matricesTexGLADDR); } } m_nodeTree.create(copies * csf->numNodes); for(int i = 0; i < copies; i++) { int cloneoffset = (csf->numNodes) * i; int root = csf->rootIDX + cloneoffset; recursiveHierarchy(m_nodeTree, csf, csf->rootIDX, cloneoffset); m_nodeTree.setNodeParent((NodeTree::nodeID)root, m_nodeTree.getTreeRoot()); m_nodeTree.addToTree((NodeTree::nodeID)root); } glCreateBuffers(1, &m_parentIDsGL); glNamedBufferStorage(m_parentIDsGL, m_nodeTree.getTreeCompactNodes().size() * sizeof(GLuint), &m_nodeTree.getTreeCompactNodes()[0], 0); glCreateBuffers(1, &m_matricesOrigGL); glNamedBufferStorage(m_matricesOrigGL, sizeof(MatrixNode) * m_matrices.size(), &m_matrices[0], 0); glCreateTextures(GL_TEXTURE_BUFFER, 1, &m_matricesOrigTexGL); glTextureBuffer(m_matricesOrigTexGL, GL_RGBA32F, m_matricesOrigGL); CSFileMemory_delete(mem); return true; } struct ListItem { CadScene::DrawStateInfo state; CadScene::DrawRange range; }; static bool ListItem_compare(const ListItem& a, const ListItem& b) { int diff 
= 0; diff = diff != 0 ? diff : (a.state.materialIndex - b.state.materialIndex); diff = diff != 0 ? diff : (a.state.matrixIndex - b.state.matrixIndex); diff = diff != 0 ? diff : int(a.range.offset - b.range.offset); return diff < 0; } static void fillCache(CadScene::DrawRangeCache& cache, const std::vector& list) { cache = CadScene::DrawRangeCache(); if(!list.size()) return; CadScene::DrawStateInfo state = list[0].state; CadScene::DrawRange range = list[0].range; int stateCount = 0; for(size_t i = 1; i < list.size() + 1; i++) { bool newrange = false; if(i == list.size() || list[i].state != state) { // push range stateCount++; cache.offsets.push_back(range.offset); cache.counts.push_back(range.count); // emit cache.state.push_back(state); cache.stateCount.push_back(stateCount); stateCount = 0; if(i == list.size()) { break; } else { state = list[i].state; range.offset = list[i].range.offset; range.count = 0; newrange = true; } } const CadScene::DrawRange& currange = list[i].range; if(newrange || (USE_CACHECOMBINE && currange.offset == (range.offset + sizeof(GLuint) * range.count))) { // merge range.count += currange.count; } else { // push stateCount++; cache.offsets.push_back(range.offset); cache.counts.push_back(range.count); range = currange; } } } void CadScene::updateObjectDrawCache(Object& object) { Geometry& geom = m_geometry[object.geometryIndex]; std::vector listSolid; std::vector listWire; listSolid.reserve(geom.parts.size()); listWire.reserve(geom.parts.size()); for(size_t i = 0; i < geom.parts.size(); i++) { if(!object.parts[i].active) continue; ListItem item; item.state.materialIndex = object.parts[i].materialIndex; item.range = geom.parts[i].indexSolid; item.state.matrixIndex = object.parts[i].matrixIndex; listSolid.push_back(item); item.range = geom.parts[i].indexWire; item.state.matrixIndex = object.parts[i].matrixIndex; listWire.push_back(item); } std::sort(listSolid.begin(), listSolid.end(), ListItem_compare); std::sort(listWire.begin(), 
listWire.end(), ListItem_compare); fillCache(object.cacheSolid, listSolid); fillCache(object.cacheWire, listWire); } void CadScene::enableVertexFormat(int attrPos, int attrNormal) { glVertexAttribFormat(attrPos, 3, GL_FLOAT, GL_FALSE, 0); glVertexAttribFormat(attrNormal, 3, GL_FLOAT, GL_FALSE, offsetof(CadScene::Vertex, normal)); glVertexAttribBinding(attrPos, 0); glVertexAttribBinding(attrNormal, 0); glEnableVertexAttribArray(attrPos); glEnableVertexAttribArray(attrNormal); glBindVertexBuffer(0, 0, 0, sizeof(CadScene::Vertex)); } void CadScene::disableVertexFormat(int attrPos, int attrNormal) { glDisableVertexAttribArray(attrPos); glDisableVertexAttribArray(attrNormal); glBindVertexBuffer(0, 0, 0, sizeof(CadScene::Vertex)); } void CadScene::unload() { if(m_geometry.empty()) return; glFinish(); if(has_GL_NV_vertex_buffer_unified_memory) { if(has_GL_ARB_bindless_texture) { glMakeTextureHandleNonResidentARB(m_matricesTexGLADDR); } glMakeNamedBufferNonResidentNV(m_matricesGL); glMakeNamedBufferNonResidentNV(m_materialsGL); } glDeleteTextures(1, &m_matricesOrigTexGL); glDeleteTextures(1, &m_matricesTexGL); glDeleteTextures(1, &m_geometryBboxesTexGL); glDeleteBuffers(1, &m_matricesOrigGL); glDeleteBuffers(1, &m_matricesGL); glDeleteBuffers(1, &m_materialsGL); glDeleteBuffers(1, &m_objectAssignsGL); glDeleteBuffers(1, &m_geometryBboxesGL); glDeleteBuffers(1, &m_parentIDsGL); for(size_t i = 0; i < m_geometry.size(); i++) { if(m_geometry[i].cloneIdx >= 0) continue; if(has_GL_NV_vertex_buffer_unified_memory) { glMakeNamedBufferNonResidentNV(m_geometry[i].iboGL); glMakeNamedBufferNonResidentNV(m_geometry[i].vboGL); } glDeleteBuffers(1, &m_geometry[i].iboGL); glDeleteBuffers(1, &m_geometry[i].vboGL); } m_matrices.clear(); m_geometryBboxes.clear(); m_geometry.clear(); m_objectAssigns.clear(); m_objects.clear(); m_geometryBboxes.clear(); m_nodeTree.clear(); glFinish(); } void CadScene::resetMatrices() { glCopyNamedBufferSubData(m_matricesOrigGL, m_matricesGL, 0, 0, 
sizeof(CadScene::MatrixNode) * m_matrices.size()); } ================================================ FILE: cadscene.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ #ifndef CADSCENE_H__ #define CADSCENE_H__ #include // memset #include #include #include #include "nodetree.hpp" class CadScene { public: struct BBox { glm::vec4 min; glm::vec4 max; BBox() : min(FLT_MAX), max(-FLT_MAX) {} inline void merge( const glm::vec4& point ) { min = glm::min(min, point); max = glm::max(max, point); } inline void merge( const BBox& bbox ) { min = glm::min(min, bbox.min); max = glm::max(max, bbox.max); } inline BBox transformed ( const glm::mat4 &matrix, int dim=3) { int i; glm::vec4 box[16]; // create box corners box[0] = glm::vec4(min.x,min.y,min.z,min.w); box[1] = glm::vec4(max.x,min.y,min.z,min.w); box[2] = glm::vec4(min.x,max.y,min.z,min.w); box[3] = glm::vec4(max.x,max.y,min.z,min.w); box[4] = glm::vec4(min.x,min.y,max.z,min.w); box[5] = glm::vec4(max.x,min.y,max.z,min.w); box[6] = glm::vec4(min.x,max.y,max.z,min.w); box[7] = glm::vec4(max.x,max.y,max.z,min.w); box[8] = glm::vec4(min.x,min.y,min.z,max.w); box[9] = glm::vec4(max.x,min.y,min.z,max.w); box[10] = glm::vec4(min.x,max.y,min.z,max.w); box[11] = glm::vec4(max.x,max.y,min.z,max.w); box[12] = 
glm::vec4(min.x,min.y,max.z,max.w); box[13] = glm::vec4(max.x,min.y,max.z,max.w); box[14] = glm::vec4(min.x,max.y,max.z,max.w); box[15] = glm::vec4(max.x,max.y,max.z,max.w); // transform box corners // and find new mins,maxs BBox bbox; for (i = 0; i < (1< state; std::vector stateCount; std::vector offsets; std::vector counts; }; struct GeometryPart { DrawRange indexSolid; DrawRange indexWire; }; struct Geometry { GLuint vboGL; GLuint iboGL; GLuint64 vboADDR; GLuint64 iboADDR; size_t vboSize; size_t iboSize; std::vector parts; int numVertices; int numIndexSolid; int numIndexWire; int cloneIdx; }; struct ObjectPart { int active; int materialIndex; int matrixIndex; }; struct Object { int matrixIndex; int geometryIndex; std::vector parts; DrawRangeCache cacheSolid; DrawRangeCache cacheWire; }; std::vector m_materials; std::vector m_geometryBboxes; std::vector m_geometry; std::vector m_matrices; std::vector m_objects; std::vector m_objectAssigns; BBox m_bbox; GLuint m_materialsGL; GLuint64 m_materialsADDR; GLuint m_matricesGL; GLuint64 m_matricesADDR; GLuint m_matricesTexGL; GLuint64 m_matricesTexGLADDR; GLuint m_geometryBboxesGL; GLuint m_geometryBboxesTexGL; GLuint m_objectAssignsGL; GLuint m_parentIDsGL; GLuint m_matricesOrigGL; GLuint m_matricesOrigTexGL; NodeTree m_nodeTree; void updateObjectDrawCache(Object& object); bool loadCSF(const char* filename, int clones = 0, int cloneaxis=3); void unload(); static void enableVertexFormat(int attrPos, int attrNormal); static void disableVertexFormat(int attrPos, int attrNormal); void resetMatrices(); }; #endif ================================================ FILE: common.h ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Indices shared between C++ and GLSL: UBO_SCENE and UBO_MATRIX are used as uniform-block binding points and TEX_MATRICES as the binding of the matrix buffer texture in the layout qualifiers further down; VERTEX_* are presumably vertex attribute locations - TODO confirm against the renderers. */ #define VERTEX_POS 0 #define VERTEX_NORMAL 1 #define VERTEX_ASSIGNS 2 #define VERTEX_WIREMODE 3 #define UBO_SCENE 0 #define UBO_MATRIX 1 #define UBO_MATERIAL 2 #define TEX_MATRICES 0 #define USE_BASEINSTANCE 0 //#define UNI_WIREFRAME 0 /* When compiled as C++ the struct is placed in namespace csfviewer and its mat4/vec4/ivec2/uvec2 members resolve through glm; no namespace is opened otherwise. */ #ifdef __cplusplus namespace csfviewer { using namespace glm; #endif /* Scene-wide constants. GLSL reads them through the std140 uniform block sceneBuffer (binding UBO_SCENE); tboMatrices is turned into a samplerBuffer when GL_NV_bindless_texture is enabled. */ struct SceneData { mat4 viewProjMatrix; mat4 viewMatrix; mat4 viewMatrixIT; vec4 viewPos; vec4 viewDir; vec4 wLightPos; ivec2 viewport; uvec2 tboMatrices; }; #ifdef __cplusplus } #endif /* Shader-only section: compiled only when one of the GLSL profile macros is defined. */ #if defined(GL_core_profile) || defined(GL_compatibility_profile) || defined(GL_es_profile) #extension GL_NV_command_list : enable #if GL_NV_command_list layout(commandBindableNV) uniform; #endif // prevent this to be used by c++ layout(std140,binding=UBO_SCENE) uniform sceneBuffer { SceneData scene; }; // must match cadscene! layout(std140,binding=UBO_MATRIX) uniform matrixBuffer { mat4 worldMatrix; mat4 worldMatrixIT; mat4 objectMatrix; mat4 objectMatrixIT; } object; #extension GL_ARB_bindless_texture : enable #extension GL_NV_bindless_texture : enable #if GL_NV_bindless_texture #define matricesBuffer samplerBuffer(scene.tboMatrices) #else layout(binding=TEX_MATRICES) uniform samplerBuffer matricesBuffer; #endif // must match cadscene! 
/* Slot of each matrix inside one node record; NODE_MATRICES is the number of matrices per node. */ #define NODE_MATRIX_WORLD 0 #define NODE_MATRIX_WORLDIT 1 #define NODE_MATRIX_OBJECT 2 #define NODE_MATRIX_OBJECTIT 3 #define NODE_MATRICES 4 /* Returns matrix 'what' (a NODE_MATRIX_* slot) of node 'idx' from matricesBuffer. Matrix number idx * NODE_MATRICES + what is read as four consecutive texels, which become the four vec4 arguments of the mat4 constructor. */ mat4 getIndexedMatrix(int idx, int what) { int i = idx * NODE_MATRICES + what; return mat4( texelFetch(matricesBuffer, i*4 + 0), texelFetch(matricesBuffer, i*4 + 1), texelFetch(matricesBuffer, i*4 + 2), texelFetch(matricesBuffer, i*4 + 3)); } #endif ================================================ FILE: csf.cpp ================================================ /* * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* NOTE(review): the *_IMPLEMENTATION defines presumably make the headers included after them emit their implementation in this translation unit (single-header-library convention), with glTF 2 and file-mapping support switched on for CSF - confirm against the included headers. */ #define CSF_IMPLEMENTATION #define CSF_SUPPORT_GLTF2 1 #define CSF_SUPPORT_FILEMAPPING 1 #include #define CGLTF_IMPLEMENTATION #include ================================================ FILE: csfviewer.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #define DEBUG_FILTER 1 #include #include #include #include #include #include #include #include #include #include #include #include #include "transformsystem.hpp" #include "cadscene.hpp" #include "renderer.hpp" #include #include "common.h" #include "glm/gtc/matrix_access.hpp" #include "glm/gtc/type_ptr.hpp" namespace csfviewer { int const SAMPLE_SIZE_WIDTH(800); int const SAMPLE_SIZE_HEIGHT(600); int const SAMPLE_MAJOR_VERSION(4); int const SAMPLE_MINOR_VERSION(5); class Sample : public nvgl::AppWindowProfilerGL { public: enum GuiEnums { GUI_RENDERER, GUI_MSAA, GUI_SHADE, GUI_STRATEGY, }; struct { nvgl::ProgramID draw_object, draw_object_tris, draw_object_line, draw_object_indexed, draw_object_indexed_tris, draw_object_indexed_line, cull_object_frustum, cull_object_hiz, cull_object_raster, cull_bit_temporallast, cull_bit_temporalnew, cull_bit_regular, cull_depth_mips, scan_prefixsum, scan_offsets, scan_combine, transform_leaves, transform_level, xplode; } programs; struct { GLuint scene = 0; GLuint scene2 = 0; } fbos; struct { GLuint scene_ubo = 0; } buffers; struct { GLuint64 scene_ubo; } addresses; struct { GLuint scene_color = 0; GLuint scene_color2 = 0; GLuint scene_depthstencil = 0; GLuint scene_depthstencil2 = 0; } textures; struct Tweak { int renderer = 0; ShadeType shade = SHADE_SOLID; Strategy strategy = STRATEGY_GROUPS; int clones = 0; bool cloneaxisX = true; bool cloneaxisY = true; bool cloneaxisZ = false; bool animateActive = false; float animateMin = 1; float animateDelta = 1; int zoom = 100; int msaa = 0; bool noUI = false; }; nvgl::ProgramManager m_progManager; ImGuiH::Registry m_ui; double m_uiTime = 0; Tweak m_tweak; Tweak m_lastTweak; std::string m_modelFilename; 
SceneData m_sceneUbo; CadScene m_scene; TransformSystem m_transformSystem; GLuint m_xplodeGroupSize; std::vector m_renderersSorted; std::string m_rendererName; Renderer* NV_RESTRICT m_renderer; Resources m_resources; size_t m_stateChangeID; void updateProgramDefine(); bool initProgram(); bool initScene(const char* filename, int clones, int cloneaxis); bool initFramebuffers(int width, int height); void initRenderer(int type, Strategy strategy); void deinitRenderer(); void getCullPrograms(CullingSystem::Programs& cullprograms); void getScanPrograms(ScanSystem::Programs& scanprograms); void getTransformPrograms(TransformSystem::Programs& xfromPrograms); void updatedPrograms(); void setupConfigParameters(); void setRendererFromName(); public: Sample() { setupConfigParameters(); } bool validateConfig() override; bool begin() override; void think(double time) override; void resize(int width, int height) override; void processUI(double time); nvh::CameraControl m_control; void end() override { ImGui::ShutdownGL(); } // return true to prevent m_windowState updates bool mouse_pos(int x, int y) override { if(m_tweak.noUI) return false; return ImGuiH::mouse_pos(x, y); } bool mouse_button(int button, int action) override { if(m_tweak.noUI) return false; return ImGuiH::mouse_button(button, action); } bool mouse_wheel(int wheel) override { if(m_tweak.noUI) return false; return ImGuiH::mouse_wheel(wheel); } bool key_char(int button) override { if(m_tweak.noUI) return false; return ImGuiH::key_char(button); } bool key_button(int button, int action, int mods) override { if(m_tweak.noUI) return false; return ImGuiH::key_button(button, action, mods); } }; void Sample::updateProgramDefine() {} void Sample::getTransformPrograms(TransformSystem::Programs& xformPrograms) { xformPrograms.transform_leaves = m_progManager.get(programs.transform_leaves); xformPrograms.transform_level = m_progManager.get(programs.transform_level); } void Sample::getCullPrograms(CullingSystem::Programs& 
cullprograms) { cullprograms.bit_regular = m_progManager.get(programs.cull_bit_regular); cullprograms.bit_temporallast = m_progManager.get(programs.cull_bit_temporallast); cullprograms.bit_temporalnew = m_progManager.get(programs.cull_bit_temporalnew); cullprograms.depth_mips = m_progManager.get(programs.cull_depth_mips); cullprograms.object_frustum = m_progManager.get(programs.cull_object_frustum); cullprograms.object_hiz = m_progManager.get(programs.cull_object_hiz); cullprograms.object_raster = m_progManager.get(programs.cull_object_raster); } void Sample::getScanPrograms(ScanSystem::Programs& scanprograms) { scanprograms.prefixsum = m_progManager.get(programs.scan_prefixsum); scanprograms.offsets = m_progManager.get(programs.scan_offsets); scanprograms.combine = m_progManager.get(programs.scan_combine); } bool Sample::initProgram() { bool validated(true); m_progManager.m_filetype = nvh::ShaderFileManager::FILETYPE_GLSL; m_progManager.addDirectory(std::string("GLSL_" PROJECT_NAME)); m_progManager.addDirectory(exePath() + std::string(PROJECT_RELDIRECTORY)); m_progManager.registerInclude("common.h"); updateProgramDefine(); programs.draw_object = m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "scene.vert.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "scene.frag.glsl")); programs.draw_object_tris = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define WIREMODE 0\n", "scene.vert.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define WIREMODE 0\n", "scene.frag.glsl")); programs.draw_object_line = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define WIREMODE 1\n", "scene.vert.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define WIREMODE 1\n", "scene.frag.glsl")); programs.draw_object_indexed = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define USE_INDEXING 1\n", "scene.vert.glsl"), 
nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define USE_INDEXING 1\n", "scene.frag.glsl")); programs.draw_object_indexed_tris = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 0\n", "scene.vert.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 0\n", "scene.frag.glsl")); programs.draw_object_indexed_line = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 1\n", "scene.vert.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define USE_INDEXING 1\n#define WIREMODE 1\n", "scene.frag.glsl")); programs.cull_object_raster = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n", "cull-raster.vert.glsl"), nvgl::ProgramManager::Definition(GL_GEOMETRY_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n", "cull-raster.geo.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n", "cull-raster.frag.glsl")); programs.cull_object_frustum = m_progManager.createProgram(nvgl::ProgramManager::Definition( GL_VERTEX_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n", "cull-xfb.vert.glsl")); programs.cull_object_hiz = m_progManager.createProgram(nvgl::ProgramManager::Definition( GL_VERTEX_SHADER, "#define DUALINDEX 1\n#define MATRICES 4\n#define OCCLUSION\n", "cull-xfb.vert.glsl")); programs.cull_bit_regular = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define TEMPORAL 0\n", "cull-bitpack.vert.glsl")); programs.cull_bit_temporallast = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define TEMPORAL TEMPORAL_LAST\n", "cull-bitpack.vert.glsl")); programs.cull_bit_temporalnew = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "#define TEMPORAL 
TEMPORAL_NEW\n", "cull-bitpack.vert.glsl")); programs.cull_depth_mips = m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "cull-downsample.vert.glsl"), nvgl::ProgramManager::Definition(GL_FRAGMENT_SHADER, "cull-downsample.frag.glsl")); programs.scan_prefixsum = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "#define TASK TASK_SUM\n", "scan.comp.glsl")); programs.scan_offsets = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "#define TASK TASK_OFFSETS\n", "scan.comp.glsl")); programs.scan_combine = m_progManager.createProgram( nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "#define TASK TASK_COMBINE\n", "scan.comp.glsl")); programs.transform_leaves = m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "transform-leaves.comp.glsl")); programs.transform_level = m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "transform-level.comp.glsl")); programs.xplode = m_progManager.createProgram(nvgl::ProgramManager::Definition(GL_COMPUTE_SHADER, "xplode-animation.comp.glsl")); validated = m_progManager.areProgramsValid(); return validated; } bool Sample::initScene(const char* filename, int clones, int cloneaxis) { m_scene.unload(); if(buffers.scene_ubo && has_GL_NV_shader_buffer_load) { glMakeNamedBufferNonResidentNV(buffers.scene_ubo); } nvgl::newBuffer(buffers.scene_ubo); glNamedBufferStorage(buffers.scene_ubo, sizeof(SceneData), NULL, GL_DYNAMIC_STORAGE_BIT); if(has_GL_NV_shader_buffer_load) { glGetNamedBufferParameterui64vNV(buffers.scene_ubo, GL_BUFFER_GPU_ADDRESS_NV, &addresses.scene_ubo); glMakeNamedBufferResidentNV(buffers.scene_ubo, GL_READ_ONLY); } m_resources.sceneUbo = buffers.scene_ubo; m_resources.sceneAddr = addresses.scene_ubo; m_resources.stateChangeID++; bool status = m_scene.loadCSF(filename, clones, cloneaxis); LOGI("\nscene %s\n", filename); LOGI("geometries: %6d\n", 
(uint32_t)m_scene.m_geometry.size()); LOGI("materials: %6d\n", (uint32_t)m_scene.m_materials.size()); LOGI("nodes: %6d\n", (uint32_t)m_scene.m_matrices.size()); LOGI("objects: %6d\n", (uint32_t)m_scene.m_objects.size()); LOGI("\n"); return status; } bool Sample::initFramebuffers(int width, int height) { bool layered = true; if(!fbos.scene || m_tweak.msaa != m_lastTweak.msaa) { nvgl::newFramebuffer(fbos.scene); nvgl::newFramebuffer(fbos.scene2); m_resources.fbo = fbos.scene; m_resources.fbo2 = fbos.scene2; m_resources.stateChangeID++; } if(layered) { if(has_GL_NV_bindless_texture && textures.scene_color) { glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color)); glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil)); } nvgl::newTexture(textures.scene_color, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY); nvgl::newTexture(textures.scene_depthstencil, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY); if(m_tweak.msaa) { glTextureStorage3DMultisample(textures.scene_color, m_tweak.msaa, GL_RGBA8, width, height, 2, GL_TRUE); glTextureStorage3DMultisample(textures.scene_depthstencil, m_tweak.msaa, GL_DEPTH24_STENCIL8, width, height, 2, GL_TRUE); } else { glTextureStorage3D(textures.scene_color, 1, GL_RGBA8, width, height, 2); glTextureStorage3D(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height, 2); } glNamedFramebufferTextureLayer(fbos.scene, GL_COLOR_ATTACHMENT0, textures.scene_color, 0, 0); glNamedFramebufferTextureLayer(fbos.scene, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0, 0); glNamedFramebufferTextureLayer(fbos.scene2, GL_COLOR_ATTACHMENT0, textures.scene_color, 0, 1); glNamedFramebufferTextureLayer(fbos.scene2, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0, 1); if(has_GL_NV_bindless_texture) { glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color)); 
glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil)); } } else { if(has_GL_NV_bindless_texture && textures.scene_color) { glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color)); glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil)); glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_color2)); glMakeTextureHandleNonResidentNV(glGetTextureHandleNV(textures.scene_depthstencil2)); } nvgl::newTexture(textures.scene_color, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); nvgl::newTexture(textures.scene_depthstencil, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); if(m_tweak.msaa) { glTextureStorage2DMultisample(textures.scene_color, 1, GL_RGBA8, width, height, GL_TRUE); glTextureStorage2DMultisample(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height, GL_TRUE); } else { glTextureStorage2D(textures.scene_color, 1, GL_RGBA8, width, height); glTextureStorage2D(textures.scene_depthstencil, 1, GL_DEPTH24_STENCIL8, width, height); } glNamedFramebufferTexture(fbos.scene, GL_COLOR_ATTACHMENT0, textures.scene_color, 0); glNamedFramebufferTexture(fbos.scene, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil, 0); nvgl::newTexture(textures.scene_color2, m_tweak.msaa ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); nvgl::newTexture(textures.scene_depthstencil2, m_tweak.msaa ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D); if(m_tweak.msaa) { glTextureStorage2DMultisample(textures.scene_color2, 1, GL_RGBA8, width, height, GL_TRUE); glTextureStorage2DMultisample(textures.scene_depthstencil2, 1, GL_DEPTH24_STENCIL8, width, height, GL_TRUE); } else { glTextureStorage2D(textures.scene_color2, 1, GL_RGBA8, width, height); glTextureStorage2D(textures.scene_depthstencil2, 1, GL_DEPTH24_STENCIL8, width, height); } glNamedFramebufferTexture(fbos.scene2, GL_COLOR_ATTACHMENT0, textures.scene_color2, 0); glNamedFramebufferTexture(fbos.scene2, GL_DEPTH_STENCIL_ATTACHMENT, textures.scene_depthstencil2, 0); if(has_GL_NV_bindless_texture) { glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color)); glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil)); glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_color2)); glMakeTextureHandleResidentNV(glGetTextureHandleNV(textures.scene_depthstencil2)); } } m_resources.fboTextureChangeID++; return true; } void Sample::deinitRenderer() { if(m_renderer) { m_renderer->deinit(); delete m_renderer; m_renderer = NULL; } } void Sample::initRenderer(int type, Strategy strategy) { deinitRenderer(); Renderer::getRegistry()[m_renderersSorted[type]]->updatedPrograms(m_progManager); m_renderer = Renderer::getRegistry()[m_renderersSorted[type]]->create(); m_renderer->m_strategy = strategy; m_renderer->init(&m_scene, m_resources); } bool Sample::begin() { m_renderer = NULL; m_stateChangeID = 0; ImGuiH::Init(m_windowState.m_winSize[0], m_windowState.m_winSize[1], this); ImGui::InitGL(); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glEnable(GL_CULL_FACE); glEnable(GL_DEPTH_TEST); #if defined(NDEBUG) setVsync(false); #endif Renderer::s_bindless_ubo = !!m_contextWindow.extensionSupported("GL_NV_uniform_buffer_unified_memory"); LOGI("\nNV_uniform_buffer_unified_memory support: %s\n\n", Renderer::s_bindless_ubo ? 
"true" : "false"); bool validated(true); GLuint defaultVAO; glGenVertexArrays(1, &defaultVAO); glBindVertexArray(defaultVAO); validated = validated && initProgram(); validated = validated && initScene(m_modelFilename.c_str(), 0, 3); validated = validated && initFramebuffers(m_windowState.m_winSize[0], m_windowState.m_winSize[1]); const Renderer::Registry registry = Renderer::getRegistry(); for(size_t i = 0; i < registry.size(); i++) { if(registry[i]->isAvailable()) { if(!registry[i]->loadPrograms(m_progManager)) { LOGE("Failed to load resources for renderer %s\n", registry[i]->name()); return false; } uint sortkey = uint(i); sortkey |= registry[i]->priority() << 16; m_renderersSorted.push_back(sortkey); } } std::sort(m_renderersSorted.begin(), m_renderersSorted.end()); for(size_t i = 0; i < m_renderersSorted.size(); i++) { m_renderersSorted[i] &= 0xFFFF; m_ui.enumAdd(GUI_RENDERER, int(i), registry[m_renderersSorted[i]]->name()); } { m_ui.enumAdd(GUI_STRATEGY, STRATEGY_INDIVIDUAL, "drawcall individual"); m_ui.enumAdd(GUI_STRATEGY, STRATEGY_JOIN, "drawcall join"); m_ui.enumAdd(GUI_STRATEGY, STRATEGY_GROUPS, "material groups"); m_ui.enumAdd(GUI_SHADE, SHADE_SOLID, toString(SHADE_SOLID)); m_ui.enumAdd(GUI_SHADE, SHADE_SOLIDWIRE, toString(SHADE_SOLIDWIRE)); m_ui.enumAdd(GUI_SHADE, SHADE_SOLIDWIRE_SPLIT, "solid w edges (split test, only in sorted)"); m_ui.enumAdd(GUI_MSAA, 0, "none"); m_ui.enumAdd(GUI_MSAA, 2, "2x"); m_ui.enumAdd(GUI_MSAA, 4, "4x"); m_ui.enumAdd(GUI_MSAA, 8, "8x"); } m_control.m_sceneOrbit = glm::vec3(m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f; m_control.m_sceneDimension = glm::length((m_scene.m_bbox.max - m_scene.m_bbox.min)); m_control.m_viewMatrix = glm::lookAt(m_control.m_sceneOrbit - (-vec3(1, 1, 1) * m_control.m_sceneDimension * 0.5f * (float(m_tweak.zoom) / 100.0f)), m_control.m_sceneOrbit, vec3(0, 1, 0)); m_sceneUbo.wLightPos = (m_scene.m_bbox.max + m_scene.m_bbox.min) * 0.5f + m_control.m_sceneDimension; m_sceneUbo.wLightPos.w = 1.0; 
updatedPrograms(); CullingSystem::Programs cullprogs; getCullPrograms(cullprogs); Renderer::s_cullsys.init(cullprogs, true); ScanSystem::Programs scanprogs; getScanPrograms(scanprogs); Renderer::s_scansys.init(scanprogs); //Renderer::s_scansys.test(); TransformSystem::Programs xformprogs; getTransformPrograms(xformprogs); m_transformSystem.init(xformprogs); initRenderer(m_tweak.renderer, m_tweak.strategy); return validated; } void Sample::processUI(double time) { int width = m_windowState.m_winSize[0]; int height = m_windowState.m_winSize[1]; // Update imgui configuration auto& imgui_io = ImGui::GetIO(); imgui_io.DeltaTime = static_cast(time - m_uiTime); imgui_io.DisplaySize = ImVec2(static_cast(width), static_cast(height)); m_uiTime = time; ImGui::NewFrame(); ImGui::SetNextWindowSize(ImGuiH::dpiScaled(350, 0), ImGuiCond_FirstUseEver); if(ImGui::Begin("NVIDIA " PROJECT_NAME, nullptr)) { m_ui.enumCombobox(GUI_RENDERER, "renderer", &m_tweak.renderer); m_ui.enumCombobox(GUI_STRATEGY, "strategy", &m_tweak.strategy); m_ui.enumCombobox(GUI_SHADE, "shademode", &m_tweak.shade); ImGui::Checkbox("xplode via GPU", &m_tweak.animateActive); ImGui::SliderFloat("xplode min", &m_tweak.animateMin, 0, 16.0f); ImGui::SliderFloat("xplode delta", &m_tweak.animateDelta, 0, 16.0f); ImGuiH::InputIntClamped("clones", &m_tweak.clones, 0, 255, 1, 10, ImGuiInputTextFlags_EnterReturnsTrue); ImGui::Checkbox("clone X", &m_tweak.cloneaxisX); ImGui::Checkbox("clone Y", &m_tweak.cloneaxisY); ImGui::Checkbox("clone Z", &m_tweak.cloneaxisZ); m_ui.enumCombobox(GUI_MSAA, "msaa", &m_tweak.msaa); } if(!m_tweak.cloneaxisX && !m_tweak.cloneaxisY && !m_tweak.cloneaxisZ) { m_tweak.cloneaxisX = true; } ImGui::End(); } void Sample::updatedPrograms() { CullingSystem::Programs cullprogs; getCullPrograms(cullprogs); Renderer::s_cullsys.update(cullprogs, true); ScanSystem::Programs scanprogs; getScanPrograms(scanprogs); Renderer::s_scansys.update(scanprogs); TransformSystem::Programs xformprogs; 
getTransformPrograms(xformprogs); m_transformSystem.update(xformprogs); m_resources.programUbo = m_progManager.get(programs.draw_object); m_resources.programUboLine = m_progManager.get(programs.draw_object_line); m_resources.programUboTris = m_progManager.get(programs.draw_object_tris); m_resources.programIdx = m_progManager.get(programs.draw_object_indexed); m_resources.programIdxLine = m_progManager.get(programs.draw_object_indexed_line); m_resources.programIdxTris = m_progManager.get(programs.draw_object_indexed_tris); GLuint groupsizes[3]; glGetProgramiv(m_progManager.get(programs.xplode), GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupsizes); m_xplodeGroupSize = groupsizes[0]; m_resources.stateChangeID++; } void Sample::think(double time) { NV_PROFILE_GL_SECTION("Frame"); processUI(time); m_control.processActions({m_windowState.m_winSize[0], m_windowState.m_winSize[1]}, glm::vec2(m_windowState.m_mouseCurrent[0], m_windowState.m_mouseCurrent[1]), m_windowState.m_mouseButtonFlags, m_windowState.m_mouseWheel); if(m_windowState.onPress(KEY_R)) { m_progManager.reloadPrograms(); Renderer::getRegistry()[m_tweak.renderer]->updatedPrograms(m_progManager); updatedPrograms(); } if(m_tweak.msaa != m_lastTweak.msaa) { initFramebuffers(m_windowState.m_winSize[0], m_windowState.m_winSize[1]); } if(m_tweak.clones != m_lastTweak.clones || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ) { deinitRenderer(); initScene(m_modelFilename.c_str(), m_tweak.clones, (int(m_tweak.cloneaxisX) << 0) | (int(m_tweak.cloneaxisY) << 1) | (int(m_tweak.cloneaxisZ) << 2)); } if(m_tweak.renderer != m_lastTweak.renderer || m_tweak.strategy != m_lastTweak.strategy || m_tweak.cloneaxisX != m_lastTweak.cloneaxisX || m_tweak.cloneaxisY != m_lastTweak.cloneaxisY || m_tweak.cloneaxisZ != m_lastTweak.cloneaxisZ || m_tweak.clones != m_lastTweak.clones) { initRenderer(m_tweak.renderer, m_tweak.strategy); } 
if(!m_tweak.animateActive && m_lastTweak.animateActive) { m_scene.resetMatrices(); } m_lastTweak = m_tweak; int width = m_windowState.m_winSize[0]; int height = m_windowState.m_winSize[1]; { // generic state setup glViewport(0, 0, width, height); if(m_tweak.shade == SHADE_SOLIDWIRE_SPLIT) { glBindFramebuffer(GL_FRAMEBUFFER, fbos.scene2); glClearColor(0.2f, 0.2f, 0.2f, 0.0f); glClearDepth(1.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); } glBindFramebuffer(GL_FRAMEBUFFER, fbos.scene); glClearColor(0.2f, 0.2f, 0.2f, 0.0f); glClearDepth(1.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); m_sceneUbo.viewport = ivec2(width, height); glm::mat4 projection = glm::perspectiveRH_ZO((45.f), float(width) / float(height), m_control.m_sceneDimension * 0.001f, m_control.m_sceneDimension * 10.0f); glm::mat4 view = m_control.m_viewMatrix; m_sceneUbo.viewProjMatrix = projection * view; m_sceneUbo.viewMatrix = view; m_sceneUbo.viewMatrixIT = glm::transpose(glm::inverse(view)); m_sceneUbo.viewPos = glm::row(m_sceneUbo.viewMatrixIT, 3); m_sceneUbo.viewDir = -glm::row(view,2); m_sceneUbo.wLightPos = glm::row(m_sceneUbo.viewMatrixIT, 3); m_sceneUbo.wLightPos.w = 1.0; m_sceneUbo.tboMatrices = uvec2(m_scene.m_matricesTexGLADDR & 0xFFFFFFFF, m_scene.m_matricesTexGLADDR >> 32); glNamedBufferSubData(buffers.scene_ubo, 0, sizeof(SceneData), &m_sceneUbo); glDisable(GL_CULL_FACE); } if(m_tweak.animateActive) { { NV_PROFILE_GL_SECTION("Xplode"); float speed = 0.5; float scale = m_tweak.animateMin + (cosf(float(time) * speed) * 0.5f + 0.5f) * (m_tweak.animateDelta); GLuint totalNodes = GLuint(m_scene.m_matrices.size()); GLuint groupsize = m_xplodeGroupSize; glUseProgram(m_progManager.get(programs.xplode)); glUniform1f(0, scale); glUniform1i(1, totalNodes); nvgl::bindMultiTexture(GL_TEXTURE0, GL_TEXTURE_BUFFER, m_scene.m_matricesOrigTexGL); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_scene.m_matricesGL); 
glDispatchCompute((totalNodes + groupsize - 1) / groupsize, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); nvgl::bindMultiTexture(GL_TEXTURE0, GL_TEXTURE_BUFFER, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); glUseProgram(0); } { NV_PROFILE_GL_SECTION("Tree"); TransformSystem::Buffer ids; TransformSystem::Buffer world; TransformSystem::Buffer object; ids.buffer = m_scene.m_parentIDsGL; ids.offset = 0; ids.size = sizeof(GLuint) * m_scene.m_matrices.size(); world.buffer = m_scene.m_matricesGL; world.offset = 0; world.size = sizeof(CadScene::MatrixNode) * m_scene.m_matrices.size(); object.buffer = m_scene.m_matricesGL; object.offset = 0; object.size = sizeof(CadScene::MatrixNode) * m_scene.m_matrices.size(); m_transformSystem.process(m_scene.m_nodeTree, ids, object, world); } } { NV_PROFILE_GL_SECTION("Render"); m_resources.cullView.viewPos = glm::value_ptr(m_sceneUbo.viewPos); m_resources.cullView.viewDir = glm::value_ptr(m_sceneUbo.viewDir); m_resources.cullView.viewProjMatrix = glm::value_ptr(m_sceneUbo.viewProjMatrix); m_renderer->draw(m_tweak.shade, m_resources, m_profiler, m_progManager); } { NV_PROFILE_GL_SECTION("Blit"); if(m_tweak.shade == SHADE_SOLIDWIRE_SPLIT) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); int wh = width / 2; int hh = height / 2; glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene); glBlitFramebuffer(0, 0, wh, hh, 0, 0, wh, hh, GL_COLOR_BUFFER_BIT, GL_NEAREST); glBlitFramebuffer(wh, hh, width, height, wh, hh, width, height, GL_COLOR_BUFFER_BIT, GL_NEAREST); glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene2); glBlitFramebuffer(wh, 0, width, hh, wh, 0, width, hh, GL_COLOR_BUFFER_BIT, GL_NEAREST); glBlitFramebuffer(0, hh, wh, height, 0, hh, wh, height, GL_COLOR_BUFFER_BIT, GL_NEAREST); } else { // blit to background glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos.scene); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); glBlitFramebuffer(0, 0, width, height, 0, 0, width, height, GL_COLOR_BUFFER_BIT, GL_NEAREST); } } if(!m_tweak.noUI) { 
NV_PROFILE_GL_SECTION("GUI"); ImGui::Render(); ImGui::RenderDrawDataGL(ImGui::GetDrawData()); } ImGui::EndFrame(); m_lastTweak = m_tweak; } void Sample::resize(int width, int height) { initFramebuffers(width, height); } void Sample::setRendererFromName() { if(!m_rendererName.empty()) { const Renderer::Registry registry = Renderer::getRegistry(); for(size_t i = 0; i < m_renderersSorted.size(); i++) { if(strcmp(m_rendererName.c_str(), registry[m_renderersSorted[i]]->name()) == 0) { m_tweak.renderer = int(i); } } } } static std::string addPath(std::string const& defaultPath, std::string const& filename) { if( #ifdef _WIN32 filename.find(':') != std::string::npos #else !filename.empty() && filename[0] == '/' #endif ) { return filename; } else { return defaultPath + "/" + filename; } } static bool endsWith(std::string const& s, std::string const& end) { if(s.length() >= end.length()) { return (0 == s.compare(s.length() - end.length(), end.length(), end)); } else { return false; } } void Sample::setupConfigParameters() { m_parameterList.addFilename(".csf", &m_modelFilename); m_parameterList.addFilename(".csf.gz", &m_modelFilename); m_parameterList.addFilename(".gltf", &m_modelFilename); m_parameterList.add("noui", &m_tweak.noUI, false); m_parameterList.add("renderer", (uint32_t*)&m_tweak.renderer); m_parameterList.add("renderernamed", &m_rendererName); m_parameterList.add("strategy", (uint32_t*)&m_tweak.strategy); m_parameterList.add("shademode", (uint32_t*)&m_tweak.shade); m_parameterList.add("msaa", &m_tweak.msaa); m_parameterList.add("clones", &m_tweak.clones); m_parameterList.add("xplode", &m_tweak.animateActive); m_parameterList.add("zoom", &m_tweak.zoom); } bool Sample::validateConfig() { if(m_modelFilename.empty()) { LOGI("no .csf model file specified\n"); LOGI("exe parameters...\n"); m_parameterList.print(); return false; } return true; } } // namespace csfviewer using namespace csfviewer; int main(int argc, const char** argv) { NVPSystem system(PROJECT_NAME); 
Sample sample; { std::vector directories; directories.push_back(NVPSystem::exePath()); directories.push_back(NVPSystem::exePath() + "/media"); directories.push_back(NVPSystem::exePath() + std::string(PROJECT_DOWNLOAD_RELDIRECTORY)); sample.m_modelFilename = nvh::findFile(std::string("geforce.csf.gz"), directories); } return sample.run(PROJECT_NAME, argc, argv, SAMPLE_SIZE_WIDTH, SAMPLE_SIZE_HEIGHT); } ================================================ FILE: cull-bitpack.vert.glsl ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 330
/**/

// Packs the lowest bit of 32 input values into one 32-bit word per vertex.
// TEMPORAL selects how the packed word is combined with the `last` input.

#define TEMPORAL_LAST 1
#define TEMPORAL_NEW 2

#ifndef TEMPORAL
#define TEMPORAL 0
#endif

#extension GL_ARB_explicit_attrib_location : require
#extension GL_ARB_shader_storage_buffer_object : enable

// 8 x uvec4 = 32 values, occupying attribute locations 0..7
layout(location=0) in uvec4 instream[8];
#if TEMPORAL
layout(location=9) in uint last;
#endif

#if GL_ARB_shader_storage_buffer_object
// Preferred path: write the packed word directly into a storage buffer.
layout(std430,binding=0) writeonly buffer outputBuffer {
  uint outstream[];
};

void storeOutput(uint value)
{
  outstream[gl_VertexID] = value;
}
#else
// Fallback path: expose the packed word as a vertex shader output.
flat out uint outstream;

void storeOutput(uint value)
{
  outstream = value;
}
#endif

void main ()
{
  uint packed = 0u;

  // Bit b of the result is bit 0 of component (b % 4) of element (b / 4).
  for (int b = 0; b < 32; b++){
    uint flag = instream[b >> 2][b & 3];
    packed |= (flag & 1u) << b;
  }

#if TEMPORAL == TEMPORAL_LAST
  // render what was visible in last frame and passes current test
  packed &= last;
#elif TEMPORAL == TEMPORAL_NEW
  // render what was not visible in last frame (already rendered), but is now visible
  packed &= (~last);
#endif

  storeOutput(packed);
}

================================================
FILE: cull-downsample.frag.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 330
/**/

// Writes the maximum depth of a small neighbourhood of level `depthLod`
// of `depthTex` to gl_FragDepth.

uniform sampler2D depthTex;
uniform int depthLod;
uniform bool evenLod;

in vec2 uv;

void main()
{
  ivec2 lodSize  = textureSize(depthTex,depthLod);
  ivec2 lastTexel = lodSize - ivec2(1);   // upper clamp bound for texel fetches
  float maxDepth = 0;

  if (evenLod){
    // 2x2 footprint starting at twice the fragment coordinate
    ivec2 taps[4] = ivec2[4](
      ivec2(0,0),
      ivec2(0,1),
      ivec2(1,1),
      ivec2(1,0)
    );
    ivec2 base = ivec2(gl_FragCoord.xy) * 2;

    for (int t = 0; t < 4; t++){
      ivec2 texel = clamp(base + taps[t], ivec2(0), lastTexel);
      maxDepth = max(maxDepth, texelFetch(depthTex, texel, depthLod).r);
    }
  }
  else{
    // need this to handle non-power of two
    // very conservative
    vec2 taps[9] = vec2[9](
      vec2(-1,-1),
      vec2( 0,-1),
      vec2( 1,-1),
      vec2(-1, 0),
      vec2( 0, 0),
      vec2( 1, 0),
      vec2(-1, 1),
      vec2( 0, 1),
      vec2( 1, 1)
    );
    vec2 texelSize = 1.0/(vec2(lodSize));

    for (int t = 0; t < 9; t++){
      vec2 pos = uv + taps[t] * texelSize;
      maxDepth = max(
        maxDepth,
#if 1
        texelFetch(depthTex, clamp(ivec2(pos * lodSize), ivec2(0), lastTexel), depthLod).r
#else
        textureLod(depthTex, pos, depthLod).r
#endif
      );
    }
  }

  gl_FragDepth = maxDepth;
}

================================================
FILE: cull-downsample.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 330
/**/

// Derives a clip-space position from gl_VertexID alone: bit 0 selects
// x = -1 or 3, bit 1 selects y = -1 or 3. `uv` maps [-1,1] to [0,1].

out vec2 uv;

void main()
{
  float x = (float( gl_VertexID &1)) * 4.0 - 1.0;
  float y = (float((gl_VertexID>>1)&1)) * 4.0 - 1.0;
  vec4 clipPos = vec4(x, y, 0, 1.0);

  uv = clipPos.xy * 0.5 + 0.5;
  gl_Position = clipPos;
}

================================================
FILE: cull-raster.frag.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 430
/**/

// Early fragment tests are forced, so the store below only happens for
// fragments that passed them.
layout(early_fragment_tests) in;

layout(std430,binding=0) buffer visibleBuffer {
  int visibles[];
};

layout(location=0,index=0) out vec4 out_Color;

flat in int objid;

void main ()
{
  // flag the object as visible
  visibles[objid] = 1;
  // the color output carries the object id, one byte per channel
  out_Color = unpackUnorm4x8(uint(objid));
}

================================================
FILE: cull-raster.geo.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 430
/**/

#ifndef MATRIX_WORLD
#define MATRIX_WORLD 0
#endif

#ifndef MATRIX_WORLD_IT
#define MATRIX_WORLD_IT 1
#endif

#ifndef MATRICES
#define MATRICES 2
#endif

#ifndef FLIPWIND
#define FLIPWIND 1
#endif

#ifndef PERSPECTIVE
#define PERSPECTIVE 1
#endif

// render the 3 visible sides based on view direction and box normal
layout(points,invocations=3) in; // one side each invocation
layout(triangle_strip,max_vertices=4) out;

in VertexOut{
  vec3 bboxCtr;
  vec3 bboxDim;
  flat int matrixIndex;
  flat int objid;
} IN[1];

flat out int objid;

uniform vec3 viewPos;
uniform vec3 viewDir;
uniform mat4 viewProjTM;
uniform samplerBuffer matricesTex;

void main()
{
  // the world matrix is stored as four consecutive texels
  int texel = (IN[0].matrixIndex*MATRICES + MATRIX_WORLD)*4;
  mat4 worldTM = mat4(
    texelFetch(matricesTex,texel + 0),
    texelFetch(matricesTex,texel + 1),
    texelFetch(matricesTex,texel + 2),
    texelFetch(matricesTex,texel + 3));
  mat3 worldRot = mat3(worldTM);

  // each invocation handles one box axis: the face normal runs along that
  // axis, the two edge vectors span the face
  vec3 dim        = IN[0].bboxDim;
  vec3 faceNormal = vec3(0);
  vec3 edgeBasis0 = vec3(0);
  vec3 edgeBasis1 = vec3(0);

  int side = gl_InvocationID;
  if (side == 0)
  {
    faceNormal.x = dim.x;
    edgeBasis0.y = dim.y;
    edgeBasis1.z = dim.z;
  }
  else if (side == 1)
  {
    faceNormal.y = dim.y;
    edgeBasis1.x = dim.x;
    edgeBasis0.z = dim.z;
  }
  else if (side == 2)
  {
    faceNormal.z = dim.z;
    edgeBasis0.x = dim.x;
    edgeBasis1.y = dim.y;
  }

  vec3 worldCtr    = (worldTM * vec4(IN[0].bboxCtr, 1)).xyz;
  vec3 worldNormal = worldRot * faceNormal;

  // sign that decides which of the two opposite faces is emitted
#if PERSPECTIVE
  float proj = sign(dot((worldCtr + worldNormal) - viewPos.xyz, worldNormal));
#else
  float proj = sign(dot(viewDir,worldNormal));
#endif

#if FLIPWIND
  proj *= -1;
#endif

  faceNormal = worldNormal * proj;
  edgeBasis0 = worldRot * (edgeBasis0);
  edgeBasis1 = worldRot * (edgeBasis1) * proj;

  // the four strip corners, relative to the box center; FLIPWIND swaps the
  // two middle corners
  vec3 corners[4];
  corners[0] = (faceNormal - edgeBasis0 - edgeBasis1);
#if FLIPWIND
  corners[1] = (faceNormal + edgeBasis0 - edgeBasis1);
  corners[2] = (faceNormal - edgeBasis0 + edgeBasis1);
#else
  corners[1] = (faceNormal - edgeBasis0 + edgeBasis1);
  corners[2] = (faceNormal + edgeBasis0 - edgeBasis1);
#endif
  corners[3] = (faceNormal + edgeBasis0 + edgeBasis1);

  for (int c = 0; c < 4; c++)
  {
    objid = IN[0].objid;
    gl_Position = viewProjTM * vec4(worldCtr + corners[c],1);
    EmitVertex();
  }
}

================================================
FILE: cull-raster.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 430
/**/

#ifndef MATRIX_WORLD
#define MATRIX_WORLD 0
#endif

#ifndef MATRIX_WORLD_IT
#define MATRIX_WORLD_IT 1
#endif

#ifndef MATRICES
#define MATRICES 2
#endif

layout(std430,binding=0) buffer visibleBuffer {
  int visibles[];
};

uniform samplerBuffer matricesTex;

#ifdef DUALINDEX
// the box is looked up through an index: two texels (min, max) per box
layout(location=0) in int bboxIndex;
layout(location=2) in int matrixIndex;
uniform samplerBuffer bboxesTex;

vec4 bboxMin = texelFetch(bboxesTex, bboxIndex*2+0);
vec4 bboxMax = texelFetch(bboxesTex, bboxIndex*2+1);
#else
// the box is supplied directly as vertex attributes
layout(location=0) in vec4 bboxMin;
layout(location=1) in vec4 bboxMax;
layout(location=2) in int matrixIndex;
#endif

uniform vec3 viewPos;

out VertexOut{
  vec3 bboxCtr;
  vec3 bboxDim;
  flat int matrixIndex;
  flat int objid;
} OUT;

void main()
{
  // box center and half extents
  vec3 center   = ((bboxMin + bboxMax)*0.5).xyz;
  vec3 halfSize = ((bboxMax - bboxMin)*0.5).xyz;

  OUT.bboxCtr     = center;
  OUT.bboxDim     = halfSize;
  OUT.matrixIndex = matrixIndex;
  OUT.objid       = gl_VertexID;

  // if camera is inside the bbox then none of our
  // side faces will be visible, must treat object as
  // visible
  int texel = (matrixIndex * MATRICES + MATRIX_WORLD_IT)*4;
  mat4 worldInvTransTM = mat4(
    texelFetch(matricesTex,texel + 0),
    texelFetch(matricesTex,texel + 1),
    texelFetch(matricesTex,texel + 2),
    texelFetch(matricesTex,texel + 3));

  // view position relative to the box center, using the
  // inverse-transpose matrix with the vector on the left
  vec3 relPos = (vec4(viewPos,1) * worldInvTransTM).xyz - center;

  if (all(lessThan(abs(relPos),halfSize))){
    // inside bbox
    visibles[gl_VertexID] = 1;
  }
}
================================================
FILE: cull-tokencmds.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 440
/**/

// Copies every command with a non-zero culled size from `incmds` to its
// compacted position in `outcmds`. The vertex with ID `startID` also writes
// `terminateCmd` when the compacted sequence ends before `endOffset`.

#define SCAN_BATCHSIZE 2048

layout(location=0) in uint cmdOffset;
layout(location=1) in uint cmdCullSize;
layout(location=2) in uint cmdCullScan;

uniform uint startOffset;
uniform int  startID;
uniform uint endOffset;
uniform int  endID;
uniform uint terminateCmd;

layout(std430,binding=0) writeonly buffer outputBuffer {
  uint outcmds[];
};

layout(std430,binding=1) readonly buffer commandBuffer {
  uint incmds[];
};

layout(std430,binding=2) readonly buffer cullSizesBuffer {
  uint cullSizes[];
};

layout(std430,binding=3) readonly buffer cullScanBuffer {
  uint cullScan[];
};

layout(std430,binding=4) readonly buffer cullScanOffsetBuffer {
  uint cullScanOffsets[];
};

// Turns a scan value into a global offset: every batch after the first adds
// the offset stored for its predecessor batch. With `exclusive` the
// element's own size is subtracted again.
uint getOffset( int id, uint scan, uint size, bool exclusive)
{
  int  batch  = id / SCAN_BATCHSIZE;
  uint result = scan;

  if (batch > 0){
    result += cullScanOffsets[batch-1];
  }
  if (exclusive){
    result -= size;
  }

  return result;
}

// Same as above, reading scan value and size of element `id` from the buffers.
uint getOffset( int id, bool exclusive)
{
  return getOffset(id, cullScan[id], cullSizes[id], exclusive);
}

uint rebaseOffset(uint cullOffset)
{
  // where the current sequence starts
  uint sequenceBegin = getOffset(startID, true);

  // rebase from where it should start
  return startOffset + (cullOffset - sequenceBegin);
}

#define DEBUG 0

void main ()
{
  if (cmdCullSize > 0) {
    // cullOffset goes across "stateobject" sequences
    uint cullOffset = getOffset(gl_VertexID,cmdCullScan,cmdCullSize,true);
    uint dstOffset  = rebaseOffset(cullOffset);

#if DEBUG
    outcmds[(gl_VertexID)*2+0] = dstOffset;
    outcmds[(gl_VertexID)*2+1] = cmdOffset;
#else
    for (uint w = 0; w < cmdCullSize; w++){
      outcmds[dstOffset+w] = incmds[cmdOffset+w];
    }
#endif
  }
#if DEBUG
  else {
    outcmds[(gl_VertexID)*2+0] = ~0;
    outcmds[(gl_VertexID)*2+1] = cmdOffset;
  }
#endif

  if (gl_VertexID == startID) {
    // add terminator if sequence not original
    uint lastOffset = rebaseOffset( getOffset(endID, false) );
    if (lastOffset != endOffset) {
#if !DEBUG
      outcmds[lastOffset] = terminateCmd;
#endif
    }
#if DEBUG && 0
    outcmds[(startID)*2+0] = lastOffset;
    outcmds[(startID)*2+1] = endOffset;
#endif
  }
}

================================================
FILE: cull-tokensizes.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 440
/**/

layout(location=0) in uint  cmdSize;
layout(location=1) in int   cmdObject;

layout(std430,binding=0) writeonly buffer outputBuffer {
  uint  outsizes[];
};

// visibility bits, 32 objects packed into each int
layout(std430,binding=1) readonly buffer visibleBuffer {
  int   visibles[];
};

#define DEBUG false

// Writes the size each command keeps after culling: commands tied to an
// object (cmdObject >= 0) whose visibility bit is clear get size 0, every
// other command keeps cmdSize.
void main ()
{
  uint size = cmdSize;

  if (cmdObject >= 0 && !DEBUG){
    int word = visibles[cmdObject / 32];
    int bit  = 1 << (cmdObject % 32);
    if ((word & bit) == 0){
      size = 0u;
    }
  }

  outsizes[gl_VertexID] = size;
}

================================================
FILE: cull-xfb.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 330
/**/

// Matrix texture layout: MATRICES mat4 entries per matrixIndex, four RGBA
// texels per matrix. Defaults apply only when the host did not define them.
#ifndef MATRIX_WORLD
#define MATRIX_WORLD    0
#endif

#ifndef MATRIX_WORLD_IT
#define MATRIX_WORLD_IT 1
#endif

#ifndef MATRICES
#define MATRICES        2
#endif

#extension GL_ARB_explicit_attrib_location : require
#extension GL_ARB_shader_storage_buffer_object : enable

//#define OCCLUSION

#ifdef DUALINDEX
// Dual-index mode: boxes live in a texture buffer, two texels (min, max) each.
layout(location=0) in int  bboxIndex;
layout(location=2) in int  matrixIndex;
uniform samplerBuffer bboxesTex;
vec4 bboxMin = texelFetch(bboxesTex, bboxIndex*2+0);
vec4 bboxMax = texelFetch(bboxesTex, bboxIndex*2+1);
#else
layout(location=0) in vec4 bboxMin;
layout(location=1) in vec4 bboxMax;
layout(location=2) in int  matrixIndex;
#endif

// The result goes to a storage buffer when the extension is available,
// otherwise to a flat output varying (named "outstream" in both cases).
#if GL_ARB_shader_storage_buffer_object
layout(std430,binding=0) writeonly buffer outputBuffer {
  int outstream[];
};

void storeOutput(int value)
{
  outstream[gl_VertexID] = value;
}
#else
flat out int outstream;

void storeOutput(int value)
{
  outstream = value;
}
#endif

uniform mat4 viewProjTM;
uniform samplerBuffer matricesTex;

#ifdef OCCLUSION
uniform sampler2D depthTex;
#endif

// Returns box corner n (0..7) as a point; bits 0/1/2 of n choose the max
// instead of the min value on x/y/z.
vec4 getBoxCorner(int n)
{
#if 1
  bvec3 pickMax = bvec3((n & 1) != 0, (n & 2) != 0, (n & 4) != 0);
  return vec4(mix(bboxMin.xyz, bboxMax.xyz, pickMax), 1);
#else
  switch(n){
  case 0:
    return vec4(bboxMin.x,bboxMin.y,bboxMin.z,1);
  case 1:
    return vec4(bboxMax.x,bboxMin.y,bboxMin.z,1);
  case 2:
    return vec4(bboxMin.x,bboxMax.y,bboxMin.z,1);
  case 3:
    return vec4(bboxMax.x,bboxMax.y,bboxMin.z,1);
  case 4:
    return vec4(bboxMin.x,bboxMin.y,bboxMax.z,1);
  case 5:
    return vec4(bboxMax.x,bboxMin.y,bboxMax.z,1);
  case 6:
    return vec4(bboxMin.x,bboxMax.y,bboxMax.z,1);
  case 7:
    return vec4(bboxMax.x,bboxMax.y,bboxMax.z,1);
  }
#endif
}

// Transforms pos by a and applies the perspective divide.
vec3 projected(mat4 a, vec4 pos)
{
  vec4 clip = a * pos;
  return clip.xyz / clip.w;
}

// Tests one object's box against the [-1,1] cube after projection
// (and, with OCCLUSION, against the depth texture) and stores 1 or 0.
void main (){
  int texel = (matrixIndex*MATRICES + MATRIX_WORLD)*4;
  mat4 worldTM = mat4(
    texelFetch(matricesTex, texel + 0),
    texelFetch(matricesTex, texel + 1),
    texelFetch(matricesTex, texel + 2),
    texelFetch(matricesTex, texel + 3));

  mat4 worldViewProjTM = (viewProjTM * worldTM);

  // bounds of the eight projected corners
  vec3 clipmin = projected(worldViewProjTM, getBoxCorner(0));
  vec3 clipmax = clipmin;
  for (int corner = 1; corner < 8; ++corner){
    vec3 p = projected(worldViewProjTM, getBoxCorner(corner));
    clipmin = min(clipmin, p);
    clipmax = max(clipmax, p);
  }

  bool overlaps = all(lessThanEqual(clipmin, vec3(1))) &&
                  all(greaterThanEqual(clipmax, vec3(-1)));
  int isvisible = overlaps ? 1 : 0;

#ifdef OCCLUSION
  if (isvisible != 0){
    // remap from [-1,1] to [0,1] for texture lookups and depth compare
    clipmin = clipmin * 0.5 + 0.5;
    clipmax = clipmax * 0.5 + 0.5;

    // pick the mip level from the larger side of the rectangle in texels
    vec2  size     = (clipmax.xy - clipmin.xy);
    ivec2 texsize  = textureSize(depthTex,0);
    float maxsize  = max(size.x, size.y) * float(max(texsize.x,texsize.y));
    float miplevel = ceil(log2(maxsize));

    // sample the four rectangle corners and keep the largest depth
    float a = textureLod(depthTex,clipmin.xy,miplevel).r;
    float b = textureLod(depthTex,vec2(clipmax.x,clipmin.y),miplevel).r;
    float c = textureLod(depthTex,clipmax.xy,miplevel).r;
    float d = textureLod(depthTex,vec2(clipmin.x,clipmax.y),miplevel).r;
    float depth = max(0.0, max(max(max(a,b),c),d));

    isvisible = clipmin.z <= depth ? 1 : 0;
  }
#endif

  storeOutput(isvisible);
}

================================================
FILE: cullingsystem.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "cullingsystem.hpp" #include #include #define DEBUG_VISIBLEBOXES 0 inline unsigned int minDivide(unsigned int val, unsigned int alignment) { return (val+alignment-1)/alignment; } void CullingSystem::init( const Programs &programs, bool dualindex ) { update(programs,dualindex); glGenFramebuffers(1,&m_fbo); glCreateTextures(GL_TEXTURE_BUFFER,2,m_tbo); } void CullingSystem::update( const Programs &programs, bool dualindex ) { m_programs = programs; m_dualindex = dualindex; m_useSSBO = has_GL_VERSION_4_2 != 0; m_useRepesentativeTest = !!has_GL_NV_representative_fragment_test; if (!m_useSSBO) { const char* xfbstreams[] = {"outstream"}; glTransformFeedbackVaryings(programs.bit_regular,1,xfbstreams,GL_INTERLEAVED_ATTRIBS); glLinkProgram(programs.bit_regular); glTransformFeedbackVaryings(programs.bit_temporallast,1,xfbstreams,GL_INTERLEAVED_ATTRIBS); glLinkProgram(programs.bit_temporallast); glTransformFeedbackVaryings(programs.bit_temporalnew,1,xfbstreams,GL_INTERLEAVED_ATTRIBS); glLinkProgram(programs.bit_temporalnew); glTransformFeedbackVaryings(programs.object_frustum,1,xfbstreams,GL_INTERLEAVED_ATTRIBS); glLinkProgram(programs.object_frustum); glTransformFeedbackVaryings(programs.object_hiz,1,xfbstreams,GL_INTERLEAVED_ATTRIBS); glLinkProgram(programs.object_hiz); } glUseProgram(programs.depth_mips); glUniform1i(glGetUniformLocation(programs.depth_mips,"depthTex"),0); m_uniforms.depth_lod = glGetUniformLocation(programs.depth_mips,"depthLod"); m_uniforms.depth_even = glGetUniformLocation(programs.depth_mips,"evenLod"); glUseProgram(programs.object_frustum); glUniform1i(glGetUniformLocation(programs.object_frustum,"matricesTex"),0); if (dualindex){ glUniform1i(glGetUniformLocation(programs.object_frustum,"bboxesTex"),1); } m_uniforms.frustum_viewProj = 
glGetUniformLocation(programs.object_frustum, "viewProjTM"); glUseProgram(programs.object_hiz); glUniform1i(glGetUniformLocation(programs.object_hiz,"matricesTex"),0); if (dualindex){ glUniform1i(glGetUniformLocation(programs.object_frustum,"bboxesTex"),1); } glUniform1i(glGetUniformLocation(programs.object_hiz,"depthTex"),2); m_uniforms.hiz_viewProj = glGetUniformLocation(programs.object_hiz, "viewProjTM"); glUseProgram(programs.object_raster); glUniform1i(glGetUniformLocation(programs.object_raster,"matricesTex"),0); if (dualindex){ glUniform1i(glGetUniformLocation(programs.object_frustum,"bboxesTex"),1); } m_uniforms.raster_viewProj = glGetUniformLocation(programs.object_raster, "viewProjTM"); m_uniforms.raster_viewPos = glGetUniformLocation(programs.object_raster, "viewPos"); m_uniforms.raster_viewDir = glGetUniformLocation(programs.object_raster, "viewDir"); glUseProgram(0); } void CullingSystem::deinit() { glDeleteFramebuffers(1,&m_fbo); glDeleteTextures(2,m_tbo); } void CullingSystem::buildDepthMipmaps( GLuint textureDepth, int width, int height ) { int level = 0; int dim = width > height ? width : height; int twidth = width; int theight = height; int wasEven = 0; glBindFramebuffer(GL_FRAMEBUFFER,m_fbo); glDepthFunc(GL_ALWAYS); glUseProgram(m_programs.depth_mips); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, textureDepth); while (dim){ if (level){ twidth = twidth < 1 ? 1 : twidth; theight = theight < 1 ? 
1 : theight; glViewport(0,0,twidth,theight); glFramebufferTexture2D(GL_FRAMEBUFFER,GL_DEPTH_STENCIL_ATTACHMENT,GL_TEXTURE_2D, textureDepth, level); glUniform1i(m_uniforms.depth_lod, level-1); glUniform1i(m_uniforms.depth_even, wasEven); glDrawArrays(GL_TRIANGLES,0,3); } wasEven = (twidth % 2 == 0) && (theight % 2 == 0); dim /= 2; twidth /= 2; theight /= 2; level++; } glUseProgram(0); glViewport(0,0,width,height); glBindFramebuffer(GL_FRAMEBUFFER,0); glBindTexture(GL_TEXTURE_2D, 0); glDepthFunc(GL_LEQUAL); glViewport(0,0,width,height); } void CullingSystem::testBboxes( Job &job, bool raster ) { // send the scene's bboxes as points stream glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferObjectBbox.buffer); if (m_dualindex){ glVertexAttribIPointer(0, 1, GL_INT, job.m_bufferObjectBbox.stride, (const void*) job.m_bufferObjectBbox.offset); glVertexAttribDivisor(0, 0); glEnableVertexAttribArray(0); } else{ GLsizei stride = job.m_bufferObjectBbox.stride ? job.m_bufferObjectBbox.stride : sizeof(float)*4*2; glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, stride, (const void*)job.m_bufferObjectBbox.offset); glVertexAttribDivisor(0, 0); glEnableVertexAttribArray(0); glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, stride, (const void*)(sizeof(float)*4 + job.m_bufferObjectBbox.offset)); glVertexAttribDivisor(1, 0); glEnableVertexAttribArray(1); } glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferObjectMatrix.buffer); glVertexAttribIPointer(2, 1, GL_INT, job.m_bufferObjectMatrix.stride, (const void*) job.m_bufferObjectMatrix.offset); glVertexAttribDivisor(2, 0); glEnableVertexAttribArray(2); glBindBuffer(GL_ARRAY_BUFFER, 0); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_BUFFER, m_tbo[0]); job.m_bufferMatrices.TexBuffer(GL_TEXTURE_BUFFER,GL_RGBA32F); if (m_dualindex){ glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_BUFFER, m_tbo[1]); job.m_bufferBboxes.TexBuffer(GL_TEXTURE_BUFFER,GL_RGBA32F); } if (raster){ if (m_useRepesentativeTest) { glEnable( 
GL_REPRESENTATIVE_FRAGMENT_TEST_NV ); } #if !DEBUG_VISIBLEBOXES glDepthMask(GL_FALSE); glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE); #endif } else if (m_useSSBO){ glEnable(GL_RASTERIZER_DISCARD); job.m_bufferVisOutput.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); } else{ glEnable(GL_RASTERIZER_DISCARD); // setup transform feedback job.m_bufferVisOutput.BindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,0); glBeginTransformFeedback(GL_POINTS); } glDrawArrays(GL_POINTS,0,job.m_numObjects); if (raster){ if (m_useRepesentativeTest) { glDisable( GL_REPRESENTATIVE_FRAGMENT_TEST_NV ); } #if !DEBUG_VISIBLEBOXES glDepthMask(GL_TRUE); glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); #endif } else if (m_useSSBO){ glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0); glDisable(GL_RASTERIZER_DISCARD); } else{ glEndTransformFeedback(); glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER,0,0); glDisable(GL_RASTERIZER_DISCARD); } if (m_dualindex){ glBindTexture(GL_TEXTURE_BUFFER, 0); glActiveTexture(GL_TEXTURE0); } glBindTexture(GL_TEXTURE_BUFFER, 0); glDisableVertexAttribArray(0); glDisableVertexAttribArray(1); glDisableVertexAttribArray(2); } void CullingSystem::bitsFromOutput( Job &job, BitType type) { // for GL 3.3 compatibility we use xfb // in GL 4.3 SSBO is used // // using compute instead of "invisible" point drawing // would be better if we had really huge thread counts glEnable(GL_RASTERIZER_DISCARD); glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferVisOutput.buffer); for (int i = 0; i < 8; i++){ glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(int)*32, (const void*)(i*sizeof(int)*4 + job.m_bufferVisOutput.offset)); glVertexAttribDivisor(i, 0); glEnableVertexAttribArray(i); } if (type == BITS_CURRENT){ glUseProgram(m_programs.bit_regular); } else{ glUseProgram(type == BITS_CURRENT_AND_LAST ? 
m_programs.bit_temporallast : m_programs.bit_temporalnew); glBindBuffer(GL_ARRAY_BUFFER, job.m_bufferVisBitsLast.buffer); glVertexAttribIPointer(9, 1, GL_UNSIGNED_INT, sizeof(int), (const void*)job.m_bufferVisBitsLast.offset); glEnableVertexAttribArray(9); } if (m_useSSBO){ job.m_bufferVisBitsCurrent.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT); } else{ job.m_bufferVisBitsCurrent.BindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,0); glBeginTransformFeedback(GL_POINTS); } glDrawArrays(GL_POINTS,0, minDivide(job.m_numObjects,32)); if (m_useSSBO){ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); } else{ glEndTransformFeedback(); glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0); } glDisableVertexAttribArray(9); for (int i = 0; i < 8; i++){ glDisableVertexAttribArray(i); } glDisable(GL_RASTERIZER_DISCARD); glBindBuffer(GL_ARRAY_BUFFER, 0); } void CullingSystem::resultFromBits( Job &job ) { job.resultFromBits(job.m_bufferVisBitsCurrent); } void CullingSystem::resultClient(Job &job) { job.resultClient(); } void CullingSystem::buildOutput( MethodType method, Job &job, const View& view ) { switch(method){ case METHOD_FRUSTUM: { glUseProgram(m_programs.object_frustum); glUniformMatrix4fv(m_uniforms.frustum_viewProj, 1 ,GL_FALSE, view.viewProjMatrix); testBboxes(job,false); } break; case METHOD_HIZ: { glUseProgram(m_programs.object_hiz); glUniformMatrix4fv(m_uniforms.hiz_viewProj, 1, GL_FALSE, view.viewProjMatrix); glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D,job.m_textureDepthWithMipmaps); testBboxes(job,false); glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D,0); glActiveTexture(GL_TEXTURE0); } break; case METHOD_RASTER: { // clear visibles job.m_bufferVisOutput.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,0); glUseProgram(m_programs.object_raster); 
glUniformMatrix4fv(m_uniforms.raster_viewProj, 1, GL_FALSE, view.viewProjMatrix); glUniform3fv(m_uniforms.raster_viewPos, 1, view.viewPos); glUniform3fv(m_uniforms.raster_viewDir, 1, view.viewDir); glEnable( GL_POLYGON_OFFSET_FILL ); glPolygonOffset(-1,-1); testBboxes(job,true); glPolygonOffset(0,0); glDisable( GL_POLYGON_OFFSET_FILL ); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glBindBufferBase (GL_SHADER_STORAGE_BUFFER,0,0); } break; } } void CullingSystem::swapBits( Job &job ) { Buffer temp = job.m_bufferVisBitsCurrent; job.m_bufferVisBitsCurrent = job.m_bufferVisBitsLast; job.m_bufferVisBitsLast = temp; } void CullingSystem::JobIndirectUnordered::resultFromBits( const Buffer& bufferVisBitsCurrent ) { glEnable(GL_RASTERIZER_DISCARD); glUseProgram(m_program_indirect_compact); m_bufferIndirectCounter.BindBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0); m_bufferIndirectCounter.ClearBufferSubData (GL_ATOMIC_COUNTER_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); bufferVisBitsCurrent. BindBufferRange(GL_SHADER_STORAGE_BUFFER, 2); m_bufferObjectIndirects.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 1); m_bufferIndirectResult. BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0); m_bufferIndirectResult. 
ClearBufferSubData(GL_SHADER_STORAGE_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDrawArrays(GL_POINTS,0,m_numObjects); glDisable(GL_RASTERIZER_DISCARD); glBindBufferBase (GL_ATOMIC_COUNTER_BUFFER, 0, 0); glBindBufferBase (GL_SHADER_STORAGE_BUFFER, 2, 0); glBindBufferBase (GL_SHADER_STORAGE_BUFFER, 1, 0); glBindBufferBase (GL_SHADER_STORAGE_BUFFER, 0, 0); } void CullingSystem::JobReadback::resultFromBits( const Buffer& bufferVisBitsCurrent ) { GLsizeiptr size = sizeof(int) * minDivide(m_numObjects,32); glBindBuffer(GL_COPY_READ_BUFFER, bufferVisBitsCurrent.buffer ); glBindBuffer(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.buffer ); glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, bufferVisBitsCurrent.offset, m_bufferVisBitsReadback.offset, size); glBindBuffer( GL_COPY_READ_BUFFER, 0 ); glBindBuffer( GL_COPY_WRITE_BUFFER, 0 ); } void CullingSystem::JobReadback::resultClient() { glBindBuffer(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.buffer); glGetBufferSubData(GL_COPY_WRITE_BUFFER, m_bufferVisBitsReadback.offset, m_bufferVisBitsReadback.size, m_hostVisBits); glBindBuffer( GL_COPY_WRITE_BUFFER, 0); } void CullingSystem::JobReadbackPersistent::resultFromBits(const Buffer& bufferVisBitsCurrent) { GLsizeiptr size = sizeof( int ) * minDivide( m_numObjects, 32 ); glCopyNamedBufferSubData( bufferVisBitsCurrent.buffer, m_bufferVisBitsReadback.buffer, bufferVisBitsCurrent.offset, m_bufferVisBitsReadback.offset, size); if (m_fence) { glDeleteSync( m_fence ); } m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } void CullingSystem::JobReadbackPersistent::resultClient() { if (m_fence) { GLsizeiptr size = sizeof( int ) * minDivide( m_numObjects, 32 ); // as some samples read-back within same frame (not recommended) we use the flush here, normally one wouldnt use it glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glDeleteSync(m_fence); m_fence = NULL; memcpy( 
m_hostVisBits, ((uint8_t*)m_bufferVisBitsMapping) + m_bufferVisBitsReadback.offset, size ); } } ================================================ FILE: cullingsystem.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ #ifndef CULLINGSYSTEM_H__ #define CULLINGSYSTEM_H__ #include #include #include class CullingSystem { public: struct Programs { GLuint object_frustum; GLuint object_hiz; GLuint object_raster; GLuint bit_temporallast; GLuint bit_temporalnew; GLuint bit_regular; GLuint depth_mips; }; enum MethodType { METHOD_FRUSTUM, METHOD_HIZ, METHOD_RASTER, NUM_METHODS, }; enum BitType { BITS_CURRENT, BITS_CURRENT_AND_LAST, BITS_CURRENT_AND_NOT_LAST, NUM_BITS, }; struct Buffer { GLuint buffer; GLsizei stride; GLintptr offset; GLsizeiptr size; void create( size_t sizei, const void* data, GLbitfield flags ) { size = sizei; offset = 0; stride = 0; glCreateBuffers( 1, &buffer ); glNamedBufferStorage( buffer, size, data, flags ); } Buffer( GLuint buffer, size_t sizei = 0 ) : buffer( buffer ) , offset( 0 ) , stride( 0 ) { if (!sizei) { if (sizeof( GLsizeiptr ) > 4) glGetNamedBufferParameteri64v( buffer, GL_BUFFER_SIZE, (GLint64*)&size ); else glGetNamedBufferParameteriv( buffer, GL_BUFFER_SIZE, (GLint*)&size ); } else { size = sizei; } } Buffer() : buffer(0) , 
stride(0) , offset(0) , size(0) { } inline void BindBufferRange(GLenum target, GLuint index) const { glBindBufferRange(target, index, buffer, offset, size); } inline void TexBuffer(GLenum target, GLenum internalformat) const { glTexBufferRange(target, internalformat, buffer, offset, size); } inline void ClearBufferSubData(GLenum target,GLenum internalformat,GLenum format,GLenum type,const GLvoid* data) const { glClearBufferSubData(target,internalformat,offset,size,format,type,data); } }; class Job { public: int m_numObjects; // world-space matrices {mat4 world, mat4 worldInverseTranspose} Buffer m_bufferMatrices; Buffer m_bufferBboxes; // only used in dualindex mode (2 x vec4) // 1 32-bit integer per object (index) Buffer m_bufferObjectMatrix; // object-space bounding box (2 x vec4) // or 1 32-bit integer per object (dualindex mode) Buffer m_bufferObjectBbox; // 1 32-bit integer per object Buffer m_bufferVisOutput; // 1 32-bit integer per 32 objects (1 bit per object) Buffer m_bufferVisBitsCurrent; Buffer m_bufferVisBitsLast; // for HiZ GLuint m_textureDepthWithMipmaps; // derive from this class and implement this function how you want to // deal with the results that are provided in the buffer virtual void resultFromBits( const Buffer& bufferVisBitsCurrent ) = 0; // for readback methods we need to wait for a result virtual void resultClient() {}; }; class JobReadback : public Job { public: // 1 32-bit integer per 32 objects (1 bit per object) Buffer m_bufferVisBitsReadback; uint32_t* m_hostVisBits; // Do not use this Job class unless you have to. Persistent // mapped buffers are preferred. // Copies result into readback buffer void resultFromBits( const Buffer& bufferVisBitsCurrent ); // getBufferData into hostVisBits (blocking!) 
void resultClient(); }; class JobReadbackPersistent : public Job { public: // 1 32-bit integer per 32 objects (1 bit per object) Buffer m_bufferVisBitsReadback; void* m_bufferVisBitsMapping; uint32_t* m_hostVisBits; GLsync m_fence; // Copies result into readback buffer and records // a fence. void resultFromBits(const Buffer& bufferVisBitsCurrent); // waits on fence and copies mapping into hostVisBits void resultClient(); }; // multidrawindirect based class JobIndirectUnordered : public Job { public: GLuint m_program_indirect_compact; // 1 indirectSize per object, Buffer m_bufferObjectIndirects; Buffer m_bufferIndirectResult; // 1 integer Buffer m_bufferIndirectCounter; void resultFromBits( const Buffer& bufferVisBitsCurrent ); }; struct View { const float* viewProjMatrix; const float* viewDir; const float* viewPos; }; void init( const Programs &programs, bool dualindex ); void deinit(); void update( const Programs &programs, bool dualindex ); // helper function for HiZ method, leaves fbo bound to 0 void buildDepthMipmaps(GLuint textureDepth, int width, int height); // assumes relevant fbo bound for raster method void buildOutput( MethodType method, Job &job, const View& view ); void bitsFromOutput ( Job &job, BitType type ); void resultFromBits ( Job &job ); void resultClient ( Job &job ); // swaps the Current/Last bit array (for temporal coherent techniques) void swapBits ( Job &job ); private: struct Uniforms { GLint depth_lod; GLint depth_even; GLint frustum_viewProj; GLint hiz_viewProj; GLint raster_viewProj; GLint raster_viewDir; GLint raster_viewPos; }; void testBboxes( Job &job, bool raster); Programs m_programs; Uniforms m_uniforms; GLuint m_fbo; GLuint m_tbo[2]; bool m_dualindex; bool m_useSSBO; bool m_useRepesentativeTest; }; #endif ================================================ FILE: nodetree.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "nodetree.hpp" #include ////////////////////////////////////////////////////////////////////////// static inline void clearNode(NodeTree::Node &node) { node.level = -1; node.leafidx = NodeTree::INVALID; node.levelidx = NodeTree::INVALID; node.parentidx = NodeTree::INVALID; node.childidx = NodeTree::INVALID; node.siblingidx = NodeTree::INVALID; } NodeTree::NodeTree() { m_levelsUsed = 0; m_treeCompactChangeID = 0; m_nodesActive = 0; clearNode(m_root); m_root.levelidx = 0; m_root.level = -1; } const NodeTree::Level* NodeTree::getUsedLevel( int level ) const { if (0 <= level && level < m_levelsUsed){ return &m_levels[level]; } return nullptr; } unsigned int NodeTree::getTreeParentChangeID() const { return m_treeCompactChangeID; } const std::vector& NodeTree::getTreeCompactNodes() const { return m_treeCompactNodes; } NodeTree::nodeID NodeTree::createNode() { nodeID id; if (!m_unusedNodes.empty()){ id = m_unusedNodes[m_unusedNodes.size()-1]; m_unusedNodes.pop_back(); } else{ Node node; m_nodes.push_back(node); m_treeCompactNodes.push_back(compactID()); id = (nodeID)(m_nodes.size()-1); } Node& node = getNode(id); clearNode(node); return id; } void NodeTree::deleteNode( nodeID nodeidx ) { assert (isValid(nodeidx) && nodeidx != ROOT); const Node &node = 
getNode(nodeidx); // make children unlinked while (isValid(node.childidx)){ setNodeParent(node.childidx,INVALID); } // remove self from parent list setNodeParent(nodeidx,INVALID); m_unusedNodes.push_back(nodeidx); } void NodeTree::setNodeParent( nodeID nodeidx, nodeID parentidx ) { assert (isValid(nodeidx) && nodeidx != ROOT); Node &node = getNode(nodeidx); if (node.parentidx == parentidx) return; if (isValid(node.parentidx)){ // unlink from old Node& parent = getNode(node.parentidx); bool found = false; if (parent.childidx == nodeidx){ parent.childidx = node.siblingidx; found = true; } else if (isValid(parent.childidx)){ nodeID child = parent.childidx; while(isValid(getNode(child).siblingidx)){ if (getNode(child).siblingidx == nodeidx){ getNode(child).siblingidx = node.siblingidx; found = true; break; } child = getNode(child).siblingidx; } } assert(found && "node was not a child of parent"); node.siblingidx = INVALID; updateLeafNode(node.parentidx); } if (isValid(parentidx)){ // link to new Node& parent = getNode(parentidx); node.siblingidx = parent.childidx; parent.childidx = nodeidx; updateLeafNode(node.parentidx); } if (isNodeInTree(nodeidx)){ updateLevelNode(nodeidx, isNodeInTree(parentidx) ? 
parentidx : INVALID); } node.parentidx = parentidx; } void NodeTree::addToTree( nodeID nodeidx ) { assert (isValid(nodeidx) && nodeidx != ROOT); const Node &node = getNode(nodeidx); assert (!isNodeInTree(nodeidx) && "must not be already added to tree"); assert ( isNodeInTree(node.parentidx) && "parent must be already added to tree"); updateLevelNode(nodeidx,node.parentidx); } void NodeTree::removeFromTree( nodeID nodeidx ) { assert (isValid(nodeidx) && nodeidx != ROOT); const Node &node = getNode(nodeidx); assert (isNodeInTree(nodeidx) && "must be already added to tree"); updateLevelNode(nodeidx,INVALID); } void NodeTree::addToLevel( nodeID nodeidx, nodeID parentidx ) { Node& node = getNode(nodeidx); const Node& parent = getNode(parentidx); Level& level = getLevel(parent.level+1); level.changeID++; node.levelidx = (lvlID)level.nodes.size(); node.level = parent.level+1; level.nodes.push_back(nodeidx); if (!isValid(node.childidx)){ addLeafNode(nodeidx); } m_levelsUsed = node.level+1 > m_levelsUsed ? 
node.level+1 : m_levelsUsed; m_nodesActive++; } void NodeTree::removeFromLevel( nodeID nodeidx ) { Node& node = getNode(nodeidx); Level& level = getLevel(node.level); level.changeID++; level.nodes[node.levelidx] = level.nodes[level.nodes.size()-1]; getNode(level.nodes[node.levelidx]).levelidx = node.levelidx; level.nodes.pop_back(); if (isValid(node.leafidx)){ removeLeafNode(nodeidx); } if (node.level+1 == m_levelsUsed && level.nodes.empty()){ m_levelsUsed--; } node.level = -1; node.levelidx = INVALID; node.leafidx = INVALID; m_nodesActive--; } void NodeTree::removeLeafNode( nodeID nodeidx ) { assert(isNodeInTree(nodeidx)); Node& node = getNode(nodeidx); Level& level = getLevel(node.level); // remove level.leaves[node.leafidx] = level.leaves[level.leaves.size()-1]; getNode(level.leaves[node.leafidx]).leafidx = node.leafidx; level.leaves.pop_back(); } void NodeTree::addLeafNode( nodeID nodeidx ) { assert(isNodeInTree(nodeidx)); Node& node = getNode(nodeidx); Level& level = getLevel(node.level); // add node.leafidx = (lvlID)level.leaves.size(); level.leaves.push_back(nodeidx); } void NodeTree::updateLeafNode( nodeID nodeidx ) { if (!isNodeInTree(nodeidx)) return; Node& node = getNode(nodeidx); if (!isValid(node.childidx) && isValid(node.leafidx)){ removeLeafNode(nodeidx); } else if (isValid(node.childidx) && !isValid(node.leafidx)){ addLeafNode(nodeidx); } } void NodeTree::updateLevelNode( nodeID nodeidx, nodeID parentidx ) { // at this point node.parentidx is still the old value Node &node = getNode(nodeidx); // update level parent buffer to reflect last state always m_treeCompactNodes[nodeidx].parent = parentidx; m_treeCompactChangeID++; if (isValid(node.levelidx)){ // already active if (isValid(parentidx)){ const Node& parent = getNode(parentidx); int oldlevel = node.level; int newlevel = parent.level + 1; // we remain in the same level and only our parent has changed if (oldlevel == newlevel){ return; } removeFromLevel(nodeidx); addToLevel(nodeidx,parentidx); } 
else{ removeFromLevel(nodeidx); } } else if (isValid(parentidx)){ // was inactive // add to level addToLevel(nodeidx,parentidx); } m_treeCompactNodes[nodeidx].level = node.level; nodeID child = node.childidx; while (isValid(child)){ updateLevelNode(child, isValid(parentidx) ? nodeidx : INVALID ); child = getNode(child).siblingidx; } } void NodeTree::reserve( int numNodes ) { m_nodes.reserve( numNodes ); m_treeCompactNodes.reserve( numNodes ); } void NodeTree::create( int numNodes ) { Node node; clearNode(node); m_nodes.resize( numNodes, node ); m_treeCompactNodes.resize( numNodes, compactID() ); } void NodeTree::clear() { m_nodesActive = 0; m_levelsUsed = 0; m_treeCompactChangeID = 0; m_levels.clear(); m_nodes.clear(); m_treeCompactNodes.clear(); } ================================================ FILE: nodetree.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */
/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#pragma once

#include

// Node hierarchy that additionally tracks, per depth level, which nodes are
// active in the tree and which of them are leaves. A compact (level, parent)
// record is maintained per node in parallel to the full Node record.
class NodeTree {
public:
  enum Flags {
    INVALID = 0xFFFFFFFF,   // sentinel for "no node / no index"
    ROOT = 0x7FFFFFFF,      // id that getNode() resolves to m_root
    LEVELBITS = 8,          // bit width of compactID::level
    PARENTBITS = 32 - LEVELBITS // bit width of compactID::parent
  };

  // all-ones values of the two compactID bit fields
  static constexpr unsigned INVALID_LEVEL = (1 << LEVELBITS) - 1;
  static constexpr unsigned INVALID_PARENT = (1 << PARENTBITS) - 1;

  // Level and parent of a node packed into LEVELBITS + PARENTBITS bits;
  // default-constructed entries hold the INVALID_* values.
  struct compactID {
    unsigned level : LEVELBITS;
    unsigned parent : PARENTBITS;

    compactID(){
      level = INVALID_LEVEL;
      parent = INVALID_PARENT;
    }
  };

  typedef unsigned int nodeID;
  typedef unsigned int lvlID;

  // Per-depth bookkeeping: ids of the nodes on this level and of its leaves.
  // changeID is incremented when a node is removed from the level.
  struct Level {
    unsigned int changeID;
    std::vector nodes;
    std::vector leaves;

    Level(){
      changeID = 0;
    }
  };

  struct Node {
    nodeID parentidx;
    lvlID levelidx;   // slot inside Level::nodes, INVALID while inactive
    lvlID leafidx;    // slot inside Level::leaves, INVALID while not listed
    int level;        // set to -1 when the node is removed from its level
    nodeID childidx;  // first child; further children via siblingidx
    nodeID siblingidx;
  };

private:
  Node m_root;

  // general nodes
  std::vector m_nodes;
  std::vector m_unusedNodes;

  // actual nodes added to tree
  std::vector m_treeCompactNodes;
  std::vector m_levels;
  unsigned int m_treeCompactChangeID;
  int m_nodesActive;
  int m_levelsUsed;

public:
  NodeTree();

  const Level* getUsedLevel(int level) const;

  inline int getNumUsedLevel() const
  {
    return m_levelsUsed;
  }

  unsigned int getTreeParentChangeID() const;

  const std::vector& getTreeCompactNodes() const;

  inline nodeID getTreeRoot()
  {
    return ROOT;
  }

  // ROOT resolves to the dedicated root node, any other id indexes m_nodes
  // (no bounds check).
  inline const Node& getNode(nodeID nodeidx) const
  {
    if (nodeidx == ROOT)
      return m_root;
    else
      return m_nodes[nodeidx];
  }

  inline bool isValid(unsigned int id)
  {
    return id != INVALID;
  }

  // A node counts as "in the tree" when it has a valid slot in a level.
  inline bool isNodeInTree(nodeID nodeidx)
  {
    return isValid(nodeidx) && isValid(getNode(nodeidx).levelidx);
  }

  inline nodeID getParentNode(nodeID nodeidx) const
  {
    return getNode(nodeidx).parentidx;
  }

  nodeID createNode();
  void deleteNode(nodeID nodeidx);
  void setNodeParent(nodeID nodeidx, nodeID parentidx);
  void addToTree(nodeID nodeidx);
  void removeFromTree(nodeID nodeidx);

  void reserve(int numNodes);
  void create(int numNodes);
  void clear();

  int getNumActiveNodes() const
  {
    return m_nodesActive;
  }

private:
  // Returns the level record, growing m_levels on demand so that the
  // requested index exists.
  inline Level& getLevel(int level)
  {
    if ((int)m_levels.size() < level+1){
      m_levels.resize(level+1);
    }
    return m_levels[level];
  }

  // mutable counterpart of the public getNode()
  inline Node& getNode(nodeID nodeidx)
  {
    if (nodeidx == ROOT)
      return m_root;
    else
      return m_nodes[nodeidx];
  }

  void addToLevel(nodeID nodeidx, nodeID parentidx);
  void removeFromLevel(nodeID nodeidx);
  void removeLeafNode(nodeID nodeidx);
  void addLeafNode(nodeID nodeidx);
  void updateLeafNode(nodeID nodeidx);
  void updateLevelNode(nodeID nodeidx, nodeID parentidx);
};

================================================ FILE: nvtoken.cpp ================================================
/* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License.
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "nvtoken.hpp" namespace nvtoken { ////////////////////////////////////////////////////////////////////////// // generic GLuint s_nvcmdlist_header[NVTOKEN_TYPES] = {0}; GLuint s_nvcmdlist_headerSizes[NVTOKEN_TYPES] = {0}; GLushort s_nvcmdlist_stages[NVTOKEN_STAGES] = {0}; bool s_nvcmdlist_bindless = false; static inline GLuint nvtokenHeaderSW(GLuint type, GLuint size){ return type | (size<<16); } static inline GLenum nvtokenHeaderCommandSW(GLuint header) { return header & 0xFFFF; } static inline GLuint nvtokenHeaderSizeSW(GLuint header) { return header>>16; } static inline GLenum nvtokenHeaderCommand(GLuint header) { for (int i = 0; i < NVTOKEN_TYPES; i++){ if (header == s_nvcmdlist_header[i]) return i; } assert(0 && "can't find header"); return -1; } template static void nvtokenRegisterSize() { s_nvcmdlist_headerSizes[T::ID] = sizeof(T); } void nvtokenInitInternals( bool hwsupport, bool bindlessSupport) { assert( !hwsupport || (hwsupport && bindlessSupport) ); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); nvtokenRegisterSize(); for (int i = 0; i < NVTOKEN_TYPES; i++){ GLuint sz = s_nvcmdlist_headerSizes[i]; assert(sz); } s_nvcmdlist_bindless = bindlessSupport; if (hwsupport){ for (int i = 0; i < NVTOKEN_TYPES; i++){ s_nvcmdlist_header[i] = glGetCommandHeaderNV(i,s_nvcmdlist_headerSizes[i]); } s_nvcmdlist_stages[NVTOKEN_STAGE_VERTEX] = glGetStageIndexNV(GL_VERTEX_SHADER); s_nvcmdlist_stages[NVTOKEN_STAGE_TESS_CONTROL] = 
glGetStageIndexNV(GL_TESS_CONTROL_SHADER); s_nvcmdlist_stages[NVTOKEN_STAGE_TESS_EVALUATION] = glGetStageIndexNV(GL_TESS_EVALUATION_SHADER); s_nvcmdlist_stages[NVTOKEN_STAGE_GEOMETRY] = glGetStageIndexNV(GL_GEOMETRY_SHADER); s_nvcmdlist_stages[NVTOKEN_STAGE_FRAGMENT] = glGetStageIndexNV(GL_FRAGMENT_SHADER); } else{ for (int i = 0; i < NVTOKEN_TYPES; i++){ s_nvcmdlist_header[i] = nvtokenHeaderSW(i,s_nvcmdlist_headerSizes[i]); } for (int i = 0; i < NVTOKEN_STAGES; i++){ s_nvcmdlist_stages[i] = i; } } } #define TOSTRING(a) case a: return #a; const char* nvtokenCommandToString(GLenum type){ switch (type){ TOSTRING(GL_NOP_COMMAND_NV ); TOSTRING(GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV); TOSTRING(GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV ); TOSTRING(GL_ELEMENT_ADDRESS_COMMAND_NV ); TOSTRING(GL_ATTRIBUTE_ADDRESS_COMMAND_NV ); TOSTRING(GL_UNIFORM_ADDRESS_COMMAND_NV ); TOSTRING(GL_BLEND_COLOR_COMMAND_NV ); TOSTRING(GL_STENCIL_REF_COMMAND_NV ); TOSTRING(GL_TERMINATE_SEQUENCE_COMMAND_NV ); TOSTRING(GL_LINE_WIDTH_COMMAND_NV ); TOSTRING(GL_POLYGON_OFFSET_COMMAND_NV ); TOSTRING(GL_ALPHA_REF_COMMAND_NV ); TOSTRING(GL_VIEWPORT_COMMAND_NV ); TOSTRING(GL_SCISSOR_COMMAND_NV ); TOSTRING(GL_DRAW_ELEMENTS_COMMAND_NV ); TOSTRING(GL_DRAW_ARRAYS_COMMAND_NV ); TOSTRING(GL_DRAW_ELEMENTS_STRIP_COMMAND_NV ); TOSTRING(GL_DRAW_ARRAYS_STRIP_COMMAND_NV ); } return NULL; } ////////////////////////////////////////////////////////////////////////// void nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSize, int stats[NVTOKEN_TYPES] ) { const GLubyte* NV_RESTRICT current = (GLubyte*)stream; const GLubyte* streamEnd = current + streamSize; while (current < streamEnd){ const GLuint* header = (const GLuint*)current; GLenum type = nvtokenHeaderCommand(*header); stats[type]++; current += s_nvcmdlist_headerSizes[type]; } } // Emulation related static inline GLenum nvtokenDrawCommandSequenceSW( const void* NV_RESTRICT stream, size_t streamSize, GLenum mode, GLenum type, const StateSystem::State& state ) 
{ const GLubyte* NV_RESTRICT current = (GLubyte*)stream; const GLubyte* streamEnd = current + streamSize; GLenum modeStrip; if (mode == GL_LINES) modeStrip = GL_LINE_STRIP; else if (mode == GL_TRIANGLES) modeStrip = GL_TRIANGLE_STRIP; /*else if (mode == GL_QUADS) modeStrip = GL_QUAD_STRIP;*/ else if (mode == GL_LINES_ADJACENCY) modeStrip = GL_LINE_STRIP_ADJACENCY; else if (mode == GL_TRIANGLES_ADJACENCY) modeStrip = GL_TRIANGLE_STRIP_ADJACENCY; else modeStrip = mode; GLenum modeSpecial; if (mode == GL_LINES) modeSpecial = GL_LINE_LOOP; else if (mode == GL_TRIANGLES) modeSpecial = GL_TRIANGLE_FAN; else modeSpecial = mode; while (current < streamEnd){ const GLuint* header = (const GLuint*)current; GLenum cmdtype = nvtokenHeaderCommand(*header); // if you always use emulation on non-native tokens you can use // cmdtype = nvtokenHeaderCommandSW(header->encoded) switch(cmdtype){ case GL_TERMINATE_SEQUENCE_COMMAND_NV: { return type; } break; case GL_NOP_COMMAND_NV: { } break; case GL_DRAW_ELEMENTS_COMMAND_NV: { const DrawElementsCommandNV* cmd = (const DrawElementsCommandNV*)current; glDrawElementsBaseVertex(mode, cmd->count, type, (const GLvoid*)(cmd->firstIndex * sizeof(GLuint)), cmd->baseVertex); } break; case GL_DRAW_ARRAYS_COMMAND_NV: { const DrawArraysCommandNV* cmd = (const DrawArraysCommandNV*)current; glDrawArrays(mode, cmd->first, cmd->count); } break; case GL_DRAW_ELEMENTS_STRIP_COMMAND_NV: { const DrawElementsCommandNV* cmd = (const DrawElementsCommandNV*)current; glDrawElementsBaseVertex(modeStrip, cmd->count, type, (const GLvoid*)(cmd->firstIndex * sizeof(GLuint)), cmd->baseVertex); } break; case GL_DRAW_ARRAYS_STRIP_COMMAND_NV: { const DrawArraysCommandNV* cmd = (const DrawArraysCommandNV*)current; glDrawArrays(modeStrip, cmd->first, cmd->count); } break; case GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV: { const DrawElementsInstancedCommandNV* cmd = (const DrawElementsInstancedCommandNV*)current; assert (cmd->mode == mode || cmd->mode == modeStrip || cmd->mode 
== modeSpecial); glDrawElementsIndirect(cmd->mode, type, &cmd->count); } break; case GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV: { const DrawArraysInstancedCommandNV* cmd = (const DrawArraysInstancedCommandNV*)current; assert (cmd->mode == mode || cmd->mode == modeStrip || cmd->mode == modeSpecial); glDrawArraysIndirect(cmd->mode, &cmd->count); } break; case GL_ELEMENT_ADDRESS_COMMAND_NV: { const ElementAddressCommandNV* cmd = (const ElementAddressCommandNV*)current; type = cmd->typeSizeInByte == 4 ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT; if (s_nvcmdlist_bindless){ glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x7FFFFFFF); } else{ const ElementAddressCommandEMU* cmd = (const ElementAddressCommandEMU*)current; glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, cmd->buffer); } } break; case GL_ATTRIBUTE_ADDRESS_COMMAND_NV: { if (s_nvcmdlist_bindless){ const AttributeAddressCommandNV* cmd = (const AttributeAddressCommandNV*)current; glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, cmd->index, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x7FFFFFFF); } else{ const AttributeAddressCommandEMU* cmd = (const AttributeAddressCommandEMU*)current; glBindVertexBuffer(cmd->index, cmd->buffer, cmd->offset, state.vertexformat.bindings[cmd->index].stride); } } break; case GL_UNIFORM_ADDRESS_COMMAND_NV: { if (s_nvcmdlist_bindless){ const UniformAddressCommandNV* cmd = (const UniformAddressCommandNV*)current; glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, cmd->index, GLuint64(cmd->addressLo) | (GLuint64(cmd->addressHi)<<32), 0x10000); } else{ const UniformAddressCommandEMU* cmd = (const UniformAddressCommandEMU*)current; glBindBufferRange(GL_UNIFORM_BUFFER,cmd->index, cmd->buffer, cmd->offset256 * 256, cmd->size4*4); } } break; case GL_BLEND_COLOR_COMMAND_NV: { const BlendColorCommandNV* cmd = (const BlendColorCommandNV*)current; glBlendColor(cmd->red,cmd->green,cmd->blue,cmd->alpha); } break; case 
GL_STENCIL_REF_COMMAND_NV: { const StencilRefCommandNV* cmd = (const StencilRefCommandNV*)current; glStencilFuncSeparate(GL_FRONT, state.stencil.funcs[StateSystem::FACE_FRONT].func, cmd->frontStencilRef, state.stencil.funcs[StateSystem::FACE_FRONT].mask); glStencilFuncSeparate(GL_BACK, state.stencil.funcs[StateSystem::FACE_BACK ].func, cmd->backStencilRef, state.stencil.funcs[StateSystem::FACE_BACK ].mask); } break; case GL_LINE_WIDTH_COMMAND_NV: { const LineWidthCommandNV* cmd = (const LineWidthCommandNV*)current; glLineWidth(cmd->lineWidth); } break; case GL_POLYGON_OFFSET_COMMAND_NV: { const PolygonOffsetCommandNV* cmd = (const PolygonOffsetCommandNV*)current; glPolygonOffset(cmd->scale,cmd->bias); } break; case GL_ALPHA_REF_COMMAND_NV: {/* const AlphaRefCommandNV* cmd = (const AlphaRefCommandNV*)current; glAlphaFunc(state.alpha.mode, cmd->alphaRef); */ } break; case GL_VIEWPORT_COMMAND_NV: { const ViewportCommandNV* cmd = (const ViewportCommandNV*)current; glViewport(cmd->x, cmd->y, cmd->width, cmd->height); } break; case GL_SCISSOR_COMMAND_NV: { const ScissorCommandNV* cmd = (const ScissorCommandNV*)current; glScissor(cmd->x,cmd->y,cmd->width,cmd->height); } break; case GL_FRONT_FACE_COMMAND_NV: { FrontFaceCommandNV* cmd = (FrontFaceCommandNV*)current; glFrontFace(cmd->frontFace?GL_CW:GL_CCW); } break; } GLuint tokenSize = s_nvcmdlist_headerSizes[cmdtype]; assert(tokenSize); current += tokenSize; } return type; } void nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream, size_t streamSize, const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, GLuint count, StateSystem::State &state) { const char* NV_RESTRICT tokens = (const char*)stream; GLenum type = GL_UNSIGNED_SHORT; for (GLuint i = 0; i < count; i++) { size_t offset = offsets[i]; size_t size = sizes[i]; assert(size + offset <= streamSize); type = nvtokenDrawCommandSequenceSW(&tokens[offset], size, mode, type, state); } } #if NVTOKEN_STATESYSTEM void 
nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_t streamSize, const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, const GLuint* NV_RESTRICT states, const GLuint* NV_RESTRICT fbos, GLuint count, StateSystem &stateSystem) { int lastFbo = ~0; const char* NV_RESTRICT tokens = (const char*)stream; StateSystem::StateID lastID; GLenum type = GL_UNSIGNED_SHORT; for (GLuint i = 0; i < count; i++) { GLuint fbo; StateSystem::StateID curID = states[i]; const StateSystem::State& state = stateSystem.get(curID); if (fbos[i]){ fbo = fbos[i]; } else{ fbo = state.fbo.fboDraw; } if (fbo != lastFbo){ glBindFramebuffer(GL_FRAMEBUFFER, fbo); lastFbo = fbo; } if (i == 0){ stateSystem.applyGL( curID, true ); // quite costly } else { stateSystem.applyGL( curID, lastID, true ); } lastID = curID; size_t offset = offsets[i]; size_t size = sizes[i]; GLenum mode = state.basePrimitiveMode; assert(size + offset <= streamSize); type = nvtokenDrawCommandSequenceSW(&tokens[offset], size, mode, type, state); } } #endif } ================================================ FILE: nvtoken.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include #include #include #define NVTOKEN_STATESYSTEM 1 #include "platform.h" #include #if NVTOKEN_STATESYSTEM // not needed if emulation is not used, or implemented differently #include "statesystem.hpp" #else namespace StateSystem { // Minimal emulation layer enum Faces { FACE_FRONT, FACE_BACK, MAX_FACES, }; struct State { struct { struct { GLsizei stride; }bindings[16]; }vertexformat; struct { GLenum mode; }alpha; struct { struct { GLenum func; GLuint mask; }funcs[MAX_FACES]; }stencil; }; } #endif namespace nvtoken { ////////////////////////////////////////////////////////////////////////// // generic // not the cleanest way #define NVTOKEN_TYPES (GL_FRONT_FACE_COMMAND_NV+1) enum NVTokenShaderStage { NVTOKEN_STAGE_VERTEX, NVTOKEN_STAGE_TESS_CONTROL, NVTOKEN_STAGE_TESS_EVALUATION, NVTOKEN_STAGE_GEOMETRY, NVTOKEN_STAGE_FRAGMENT, NVTOKEN_STAGES, }; extern bool s_nvcmdlist_bindless; extern GLuint s_nvcmdlist_header[NVTOKEN_TYPES]; extern GLuint s_nvcmdlist_headerSizes[NVTOKEN_TYPES]; extern GLushort s_nvcmdlist_stages[NVTOKEN_STAGES]; class NVPointerStream { public: size_t m_max; unsigned char* m_begin; unsigned char* m_end; unsigned char* NV_RESTRICT m_cur; void init(void* data, size_t size) { m_begin = (unsigned char*)data; m_end = m_begin + size; m_cur = m_begin; m_max = size; } size_t size() const { return m_cur - m_begin; } size_t capacity() const { return m_max; } }; struct NVTokenSequence { std::vector offsets; std::vector sizes; std::vector states; std::vector fbos; }; #pragma pack(push,1) typedef struct { GLuint header; GLuint buffer; GLuint _pad; GLuint typeSizeInByte; } ElementAddressCommandEMU; typedef struct { GLuint header; GLuint index; GLuint buffer; GLuint offset; } AttributeAddressCommandEMU; typedef struct { GLuint header; GLushort index; GLushort stage; GLuint buffer; 
GLushort offset256; GLushort size4; } UniformAddressCommandEMU; struct NVTokenNop { static const GLenum ID = GL_NOP_COMMAND_NV; NOPCommandNV cmd; NVTokenNop() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenTerminate { static const GLenum ID = GL_TERMINATE_SEQUENCE_COMMAND_NV; TerminateSequenceCommandNV cmd; NVTokenTerminate() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenDrawElemsInstanced { static const GLenum ID = GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV; DrawElementsInstancedCommandNV cmd; NVTokenDrawElemsInstanced() { cmd.mode = GL_TRIANGLES; cmd.baseInstance = 0; cmd.baseVertex = 0; cmd.firstIndex = 0; cmd.count = 0; cmd.instanceCount = 1; cmd.header = s_nvcmdlist_header[ID]; } void setMode(GLenum primmode) { cmd.mode = primmode; } void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0) { cmd.count = count; cmd.firstIndex = firstIndex; cmd.baseVertex = baseVertex; } void setInstances(GLuint count, GLuint baseInstance=0){ cmd.baseInstance = baseInstance; cmd.instanceCount = count; } }; struct NVTokenDrawArraysInstanced { static const GLenum ID = GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV; DrawArraysInstancedCommandNV cmd; NVTokenDrawArraysInstanced() { cmd.mode = GL_TRIANGLES; cmd.baseInstance = 0; cmd.first = 0; cmd.count = 0; cmd.instanceCount = 1; cmd.header = s_nvcmdlist_header[ID]; } void setMode(GLenum primmode) { cmd.mode = primmode; } void setParams(GLuint count, GLuint first=0) { cmd.count = count; cmd.first = first; } void setInstances(GLuint count, GLuint baseInstance=0){ cmd.baseInstance = baseInstance; cmd.instanceCount = count; } }; struct NVTokenDrawElems { static const GLenum ID = GL_DRAW_ELEMENTS_COMMAND_NV; DrawElementsCommandNV cmd; NVTokenDrawElems() { cmd.baseVertex = 0; cmd.firstIndex = 0; cmd.count = 0; cmd.header = s_nvcmdlist_header[ID]; } void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0) { cmd.count = count; cmd.firstIndex = firstIndex; cmd.baseVertex = baseVertex; } void setMode(GLenum 
primmode) { assert(primmode != GL_TRIANGLE_FAN && /* primmode != GL_POLYGON && */ primmode != GL_LINE_LOOP); if (primmode == GL_LINE_STRIP || primmode == GL_TRIANGLE_STRIP || /* primmode == GL_QUAD_STRIP || */ primmode == GL_LINE_STRIP_ADJACENCY || primmode == GL_TRIANGLE_STRIP_ADJACENCY) { cmd.header = s_nvcmdlist_header[GL_DRAW_ELEMENTS_STRIP_COMMAND_NV]; } else { cmd.header = s_nvcmdlist_header[GL_DRAW_ELEMENTS_COMMAND_NV]; } } }; struct NVTokenDrawArrays { static const GLenum ID = GL_DRAW_ARRAYS_COMMAND_NV; DrawArraysCommandNV cmd; NVTokenDrawArrays() { cmd.first = 0; cmd.count = 0; cmd.header = s_nvcmdlist_header[ID]; } void setParams(GLuint count, GLuint first=0) { cmd.count = count; cmd.first = first; } void setMode(GLenum primmode) { assert(primmode != GL_TRIANGLE_FAN && /* primmode != GL_POLYGON && */ primmode != GL_LINE_LOOP); if (primmode == GL_LINE_STRIP || primmode == GL_TRIANGLE_STRIP || /* primmode == GL_QUAD_STRIP || */ primmode == GL_LINE_STRIP_ADJACENCY || primmode == GL_TRIANGLE_STRIP_ADJACENCY) { cmd.header = s_nvcmdlist_header[GL_DRAW_ARRAYS_STRIP_COMMAND_NV]; } else { cmd.header = s_nvcmdlist_header[GL_DRAW_ARRAYS_COMMAND_NV]; } } }; struct NVTokenDrawElemsStrip { static const GLenum ID = GL_DRAW_ELEMENTS_STRIP_COMMAND_NV; DrawElementsCommandNV cmd; NVTokenDrawElemsStrip() { cmd.baseVertex = 0; cmd.firstIndex = 0; cmd.count = 0; cmd.header = s_nvcmdlist_header[ID]; } void setParams(GLuint count, GLuint firstIndex=0, GLuint baseVertex=0) { cmd.count = count; cmd.firstIndex = firstIndex; cmd.baseVertex = baseVertex; } }; struct NVTokenDrawArraysStrip { static const GLenum ID = GL_DRAW_ARRAYS_STRIP_COMMAND_NV; DrawArraysCommandNV cmd; NVTokenDrawArraysStrip() { cmd.first = 0; cmd.count = 0; cmd.header = s_nvcmdlist_header[ID]; } void setParams(GLuint count, GLuint first=0) { cmd.count = count; cmd.first = first; } }; struct NVTokenVbo { static const GLenum ID = GL_ATTRIBUTE_ADDRESS_COMMAND_NV; union { AttributeAddressCommandNV cmd; 
AttributeAddressCommandEMU cmdEMU; }; void setBinding(GLuint idx){ cmd.index = idx; } void setBuffer(GLuint buffer, GLuint64 address, GLuint offset) { if (s_nvcmdlist_bindless){ address += offset; cmd.addressLo = GLuint(address & 0xFFFFFFFF); cmd.addressHi = GLuint(address >> 32); } else{ cmdEMU.buffer = buffer; cmdEMU.offset = offset; } } NVTokenVbo() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenIbo { static const GLenum ID = GL_ELEMENT_ADDRESS_COMMAND_NV; union{ ElementAddressCommandNV cmd; ElementAddressCommandEMU cmdEMU; }; void setType(GLenum type){ if (type == GL_UNSIGNED_BYTE){ cmd.typeSizeInByte = 1; } else if (type == GL_UNSIGNED_SHORT){ cmd.typeSizeInByte = 2; } else if (type == GL_UNSIGNED_INT){ cmd.typeSizeInByte = 4; } else{ assert(0 && "illegal type"); } } void setBuffer(GLuint buffer, GLuint64 address) { if (s_nvcmdlist_bindless){ cmd.addressLo = GLuint(address & 0xFFFFFFFF); cmd.addressHi = GLuint(address >> 32); } else{ cmdEMU.buffer = buffer; cmdEMU._pad = 0; } } NVTokenIbo() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenUbo { static const GLenum ID = GL_UNIFORM_ADDRESS_COMMAND_NV; union{ UniformAddressCommandNV cmd; UniformAddressCommandEMU cmdEMU; }; void setBuffer(GLuint buffer, GLuint64 address, GLuint offset, GLuint size) { assert(size % 4 == 0 && offset % 256 == 0); if (s_nvcmdlist_bindless){ address += offset; cmd.addressLo = GLuint(address & 0xFFFFFFFF); cmd.addressHi = GLuint(address >> 32); } else{ cmdEMU.buffer = buffer; cmdEMU.offset256 = offset / 256; cmdEMU.size4 = size / 4; } } void setBinding(GLuint idx, NVTokenShaderStage stage){ cmd.index = idx; cmd.stage = s_nvcmdlist_stages[stage]; } NVTokenUbo() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenBlendColor{ static const GLenum ID = GL_BLEND_COLOR_COMMAND_NV; BlendColorCommandNV cmd; NVTokenBlendColor() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenStencilRef{ static const GLenum ID = GL_STENCIL_REF_COMMAND_NV; StencilRefCommandNV 
cmd; NVTokenStencilRef() { cmd.header = s_nvcmdlist_header[ID]; } } ; struct NVTokenLineWidth{ static const GLenum ID = GL_LINE_WIDTH_COMMAND_NV; LineWidthCommandNV cmd; NVTokenLineWidth() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenPolygonOffset{ static const GLenum ID = GL_POLYGON_OFFSET_COMMAND_NV; PolygonOffsetCommandNV cmd; NVTokenPolygonOffset() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenAlphaRef{ static const GLenum ID = GL_ALPHA_REF_COMMAND_NV; AlphaRefCommandNV cmd; NVTokenAlphaRef() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenViewport{ static const GLenum ID = GL_VIEWPORT_COMMAND_NV; ViewportCommandNV cmd; NVTokenViewport() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenScissor { static const GLenum ID = GL_SCISSOR_COMMAND_NV; ScissorCommandNV cmd; NVTokenScissor() { cmd.header = s_nvcmdlist_header[ID]; } }; struct NVTokenFrontFace { static const GLenum ID = GL_FRONT_FACE_COMMAND_NV; FrontFaceCommandNV cmd; NVTokenFrontFace() { cmd.header = s_nvcmdlist_header[ID]; } void setFrontFace(GLenum winding){ cmd.frontFace = winding == GL_CCW; } }; #pragma pack(pop) template void nvtokenMakeNop(T & token){ NVTokenNop *nop = (NVTokenNop*)&token; for (size_t i = 0; i < (sizeof(T))/4; i++){ nop[i] = NVTokenNop(); } } template size_t nvtokenEnqueue(std::string& queue, T& data) { size_t offset = queue.size(); std::string cmd = std::string((const char*)&data,sizeof(T)); queue += cmd; return offset; } template size_t nvtokenEnqueue(NVPointerStream& queue, T& data) { assert(queue.m_cur + sizeof(T) <= queue.m_end); size_t offset = queue.m_cur - queue.m_begin; memcpy(queue.m_cur,&data,sizeof(T)); queue.m_cur += sizeof(T); return offset; } ////////////////////////////////////////////////////////// void nvtokenInitInternals( bool hwsupport, bool bindlessSupport); const char* nvtokenCommandToString( GLenum type ); void nvtokenGetStats( const void* NV_RESTRICT stream, size_t streamSize, int stats[NVTOKEN_TYPES]); void 
nvtokenDrawCommandsSW(GLenum mode, const void* NV_RESTRICT stream, size_t streamSize, const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, GLuint count, StateSystem::State &state); #if NVTOKEN_STATESYSTEM void nvtokenDrawCommandsStatesSW(const void* NV_RESTRICT stream, size_t streamSize, const GLintptr* NV_RESTRICT offsets, const GLsizei* NV_RESTRICT sizes, const GLuint* NV_RESTRICT states, const GLuint* NV_RESTRICT fbos, GLuint count, StateSystem &stateSystem); #endif } ================================================ FILE: renderer.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include #include #include "renderer.hpp" #include "common.h" #pragma pack(1) namespace csfviewer { ////////////////////////////////////////////////////////////////////////// bool Renderer::s_bindless_ubo = false; CullingSystem Renderer::s_cullsys; ScanSystem Renderer::s_scansys; const char* toString( enum ShadeType st ) { switch(st){ case SHADE_SOLID: return "solid"; case SHADE_SOLIDWIRE: return "solid w edges"; case SHADE_SOLIDWIRE_SPLIT: return "solid w edges (split)"; } return NULL; } static void FillCache( std::vector& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo, bool solid, int objectIndex ) { int begin = 0; const CadScene::DrawRangeCache &cache = solid ? obj.cacheSolid : obj.cacheWire; for (size_t s = 0; s < cache.state.size(); s++) { const CadScene::DrawStateInfo &state = cache.state[s]; for (int d = 0; d < cache.stateCount[s]; d++){ // evict Renderer::DrawItem di; di.geometryIndex = obj.geometryIndex; di.matrixIndex = state.matrixIndex; di.materialIndex = state.materialIndex; di.objectIndex = objectIndex; di.solid = solid; di.range.offset = cache.offsets[begin + d]; di.range.count = cache.counts [begin + d]; drawItems.push_back(di); } begin += cache.stateCount[s]; } } static void FillJoin( std::vector& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo, bool solid, int objectIndex ) { CadScene::DrawRange range; int lastMaterial = -1; int lastMatrix = -1; for (size_t p = 0; p < obj.parts.size(); p++){ const CadScene::ObjectPart& part = obj.parts[p]; const CadScene::GeometryPart& mesh = geo.parts[p]; if (!part.active) continue; if (part.materialIndex != lastMaterial || part.matrixIndex != lastMatrix){ if (range.count){ // evict Renderer::DrawItem di; di.geometryIndex = obj.geometryIndex; di.matrixIndex = lastMatrix; di.materialIndex = 
lastMaterial; di.objectIndex = objectIndex; di.solid = solid; di.range = range; drawItems.push_back(di); } range = CadScene::DrawRange(); lastMaterial = part.materialIndex; lastMatrix = part.matrixIndex; } if (!range.count){ range.offset = solid ? mesh.indexSolid.offset : mesh.indexWire.offset; } range.count += solid ? mesh.indexSolid.count : mesh.indexWire.count; } // evict Renderer::DrawItem di; di.geometryIndex = obj.geometryIndex; di.matrixIndex = lastMatrix; di.materialIndex = lastMaterial; di.objectIndex = objectIndex; di.solid = solid; di.range = range; drawItems.push_back(di); } static void FillIndividual( std::vector& drawItems, const CadScene::Object& obj, const CadScene::Geometry& geo, bool solid, int objectIndex ) { for (size_t p = 0; p < obj.parts.size(); p++){ const CadScene::ObjectPart& part = obj.parts[p]; const CadScene::GeometryPart& mesh = geo.parts[p]; if (!part.active) continue; Renderer::DrawItem di; di.geometryIndex = obj.geometryIndex; di.matrixIndex = part.matrixIndex; di.materialIndex = part.materialIndex; di.objectIndex = objectIndex; di.solid = solid; di.range = solid ? 
mesh.indexSolid : mesh.indexWire; drawItems.push_back(di); } } void Renderer::fillDrawItems( std::vector& drawItems, size_t from, size_t to, bool solid, bool wire ) { const CadScene* NV_RESTRICT scene = m_scene; for (size_t i = from; i < scene->m_objects.size() && i < to; i++){ const CadScene::Object& obj = scene->m_objects[i]; const CadScene::Geometry& geo = scene->m_geometry[obj.geometryIndex]; if (m_strategy == STRATEGY_GROUPS){ if (solid) FillCache(drawItems, obj, geo, true, int(i)); if (wire) FillCache(drawItems, obj, geo, false, int(i)); } else if (m_strategy == STRATEGY_JOIN) { if (solid) FillJoin(drawItems, obj, geo, true, int(i)); if (wire) FillJoin(drawItems, obj, geo, false, int(i)); } else if (m_strategy == STRATEGY_INDIVIDUAL){ if (solid) FillIndividual(drawItems, obj, geo, true, int(i)); if (wire) FillIndividual(drawItems, obj, geo, false, int(i)); } } } } ================================================ FILE: renderer.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */

#ifndef RENDERER_H__
#define RENDERER_H__

// bindless UBO
#ifndef GL_UNIFORM_BUFFER_UNIFIED_NV
#define GL_UNIFORM_BUFFER_UNIFIED_NV 0x936E
#endif
#ifndef GL_UNIFORM_BUFFER_ADDRESS_NV
#define GL_UNIFORM_BUFFER_ADDRESS_NV 0x936F
#endif
#ifndef GL_UNIFORM_BUFFER_LENGTH_NV
#define GL_UNIFORM_BUFFER_LENGTH_NV 0x9370
#endif

#include "cadscene.hpp"
#include
#include
#include
#include
#include "cullingsystem.hpp"
#include "scansystem.hpp"

namespace csfviewer {

  #define USE_NOFILTER 0 // some renderers support turning off redundancy filter

  #define USE_WIRE_SHADERSWITCH 0 // If set we use two different shaders for tris and lines,
                                  // otherwise we use an immediate mode vertexattrib as pseudo uniform toggle.
                                  // Enable this to stress shader switching in app (becomes primary bottleneck)

  // How the parts of an object are turned into draw items; see
  // Renderer::fillDrawItems for the routine used per value.
  enum Strategy {
    STRATEGY_GROUPS,
    STRATEGY_JOIN,
    STRATEGY_INDIVIDUAL,
  };

  enum ShadeType {
    SHADE_SOLID,
    SHADE_SOLIDWIRE,
    SHADE_SOLIDWIRE_SPLIT, // this mode is not "sane" it is only meant for performance testing of fbo toggles
    NUM_SHADES,
  };
  const char* toString(enum ShadeType st);

  // GL object names and change counters handed to the renderers.
  // The constructor only initializes the two change IDs.
  struct Resources {
    GLuint sceneUbo;
    GLuint64 sceneAddr;

    GLuint programUbo;
    GLuint programUboTris;
    GLuint programUboLine;
    GLuint programIdx;
    GLuint programIdxTris;
    GLuint programIdxLine;

    GLuint fbo;
    GLuint fbo2;

    size_t stateChangeID;
    size_t fboTextureChangeID;

    CullingSystem::View cullView;

    // ugly hack
    mutable GLuint programUsed;
    mutable GLuint programUsedTris;
    mutable GLuint programUsedLine;

    // Selects the UBO or the indexed program set as the "used" programs;
    // mutable members allow this on a const Resources.
    void usingUboProgram(bool ubo=true) const
    {
      programUsed = ubo ? programUbo : programIdx;
      programUsedTris = ubo ? programUboTris : programIdxTris;
      programUsedLine = ubo ? programUboLine : programIdxLine;
    }

    Resources() {
      stateChangeID = 0;
      fboTextureChangeID = 0;
    }
  };

  // SetWireMode expects a variable named `resources` in scope when the
  // shader-switch variant is enabled.
#if USE_WIRE_SHADERSWITCH
  #define SetWireMode(state) glUseProgram((state) ? resources.programUsedLine : resources.programUsedTris )
#else
  #define SetWireMode(state) glVertexAttribI1i(VERTEX_WIREMODE,(state))
#endif

  // Base class of all render techniques. Concrete renderers register a Type
  // object, which acts as factory and carries name/availability/priority.
  class Renderer {
  public:
    struct DrawItem {
      bool solid;
      int materialIndex;
      int geometryIndex;
      int matrixIndex;
      int objectIndex;
      CadScene::DrawRange range;
    };

    // Strict ordering for sorting draw items: solid before wire, then by
    // material, geometry and matrix index.
    static bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b)
    {
      int diff = 0;
      diff = diff != 0 ? diff : (a.solid == b.solid ? 0 : ( a.solid ? -1 : 1 ));
      diff = diff != 0 ? diff : (a.materialIndex - b.materialIndex);
      diff = diff != 0 ? diff : (a.geometryIndex - b.geometryIndex);
      diff = diff != 0 ? diff : (a.matrixIndex - b.matrixIndex);

      return diff < 0;
    }

    class Type {
    public:
      // every constructed Type adds itself to the global registry
      Type() {
        getRegistry().push_back(this);
      }

    public:
      virtual bool loadPrograms( nvgl::ProgramManager &mgr ) { return true; }
      virtual void updatedPrograms( nvgl::ProgramManager &mgr ) { }
      virtual bool isAvailable() const = 0;
      virtual const char* name() const = 0;
      virtual Renderer* create() const = 0;
      virtual unsigned int priority() const { return 0xFF; }
    };

    typedef std::vector Registry;

    static bool s_bindless_ubo;

    // function-local static, so the registry exists before the first Type
    // constructor uses it
    static Registry& getRegistry()
    {
      static Registry s_registry;
      return s_registry;
    }

    static CullingSystem s_cullsys;
    static ScanSystem s_scansys;

  public:
    virtual void init(const CadScene* NV_RESTRICT scene, const Resources& resources) {}
    virtual void deinit() {}
    virtual void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager ) {}
    virtual ~Renderer() {}

    void fillDrawItems( std::vector& drawItems, size_t from, size_t to, bool solid, bool wire);

    Strategy m_strategy;
    const CadScene* NV_RESTRICT m_scene;
  };
}

#endif

================================================ FILE: rendererindexedmdi.cpp ================================================
/* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include #include #include "renderer.hpp" #include "common.h" #define USE_VERTEX_ASSIGNS (!USE_BASEINSTANCE) #define USE_GPU_INDIRECT 1 #define USE_CPU_INDIRECT (!USE_GPU_INDIRECT) namespace csfviewer { ////////////////////////////////////////////////////////////////////////// class RendererIndexedMDI: public Renderer { public: class Type : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "indexedmdi"; } Renderer* create() const { RendererIndexedMDI* renderer = new RendererIndexedMDI(); return renderer; } unsigned int priority() const { return 3; } }; class TypeVbum : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "indexedmdi_bindless"; } Renderer* create() const { RendererIndexedMDI* renderer = new RendererIndexedMDI(); renderer->m_vbum = true; return renderer; } unsigned int priority() const { return 3; } }; class TypeSort : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "indexedmdi_sorted"; } Renderer* create() const { RendererIndexedMDI* renderer = new RendererIndexedMDI(); renderer->m_sort = true; return renderer; } unsigned int priority() const { return 3; } }; 
class TypeSortVbum : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "indexedmdi_sorted_bindless"; } Renderer* create() const { RendererIndexedMDI* renderer = new RendererIndexedMDI(); renderer->m_vbum = true; renderer->m_sort = true; return renderer; } unsigned int priority() const { return 3; } }; private: struct DrawIndirectGL { GLuint count; GLuint instanceCount; GLuint firstIndex; GLint baseVertex; GLuint baseInstance; DrawIndirectGL () : count(0) , instanceCount(1) , firstIndex(0) , baseVertex(0) , baseInstance(0) {} }; struct IndexedCommand { DrawIndirectGL cmd; }; struct ShadeCommand { std::vector indirects; std::vector assigns; std::vector sizes; std::vector offsets; std::vector geometries; std::vector solids; #if USE_GPU_INDIRECT GLuint indirectGL; GLuint64 indirectADDR; #endif #if USE_VERTEX_ASSIGNS GLuint assignGL; GLuint64 assignADDR; #endif ShadeCommand() { #if USE_GPU_INDIRECT indirectGL = 0; #endif #if USE_VERTEX_ASSIGNS assignGL = 0; #endif } }; public: void init(const CadScene* NV_RESTRICT scene, const Resources& resources); void deinit(); void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager); bool m_vbum; bool m_sort; RendererIndexedMDI() : m_vbum(false) , m_sort(false) { } private: ShadeCommand m_shades[NUM_SHADES]; GLuint packBaseInstance( int matrixIndex, int materialIndex ) { assert( materialIndex <= 0xFFF ); assert( matrixIndex <= 0xFFFFF ); return (GLuint(matrixIndex) | (GLuint(materialIndex) << 20)); } void GenerateIndirects(std::vector& drawItems, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources ) { int lastMaterial = -1; int lastGeometry = -1; int lastMatrix = -1; bool lastSolid = true; ShadeCommand& sc = m_shades[shade]; sc.assigns.clear(); sc.indirects.clear(); sc.sizes.clear(); sc.offsets.clear(); sc.solids.clear(); sc.geometries.clear(); std::vector& 
assigns = sc.assigns; std::vector& indirectStream = sc.indirects; size_t begin = 0; int numAssigns = 0; for (int i = 0; i < drawItems.size(); i++){ const DrawItem& di = drawItems[i]; if (shade == SHADE_SOLID && !di.solid){ if (m_sort) break; continue; } if (lastGeometry != di.geometryIndex || (shade == SHADE_SOLIDWIRE && di.solid != lastSolid)){ sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((indirectStream.size()-begin)) ); sc.solids. push_back( lastSolid ); sc.geometries.push_back( lastGeometry ); begin = indirectStream.size(); } #if USE_VERTEX_ASSIGNS if (lastMatrix != di.matrixIndex || lastMaterial != di.materialIndex) { // push indices assigns.push_back(di.matrixIndex); assigns.push_back(di.materialIndex); numAssigns++; lastMatrix = di.matrixIndex; lastMaterial = di.materialIndex; } #endif IndexedCommand drawelems; drawelems.cmd.count = di.range.count; drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint)); #if USE_VERTEX_ASSIGNS drawelems.cmd.baseInstance = numAssigns - 1; #else drawelems.cmd.baseInstance = packBaseInstance(di.matrixIndex, di.materialIndex); #endif indirectStream.push_back(drawelems); lastGeometry = di.geometryIndex; lastSolid = di.solid; } sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((indirectStream.size()-begin)) ); sc.solids. 
push_back( lastSolid ); sc.geometries.push_back( lastGeometry ); } }; static RendererIndexedMDI::Type s_indexed; static RendererIndexedMDI::TypeVbum s_indexed_vbum; static RendererIndexedMDI::TypeSort s_indexedsort; static RendererIndexedMDI::TypeSortVbum s_indexedsort_vbum; void RendererIndexedMDI::init( const CadScene* NV_RESTRICT scene, const Resources& resources ) { m_scene = scene; resources.usingUboProgram(false); std::vector drawItems; fillDrawItems(drawItems,0,scene->m_objects.size(), true, true); if (m_sort){ std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups); } // build SC GenerateIndirects(drawItems, SHADE_SOLID, scene, resources); GenerateIndirects(drawItems, SHADE_SOLIDWIRE, scene, resources); for (size_t i = 0; i <= SHADE_SOLIDWIRE; i++){ ShadeCommand& sc = m_shades[i]; #if USE_GPU_INDIRECT glCreateBuffers(1,&sc.indirectGL); glNamedBufferStorage( sc.indirectGL, sizeof(IndexedCommand) * sc.indirects.size(), &sc.indirects[0], 0 ); if (m_vbum){ glGetNamedBufferParameterui64vNV(sc.indirectGL, GL_BUFFER_GPU_ADDRESS_NV, &sc.indirectADDR); glMakeNamedBufferResidentNV(sc.indirectGL, GL_READ_ONLY); } #endif #if USE_VERTEX_ASSIGNS glCreateBuffers(1,&sc.assignGL); glNamedBufferStorage( sc.assignGL, sizeof(int) * sc.assigns.size(), &sc.assigns[0], 0 ); if (m_vbum){ glGetNamedBufferParameterui64vNV(sc.assignGL, GL_BUFFER_GPU_ADDRESS_NV, &sc.assignADDR); glMakeNamedBufferResidentNV(sc.assignGL, GL_READ_ONLY); } #endif } m_shades[SHADE_SOLIDWIRE_SPLIT] = m_shades[SHADE_SOLIDWIRE]; } void RendererIndexedMDI::deinit() { for (size_t i = 0; i < SHADE_SOLIDWIRE; i++){ ShadeCommand& sc = m_shades[i]; if (m_vbum){ #if USE_GPU_INDIRECT glMakeNamedBufferNonResidentNV(sc.indirectGL); #endif #if USE_VERTEX_ASSIGNS glMakeNamedBufferNonResidentNV(sc.assignGL); #endif } #if USE_GPU_INDIRECT glDeleteBuffers(1,&sc.indirectGL); #endif #if USE_VERTEX_ASSIGNS glDeleteBuffers(1,&sc.assignGL); #endif } } void RendererIndexedMDI::draw( ShadeType shadetype, const 
Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager ) { const CadScene* NV_RESTRICT scene = m_scene; bool vbum = m_vbum; scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL); glUseProgram(resources.programIdx); if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){ glEnable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(1,1); } SetWireMode(GL_FALSE); #if USE_VERTEX_ASSIGNS glVertexAttribIFormat(VERTEX_ASSIGNS,2,GL_INT,0); glVertexAttribBinding(VERTEX_ASSIGNS,1); glEnableVertexAttribArray(VERTEX_ASSIGNS); glBindVertexBuffer(1,0,0,sizeof(GLint)*2); glVertexBindingDivisor(1,1); #endif if (vbum){ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); #if USE_GPU_INDIRECT glEnableClientState(GL_DRAW_INDIRECT_UNIFIED_NV); #endif } if (vbum && s_bindless_ubo){ glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, UBO_MATERIAL, scene->m_materialsADDR, sizeof(CadScene::Material) * scene->m_materials.size() ); glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV, UBO_SCENE,resources.sceneAddr,sizeof(SceneData)); } else{ glBindBufferBase(GL_UNIFORM_BUFFER, UBO_SCENE, resources.sceneUbo); glBindBufferBase(GL_UNIFORM_BUFFER, UBO_MATERIAL, scene->m_materialsGL); } nvgl::bindMultiTexture(GL_TEXTURE0 + TEX_MATRICES, GL_TEXTURE_BUFFER, scene->m_matricesTexGL); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); { ShadeCommand& sc = m_shades[shadetype]; if (vbum){ #if USE_GPU_INDIRECT glBufferAddressRangeNV(GL_DRAW_INDIRECT_ADDRESS_NV, 0, sc.indirectADDR, sc.indirects.size() * sizeof(IndexedCommand) ); #endif #if USE_VERTEX_ASSIGNS glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 1, sc.assignADDR, sc.assigns.size() * sizeof(GLint)); #endif } else{ #if USE_GPU_INDIRECT glBindBuffer(GL_DRAW_INDIRECT_BUFFER, sc.indirectGL); #endif #if USE_VERTEX_ASSIGNS glBindVertexBuffer(1, sc.assignGL, 0, sizeof(GLint)*2); #endif } #if USE_CPU_INDIRECT size_t 
offset = (size_t)&sc.indirects[0]; #else size_t offset = 0; #endif int lastGeometry = -1; bool lastSolid = true; for (size_t i = 0; i < sc.geometries.size(); i++){ int geometryIndex = sc.geometries[i]; if (geometryIndex != lastGeometry){ const CadScene::Geometry& geo = m_scene->m_geometry[ geometryIndex ]; if (vbum){ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex)); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0, geo.iboADDR, (geo.numIndexSolid+geo.numIndexWire) * sizeof(GLuint)); } else{ glBindVertexBuffer(0, geo.vboGL, 0, sizeof(CadScene::Vertex)); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo.iboGL); } lastGeometry = geometryIndex; } bool solid = sc.solids[i]; if (solid != lastSolid){ SetWireMode((!solid)); } glMultiDrawElementsIndirect(solid ? GL_TRIANGLES : GL_LINES,GL_UNSIGNED_INT, (const void*)(offset + sc.offsets[i] * sizeof(IndexedCommand)), GLsizei(sc.sizes[i]), 0); lastSolid = solid; } } #if USE_VERTEX_ASSIGNS glDisableVertexAttribArray(VERTEX_ASSIGNS); glBindVertexBuffer(1,0,0,0); glVertexBindingDivisor(1,0); #endif glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); nvgl::bindMultiTexture(GL_TEXTURE0 + TEX_MATRICES, GL_TEXTURE_BUFFER, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindVertexBuffer(0,0,0,0); if (vbum){ glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); #if USE_GPU_INDIRECT glDisableClientState(GL_DRAW_INDIRECT_UNIFIED_NV); #endif if (s_bindless_ubo){ glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } } if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){ glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); } SetWireMode(GL_FALSE); scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL); } } ================================================ FILE: renderertoken.cpp 
================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "tokenbase.hpp" #include "common.h" namespace csfviewer { ////////////////////////////////////////////////////////////////////////// class RendererToken: public Renderer, public TokenRendererBase { public: class Type : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenbuffer"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); return renderer; } unsigned int priority() const { return 9; } }; class TypeAddr : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenbuffer_address"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_useaddress = true; return renderer; } unsigned int priority() const { return 9; } }; class TypeList : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenlist"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_uselist = true; return 
renderer; } unsigned int priority() const { return 8; } }; class TypeEmu : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "tokenbuffer_emulated"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_emulate = true; return renderer; } unsigned int priority() const { return 9; } }; class TypeSort : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenbuffer_sorted"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_sort = true; return renderer; } unsigned int priority() const { return 9; } }; class TypeSortAddr : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenbuffer_sorted_address"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_useaddress = true; renderer->m_sort = true; return renderer; } unsigned int priority() const { return 9; } }; class TypeSortList : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenlist_sorted"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_uselist = true; renderer->m_sort = true; return renderer; } unsigned int priority() const { return 8; } }; class TypeSortEmu : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "tokenbuffer_sorted_emulated"; } Renderer* create() const { RendererToken* renderer = new RendererToken(); renderer->m_emulate = true; renderer->m_sort = true; return renderer; } unsigned int priority() const { return 9; } }; public: void init(const CadScene* NV_RESTRICT scene, const Resources& resources); void deinit(); void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, 
nvgl::ProgramManager &progManager); private: std::vector m_drawItems; void GenerateTokens(std::vector& drawItems, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources ) { int lastMaterial = -1; int lastGeometry = -1; int lastMatrix = -1; bool lastSolid = true; ShadeCommand& sc = m_shades[shade]; sc.fbos.clear(); sc.offsets.clear(); sc.sizes.clear(); sc.states.clear(); std::string& tokenStream = m_tokenStreams[shade]; tokenStream.clear(); size_t begin = 0; { NVTokenUbo ubo; ubo.cmd.index = UBO_SCENE; ubo.cmd.stage = UBOSTAGE_VERTEX; ubo.setBuffer(resources.sceneUbo, resources.sceneAddr, 0, sizeof(SceneData)); nvtokenEnqueue(tokenStream, ubo); ubo.cmd.stage = UBOSTAGE_FRAGMENT; nvtokenEnqueue(tokenStream, ubo); #if USE_POLYOFFSETTOKEN NVTokenPolygonOffset offset; offset.cmd.bias = 1; offset.cmd.scale = 1; nvtokenEnqueue(tokenStream, offset); #endif } for (int i = 0; i < drawItems.size(); i++){ const DrawItem& di = drawItems[i]; if (shade == SHADE_SOLID && !di.solid){ continue; } if (shade == SHADE_SOLIDWIRE && di.solid != lastSolid){ sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((tokenStream.size()-begin)) ); sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] ); sc.fbos. 
push_back( 0 ); begin = tokenStream.size(); } if (lastGeometry != di.geometryIndex){ const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex]; NVTokenVbo vbo; vbo.cmd.index = 0; vbo.setBuffer(geo.vboGL, geo.vboADDR, 0); nvtokenEnqueue(tokenStream, vbo); NVTokenIbo ibo; ibo.setBuffer(geo.iboGL, geo.iboADDR); ibo.cmd.typeSizeInByte = 4; nvtokenEnqueue(tokenStream, ibo); lastGeometry = di.geometryIndex; } if (lastMatrix != di.matrixIndex){ NVTokenUbo ubo; ubo.cmd.index = UBO_MATRIX; ubo.cmd.stage = UBOSTAGE_VERTEX; ubo.setBuffer(scene->m_matricesGL, scene->m_matricesADDR, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode)); nvtokenEnqueue(tokenStream, ubo); lastMatrix = di.matrixIndex; } if (lastMaterial != di.materialIndex){ NVTokenUbo ubo; ubo.cmd.index = UBO_MATERIAL; ubo.cmd.stage = UBOSTAGE_FRAGMENT; ubo.setBuffer(scene->m_materialsGL, scene->m_materialsADDR, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material)); nvtokenEnqueue(tokenStream, ubo); lastMaterial = di.materialIndex; } NVTokenDrawElemsUsed drawelems; drawelems.setMode(di.solid ? GL_TRIANGLES : GL_LINES); drawelems.cmd.count = di.range.count; drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint)); nvtokenEnqueue(tokenStream, drawelems); lastSolid = di.solid; } sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((tokenStream.size()-begin)) ); if (shade == SHADE_SOLID){ sc.states. push_back( m_stateObjects[ STATE_TRIS ] ); } else{ sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] ); } sc.fbos. 
push_back( 0 ); } }; static RendererToken::Type s_token; static RendererToken::TypeAddr s_token_addr; static RendererToken::TypeList s_token_list; static RendererToken::TypeEmu s_token_emu; static RendererToken::TypeSort s_sorttoken; static RendererToken::TypeSortAddr s_sorttoken_addr; static RendererToken::TypeSortList s_sorttoken_list; static RendererToken::TypeSortEmu s_sorttoken_emu; void RendererToken::init(const CadScene* NV_RESTRICT scene, const Resources& resources) { TokenRendererBase::init(s_bindless_ubo, !!has_GL_NV_vertex_buffer_unified_memory); resources.usingUboProgram(true); m_scene = scene; std::vector drawItems; fillDrawItems(drawItems,0,scene->m_objects.size(), true, true); if (USE_PERFRAMEBUILD){ m_drawItems = drawItems; } if (m_sort){ std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups); } GenerateTokens(drawItems, SHADE_SOLID, scene, resources); TokenRendererBase::printStats(SHADE_SOLID); GenerateTokens(drawItems, SHADE_SOLIDWIRE, scene, resources); TokenRendererBase::printStats(SHADE_SOLIDWIRE); TokenRendererBase::finalize(resources); } void RendererToken::deinit() { TokenRendererBase::deinit(); m_drawItems.clear(); } void RendererToken::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager) { const CadScene* NV_RESTRICT scene = m_scene; // do state setup (primarily for sake of state capturing) scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL); if (m_bindlessVboUbo){ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } else{ glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo); } if (USE_PERFRAMEBUILD){ #if 0 std::vector drawItems; { nvh::Profiler::Section _tempTimer(profiler ,"Copy"); drawItems = m_drawItems; } #else std::vector& drawItems = m_drawItems; #endif { nvh::Profiler::Section _tempTimer(profiler ,"Sort"); 
std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups); } { nvh::Profiler::Section _tempTimer(profiler ,"Token"); GenerateTokens(drawItems, shadetype, scene, resources); } if (!m_emulate && !m_uselist){ nvh::Profiler::Section _tempTimer(profiler ,"Build"); ShadeCommand & shade = m_shades[shadetype]; glInvalidateBufferData(m_tokenBuffers[shadetype]); glNamedBufferSubData(m_tokenBuffers[shadetype],shade.offsets[0], m_tokenStreams[shadetype].size(), &m_tokenStreams[shadetype][0]); } } if (USE_STATEOBJ_REBUILD){ nvh::Profiler::Section section(profiler,"state"); for (int i = 0; i < 25; i++){ m_stateChangeID = resources.stateChangeID + 1; m_fboStateChangeID = resources.fboTextureChangeID + 1; captureState(resources); } } else{ captureState(resources); } if (!USE_POLYOFFSETTOKEN && (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT)){ glPolygonOffset(1,1); } if (m_hwsupport){ if (m_uselist){ glCallCommandListNV(m_commandLists[shadetype]); } else{ ShadeCommand & shade = m_shades[shadetype]; if (m_useaddress){ glDrawCommandsStatesAddressNV(&shade.addresses[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) ); } else{ glDrawCommandsStatesNV(m_tokenBuffers[shadetype], &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) ); } } } else{ ShadeCommand & shade = m_shades[shadetype]; std::string& stream = m_tokenStreams[shadetype]; renderShadeCommandSW(&stream[0], stream.size(), shade); } glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindVertexBuffer(0,0,0,0); glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); if (m_bindlessVboUbo){ glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } 
scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL); } } ================================================ FILE: renderertokensortcull.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "tokenbase.hpp" #include "cullingsystem.hpp" #include "common.h" namespace csfviewer { ////////////////////////////////////////////////////////////////////////// #define USE_TEMPORALRASTER 1 #define USE_OBJECTSORT_CULLING 1 class RendererCullSortToken : public Renderer, public TokenRendererBase { public: class Shared { public: nvgl::ProgramID token_sizes, token_scan, token_cmds; static Shared& get() { static Shared res; return res; } Shared() : loaded(false) {} bool load(nvgl::ProgramManager &progManager) { if (loaded) return true; loaded = true; token_sizes = progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "cull-tokensizes.vert.glsl")); token_cmds = progManager.createProgram( nvgl::ProgramManager::Definition(GL_VERTEX_SHADER, "cull-tokencmds.vert.glsl")); if (!progManager.areProgramsValid()) return false; return true; } private: bool loaded; }; class Type : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const 
char* name() const { return "tokenbuffer_cullsorted"; } Renderer* create() const { RendererCullSortToken* renderer = new RendererCullSortToken(); return renderer; } bool loadPrograms( nvgl::ProgramManager &mgr) { return Shared::get().load(mgr); } unsigned int priority() const { return 9; } }; class TypeEmu : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "tokenbuffer_cullsorted_emulated"; } Renderer* create() const { RendererCullSortToken* renderer = new RendererCullSortToken(); renderer->m_emulate = true; return renderer; } bool loadPrograms( nvgl::ProgramManager &mgr ) { return Shared::get().load(mgr); } unsigned int priority() const { return 9; } }; public: void init(const CadScene* NV_RESTRICT scene, const Resources& resources); void deinit(); void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager); void drawScene(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager, const char*what); private: static bool DrawItem_compare_groups(const DrawItem& a, const DrawItem& b) { int diff = 0; diff = diff != 0 ? diff : (a.solid == b.solid ? 0 : ( a.solid ? -1 : 1 )); #if USE_OBJECTSORT_CULLING diff = diff != 0 ? diff : (a.objectIndex - b.objectIndex); #endif diff = diff != 0 ? diff : (a.materialIndex - b.materialIndex); diff = diff != 0 ? diff : (a.geometryIndex - b.geometryIndex); diff = diff != 0 ? 
diff : (a.matrixIndex - b.matrixIndex); return diff < 0; } struct CullSequence { GLuint offset; GLint endoffset; int first; int num; }; struct CullShade { GLuint numTokens; std::vector sequnces; // static buffers ScanSystem::Buffer tokenOrig; // for each command, #cmds rounded to multiple of 4 ScanSystem::Buffer tokenSizes; // in integers ScanSystem::Buffer tokenObjects; // -1 if no drawcall, otherwise object ScanSystem::Buffer tokenOffsets; // offsets for each command ScanSystem::Buffer tokenOutSizes; ScanSystem::Buffer tokenOutScan; ScanSystem::Buffer tokenOutScanOffset; }; class CullJobToken : public CullingSystem::Job { public: void resultFromBits( const CullingSystem::Buffer& bufferVisBitsCurrent ); GLuint program_sizes; GLuint program_cmds; // dynamic ScanSystem::Buffer tokenOut; CullShade* NV_RESTRICT cullshade; }; std::vector m_drawItems; CullJobToken m_culljob; CullShade m_cullshades[NUM_SHADES]; GLuint m_maxGrps; void PrepareCullJob(ShadeType shade); template static void handleToken(std::vector &tokenSizes, std::vector &tokenOffsets,std::vector& tokenObjects, T &token, size_t stream, int obj=-1) { tokenSizes.push_back(GLuint(sizeof(T) / sizeof(GLuint) )); tokenOffsets.push_back(GLuint( (stream - sizeof(T))/ sizeof(GLuint) )); tokenObjects.push_back(obj); } void GenerateTokens(std::vector& drawItems, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources ) { int lastMaterial = -1; int lastGeometry = -1; int lastMatrix = -1; int lastObject = -1; bool lastSolid = true; ShadeCommand& sc = m_shades[shade]; CullShade& cull = m_cullshades[shade]; sc.fbos.clear(); sc.offsets.clear(); sc.sizes.clear(); sc.states.clear(); std::string& tokenStream = m_tokenStreams[shade]; tokenStream.clear(); cull.numTokens = 0; GLuint beginToken = 0; size_t begin = 0; size_t start = begin; std::vector tokenSizes; std::vector tokenOffsets; std::vector tokenObjects; { NVTokenUbo ubo; ubo.cmd.index = UBO_SCENE; ubo.cmd.stage = UBOSTAGE_VERTEX; 
ubo.setBuffer(resources.sceneUbo, resources.sceneAddr, 0, sizeof(SceneData) ); nvtokenEnqueue(tokenStream, ubo); handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, -1); cull.numTokens++; ubo.cmd.stage = UBOSTAGE_FRAGMENT; nvtokenEnqueue(tokenStream, ubo); handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, -1); cull.numTokens++; #if USE_POLYOFFSETTOKEN NVTokenPolygonOffset offset; offset.cmd.bias = 1; offset.cmd.scale = 1; nvtokenEnqueue(tokenStream, offset); handleToken(tokenSizes,tokenOffsets,tokenObjects, offset, tokenStream.size()-start, -1); cull.numTokens++; #endif } for (int i = 0; i < drawItems.size(); i++){ const DrawItem& di = drawItems[i]; if (shade == SHADE_SOLID && !di.solid){ continue; } int bufferObjIndex = -1; #if USE_OBJECTSORT_CULLING bufferObjIndex = di.objectIndex; if (di.objectIndex != lastObject || di.solid != lastSolid){ // whenever an object changes or we switches from solid to edges (happens only once in this sorted scenario) // we have to ensure all buffers are reset as well lastObject = di.objectIndex; lastMaterial = -1; lastGeometry = -1; lastMatrix = -1; } #endif if (shade == SHADE_SOLIDWIRE && di.solid != lastSolid){ sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((tokenStream.size()-begin)) ); sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] ); sc.fbos. 
push_back( 0 ); CullSequence cullseq; cullseq.num = cull.numTokens - beginToken; cullseq.first = beginToken; cullseq.offset = GLuint((begin-start)/sizeof(GLuint)); cullseq.endoffset = GLuint((tokenStream.size()-start)/sizeof(GLuint)); cull.sequnces.push_back(cullseq); beginToken = cull.numTokens; begin = tokenStream.size(); } if (lastGeometry != di.geometryIndex){ const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex]; NVTokenVbo vbo; vbo.cmd.index = 0; vbo.setBuffer(geo.vboGL, geo.vboADDR, 0); nvtokenEnqueue(tokenStream, vbo); handleToken(tokenSizes,tokenOffsets,tokenObjects, vbo, tokenStream.size()-start, bufferObjIndex); cull.numTokens++; NVTokenIbo ibo; ibo.setBuffer(geo.iboGL, geo.iboADDR); ibo.cmd.typeSizeInByte = 4; nvtokenEnqueue(tokenStream, ibo); handleToken(tokenSizes,tokenOffsets,tokenObjects, vbo, tokenStream.size()-start, bufferObjIndex); cull.numTokens++; lastGeometry = di.geometryIndex; } if (lastMatrix != di.matrixIndex){ NVTokenUbo ubo; ubo.cmd.index = UBO_MATRIX; ubo.cmd.stage = UBOSTAGE_VERTEX; ubo.setBuffer(scene->m_matricesGL, scene->m_matricesADDR, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode) ); nvtokenEnqueue(tokenStream, ubo); handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, bufferObjIndex); cull.numTokens++; lastMatrix = di.matrixIndex; } if (lastMaterial != di.materialIndex){ NVTokenUbo ubo; ubo.cmd.index = UBO_MATERIAL; ubo.cmd.stage = UBOSTAGE_FRAGMENT; ubo.setBuffer(scene->m_materialsGL, scene->m_materialsADDR, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material) ); nvtokenEnqueue(tokenStream, ubo); handleToken(tokenSizes,tokenOffsets,tokenObjects, ubo, tokenStream.size()-start, bufferObjIndex); cull.numTokens++; lastMaterial = di.materialIndex; } NVTokenDrawElemsUsed drawelems; drawelems.setMode(di.solid ? 
GL_TRIANGLES : GL_LINES); drawelems.cmd.count = di.range.count; drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint)); nvtokenEnqueue(tokenStream, drawelems); handleToken(tokenSizes,tokenOffsets,tokenObjects, drawelems, tokenStream.size()-start, di.objectIndex); cull.numTokens++; lastSolid = di.solid; } sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((tokenStream.size()-begin)) ); if (shade == SHADE_SOLID){ sc.states. push_back( m_stateObjects[ STATE_TRIS ] ); } else{ sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] ); } sc.fbos. push_back( 0 ); CullSequence cullseq; cullseq.num = cull.numTokens - beginToken; cullseq.first = beginToken; cullseq.offset = GLuint((begin-start)/sizeof(GLuint)); cullseq.endoffset = GLuint((tokenStream.size()-start)/sizeof(GLuint)); cull.sequnces.push_back(cullseq); // create buffers for culling cull.tokenOrig.create(tokenStream.size() - start,&tokenStream[start], 0); cull.tokenOffsets.create(sizeof(GLuint)*cull.numTokens,&tokenOffsets[0], 0); cull.tokenSizes. create(sizeof(GLuint)*cull.numTokens,&tokenSizes[0], 0); cull.tokenObjects.create(sizeof(GLint)*cull.numTokens,&tokenObjects[0], 0); int round4 = ((cull.numTokens+3)/4)*4; cull.tokenOutScan. create(sizeof(GLuint)*round4,NULL, 0); cull.tokenOutScanOffset.create(std::max(ScanSystem::getOffsetSize(round4), size_t(16)),NULL, 0); cull.tokenOutSizes. 
create(sizeof(GLuint)*round4,NULL, 0); } }; // not yet fully implemented static RendererCullSortToken::Type s_cullsorttoken; static RendererCullSortToken::TypeEmu s_cullsorttoken_emu; void RendererCullSortToken::init(const CadScene* NV_RESTRICT scene, const Resources& resources) { TokenRendererBase::init(s_bindless_ubo, !!has_GL_NV_vertex_buffer_unified_memory); resources.usingUboProgram(true); m_scene = scene; glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,(GLint*)&m_maxGrps); std::vector drawItems; fillDrawItems(drawItems,0,scene->m_objects.size(), true, true); std::sort(drawItems.begin(),drawItems.end(),DrawItem_compare_groups); GenerateTokens(drawItems, SHADE_SOLID, scene, resources); TokenRendererBase::printStats(SHADE_SOLID); GenerateTokens(drawItems, SHADE_SOLIDWIRE, scene, resources); TokenRendererBase::printStats(SHADE_SOLIDWIRE); TokenRendererBase::finalize(resources); if (m_emulate){ for (int i = 0; i < NUM_SHADES; i++){ glNamedBufferStorage(m_tokenBuffers[i], m_tokenStreams[i].size(), &m_tokenStreams[i][0], GL_MAP_READ_BIT); } } m_culljob.m_numObjects = int(m_scene->m_objects.size()); int roundedBits = (m_culljob.m_numObjects+31)/32; int roundedInts = roundedBits*32; m_culljob.m_bufferBboxes = CullingSystem::Buffer(m_scene->m_geometryBboxesGL, sizeof(CadScene::BBox) * m_scene->m_geometryBboxes.size()); m_culljob.m_bufferMatrices = CullingSystem::Buffer(m_scene->m_matricesGL, sizeof(CadScene::MatrixNode) * m_scene->m_matrices.size()); m_culljob.m_bufferObjectMatrix = CullingSystem::Buffer(m_scene->m_objectAssignsGL, sizeof(GLint)*2* m_scene->m_objectAssigns.size()); m_culljob.m_bufferObjectMatrix.stride = sizeof(GLint)*2; m_culljob.m_bufferObjectBbox = m_culljob.m_bufferObjectMatrix; m_culljob.m_bufferObjectBbox.offset = sizeof(GLint); m_culljob.m_bufferObjectBbox.size -= sizeof(GLint); m_culljob.m_bufferObjectBbox.stride = sizeof(GLint)*2; m_culljob.m_bufferVisBitsCurrent.create(sizeof(int)*roundedBits,NULL,0); GLuint full = ~0; 
glClearNamedBufferData(m_culljob.m_bufferVisBitsCurrent.buffer,GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,&full); m_culljob.m_bufferVisBitsLast.create(sizeof(int)*roundedBits,NULL,0); glClearNamedBufferData(m_culljob.m_bufferVisBitsLast.buffer,GL_R32UI,GL_RED_INTEGER,GL_UNSIGNED_INT,0); m_culljob.m_bufferVisOutput.create(sizeof(int)*roundedInts,NULL,0); m_cullshades[SHADE_SOLIDWIRE_SPLIT] = m_cullshades[SHADE_SOLIDWIRE]; } void RendererCullSortToken::deinit() { for (int i = 0; i < 2; i++){ CullShade &cs = m_cullshades[i]; glDeleteBuffers(1,&cs.tokenOrig.buffer); glDeleteBuffers(1,&cs.tokenOffsets.buffer); glDeleteBuffers(1,&cs.tokenSizes.buffer); glDeleteBuffers(1,&cs.tokenObjects.buffer); glDeleteBuffers(1,&cs.tokenOutScan.buffer); glDeleteBuffers(1,&cs.tokenOutScanOffset.buffer); glDeleteBuffers(1,&cs.tokenOutSizes.buffer); } glDeleteBuffers(1,&m_culljob.m_bufferVisBitsCurrent.buffer); glDeleteBuffers(1,&m_culljob.m_bufferVisBitsLast.buffer); glDeleteBuffers(1,&m_culljob.m_bufferVisOutput.buffer); TokenRendererBase::deinit(); m_drawItems.clear(); } void RendererCullSortToken::PrepareCullJob(ShadeType shade) { ShadeCommand& sc = m_shades[shade]; RendererCullSortToken::CullJobToken& job = m_culljob; job.cullshade = &m_cullshades[shade]; // setup buffer offsets job.tokenOut.buffer = m_tokenBuffers[shade]; job.tokenOut.offset = sc.offsets[0]; job.tokenOut.size = m_cullshades[shade].tokenOrig.size; } void RendererCullSortToken::CullJobToken::resultFromBits( const CullingSystem::Buffer& bufferVisBitsCurrent ) { // first compute sizes based on culling result glUseProgram(program_sizes); glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenSizes.buffer); glVertexAttribIPointer(0,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenSizes.offset); glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenObjects.buffer); glVertexAttribIPointer(1,1,GL_INT,0,(const void*)cullshade->tokenObjects.offset); glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); 
cullshade->tokenOutSizes.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); bufferVisBitsCurrent.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT); GLuint numTokens = cullshade->numTokens; glEnable(GL_RASTERIZER_DISCARD); glDrawArrays(GL_POINTS,0, numTokens); glDisableVertexAttribArray(0); glDisableVertexAttribArray(1); Renderer::s_scansys.scanData(((numTokens+3)/4)*4,cullshade->tokenOutSizes,cullshade->tokenOutScan,cullshade->tokenOutScanOffset); glUseProgram(program_cmds); glUniform1ui(glGetUniformLocation(program_cmds,"terminateCmd"),s_nvcmdlist_header[GL_TERMINATE_SEQUENCE_COMMAND_NV]); glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenOffsets.buffer); glVertexAttribIPointer(0,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenOffsets.offset); glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenOutSizes.buffer); glVertexAttribIPointer(1,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenOutSizes.offset); glBindBuffer(GL_ARRAY_BUFFER, cullshade->tokenOutScan.buffer); glVertexAttribIPointer(2,1,GL_UNSIGNED_INT,0,(const void*)cullshade->tokenOutScan.offset); glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); glEnableVertexAttribArray(2); tokenOut.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); cullshade->tokenOrig.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1); cullshade->tokenOutSizes.BindBufferRange(GL_SHADER_STORAGE_BUFFER,2); cullshade->tokenOutScan.BindBufferRange(GL_SHADER_STORAGE_BUFFER,3); cullshade->tokenOutScanOffset.BindBufferRange(GL_SHADER_STORAGE_BUFFER,4); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT); for (GLuint i = 0; i < cullshade->sequnces.size() ; i++){ glUniform1ui(glGetUniformLocation(program_cmds,"startOffset"),cullshade->sequnces[i].offset); glUniform1i (glGetUniformLocation(program_cmds,"startID"),cullshade->sequnces[i].first); glUniform1ui(glGetUniformLocation(program_cmds,"endOffset"),cullshade->sequnces[i].endoffset); glUniform1i 
(glGetUniformLocation(program_cmds,"endID"),cullshade->sequnces[i].first + cullshade->sequnces[i].num - 1); glDrawArrays(GL_POINTS,cullshade->sequnces[i].first,cullshade->sequnces[i].num); } glDisableVertexAttribArray(0); glDisableVertexAttribArray(1); glDisableVertexAttribArray(2); glBindBuffer(GL_ARRAY_BUFFER,0); for (GLuint i = 0; i < 5; i++){ glBindBufferBase(GL_SHADER_STORAGE_BUFFER,i,0); } glDisable(GL_RASTERIZER_DISCARD); } void RendererCullSortToken::drawScene(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager, const char*what) { const CadScene* NV_RESTRICT scene = m_scene; nvh::Profiler::Section section(profiler,what); // do state setup (primarily for sake of state capturing) m_scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL); if (m_bindlessVboUbo){ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } else{ glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo); } captureState(resources); if (!USE_POLYOFFSETTOKEN && (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT)){ glPolygonOffset(1,1); } if (m_hwsupport){ if (m_uselist){ glCallCommandListNV(m_commandLists[shadetype]); } else{ ShadeCommand & shade = m_shades[shadetype]; glDrawCommandsStatesNV(m_tokenBuffers[shadetype], &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) ); } } else{ ShadeCommand & shade = m_shades[shadetype]; std::string& stream = m_tokenStreams[shadetype]; renderShadeCommandSW(&stream[0], stream.size(), shade); } glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindVertexBuffer(0,0,0,0); glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); if (m_bindlessVboUbo){ 
glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL); } #define CULL_TEMPORAL_NOFRUSTUM 1 void RendererCullSortToken::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager) { // broken in other types atm //shadetype = SHADE_SOLID; m_culljob.program_cmds = progManager.get( Shared::get().token_cmds ); m_culljob.program_sizes = progManager.get( Shared::get().token_sizes ); PrepareCullJob(shadetype); CullingSystem& cullSys = Renderer::s_cullsys; #if !USE_TEMPORALRASTER { nvh::Profiler::Section section(profiler,"CullF"); cullSys.buildOutput( CullingSystem::METHOD_FRUSTUM, m_culljob, resources.cullView ); cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT ); { nvh::Profiler::Section section(profiler,"ResF"); cullSys.resultFromBits( m_culljob ); } if (m_emulate){ nvh::Profiler::Section read(profiler,"Read"); m_culljob.tokenOut.GetNamedBufferSubData(&m_tokenStream[m_culljob.tokenOut.offset]); GLuint* first = (GLuint*)&m_tokenStream[m_culljob.tokenOut.offset]; first[0] = first[0]; } else { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_culljob.tokenOut.buffer); glMemoryBarrier(GL_COMMAND_BARRIER_BIT); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); //glFinish(); } } drawScene(shadetype,resources,profiler,progManager, "Last"); #else { nvh::Profiler::Section section(profiler,"CullF"); #if CULL_TEMPORAL_NOFRUSTUM { nvh::Profiler::Section section(profiler,"ResF"); cullSys.resultFromBits( m_culljob ); } cullSys.swapBits( m_culljob ); // last/output #else cullSys.buildOutput( CullingSystem::METHOD_FRUSTUM, m_culljob, resources.cullView ); cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT_AND_LAST ); { nvh::Profiler::Section section(profiler,"ResF"); cullSys.resultFromBits( m_culljob ); } #endif if (m_emulate){ nvh::Profiler::Section 
read(profiler,"Read"); void* data = &m_tokenStreams[shadetype][m_culljob.tokenOut.offset]; m_culljob.tokenOut.GetNamedBufferSubData(data); } else { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_culljob.tokenOut.buffer); glMemoryBarrier(GL_COMMAND_BARRIER_BIT); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); //glFinish(); } } drawScene(shadetype,resources,profiler,progManager, "Last"); { nvh::Profiler::Section section(profiler,"CullR"); cullSys.buildOutput( CullingSystem::METHOD_RASTER, m_culljob, resources.cullView ); cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT_AND_NOT_LAST ); { nvh::Profiler::Section section(profiler,"ResR"); cullSys.resultFromBits( m_culljob ); } // for next frame cullSys.bitsFromOutput( m_culljob, CullingSystem::BITS_CURRENT ); #if !CULL_TEMPORAL_NOFRUSTUM cullSys.swapBits( m_culljob ); // last/output #endif if (m_emulate){ nvh::Profiler::Section read(profiler,"Read"); void* data = &m_tokenStreams[shadetype][m_culljob.tokenOut.offset]; m_culljob.tokenOut.GetNamedBufferSubData(data); } else { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_culljob.tokenOut.buffer); glMemoryBarrier(GL_COMMAND_BARRIER_BIT); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); //glFinish(); } } drawScene(shadetype,resources,profiler,progManager, "New"); #endif } } ================================================ FILE: renderertokenstream.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "tokenbase.hpp" #include "common.h" namespace csfviewer { ////////////////////////////////////////////////////////////////////////// class RendererTokenStream: public Renderer, public TokenRendererBase { public: class Type : public Renderer::Type { bool isAvailable() const { return TokenRendererBase::hasNativeCommandList(); } const char* name() const { return "tokenstream"; } Renderer* create() const { RendererTokenStream* renderer = new RendererTokenStream(); return renderer; } unsigned int priority() const { return 10; } }; class TypeEmu : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "tokenstream_emulated"; } Renderer* create() const { RendererTokenStream* renderer = new RendererTokenStream(); renderer->m_emulate = true; return renderer; } unsigned int priority() const { return 10; } }; public: void init(const CadScene* NV_RESTRICT scene, const Resources& resources); void deinit(); void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager); private: static const size_t bufferSize = 1024*16; std::vector m_drawItems; size_t GenerateTokens(NVPointerStream& tokenStream, std::vector& drawItems, size_t from, ShadeType shade, const CadScene* NV_RESTRICT scene, const Resources& resources ) { int lastMaterial = -1; int lastGeometry = -1; int lastMatrix = -1; bool lastSolid = true; ShadeCommand& sc = m_shades[shade]; sc.fbos.clear(); sc.offsets.clear(); sc.sizes.clear(); sc.states.clear(); size_t begin = 0; { NVTokenUbo ubo; ubo.cmd.index = UBO_SCENE; ubo.cmd.stage = UBOSTAGE_VERTEX; ubo.setBuffer(resources.sceneUbo, resources.sceneAddr, 
0, sizeof(SceneData)); nvtokenEnqueue(tokenStream, ubo); ubo.cmd.stage = UBOSTAGE_FRAGMENT; nvtokenEnqueue(tokenStream, ubo); #if USE_POLYOFFSETTOKEN NVTokenPolygonOffset offset; offset.cmd.bias = 1; offset.cmd.scale = 1; nvtokenEnqueue(tokenStream, offset); #endif } size_t i = from; for (; i < drawItems.size(); i++){ const DrawItem& di = drawItems[i]; if (tokenStream.size() + sizeof(NVTokenIbo) + sizeof(NVTokenVbo) + sizeof(NVTokenUbo)*2 + sizeof(NVTokenDrawElemsUsed) > tokenStream.capacity()){ break; } if (shade == SHADE_SOLID && !di.solid){ continue; } if ((shade == SHADE_SOLIDWIRE || shade == SHADE_SOLIDWIRE_SPLIT) && di.solid != lastSolid){ sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((tokenStream.size()-begin)) ); sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] ); if ( shade == SHADE_SOLIDWIRE_SPLIT ){ sc.fbos. push_back( USE_STATEFBO_SPLIT ? 0 : ( di.solid ? resources.fbo : resources.fbo2 ) ); } else{ sc.fbos.push_back(0); } begin = tokenStream.size(); } if (lastGeometry != di.geometryIndex){ const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex]; NVTokenVbo vbo; vbo.cmd.index = 0; vbo.setBuffer(geo.vboGL, geo.vboADDR, 0); nvtokenEnqueue(tokenStream, vbo); NVTokenIbo ibo; ibo.setBuffer(geo.iboGL, geo.iboADDR); ibo.cmd.typeSizeInByte = 4; nvtokenEnqueue(tokenStream, ibo); lastGeometry = di.geometryIndex; } if (lastMatrix != di.matrixIndex){ NVTokenUbo ubo; ubo.cmd.index = UBO_MATRIX; ubo.cmd.stage = UBOSTAGE_VERTEX; ubo.setBuffer(scene->m_matricesGL, scene->m_matricesADDR, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode)); nvtokenEnqueue(tokenStream, ubo); lastMatrix = di.matrixIndex; } if (lastMaterial != di.materialIndex){ NVTokenUbo ubo; ubo.cmd.index = UBO_MATERIAL; ubo.cmd.stage = UBOSTAGE_FRAGMENT; ubo.setBuffer(scene->m_materialsGL, scene->m_materialsADDR, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material)); nvtokenEnqueue(tokenStream, ubo); 
lastMaterial = di.materialIndex; } NVTokenDrawElemsUsed drawelems; drawelems.setMode(di.solid ? GL_TRIANGLES : GL_LINES); drawelems.cmd.count = di.range.count; drawelems.cmd.firstIndex = GLuint((di.range.offset )/sizeof(GLuint)); nvtokenEnqueue(tokenStream, drawelems); lastSolid = di.solid; } sc.offsets.push_back( begin ); sc.sizes. push_back( GLsizei((tokenStream.size()-begin)) ); if (shade == SHADE_SOLID){ sc.states. push_back( m_stateObjects[ STATE_TRIS ] ); } else{ sc.states. push_back( m_stateObjects[ lastSolid ? STATE_TRISOFFSET : STATE_LINES ] ); } if ( shade == SHADE_SOLIDWIRE_SPLIT ){ sc.fbos. push_back( USE_STATEFBO_SPLIT ? 0 : ( lastSolid ? resources.fbo : resources.fbo2 ) ); } else{ sc.fbos.push_back(0); } return i; } }; static RendererTokenStream::Type s_sorttoken; static RendererTokenStream::TypeEmu s_sorttoken_emu; void RendererTokenStream::init(const CadScene* NV_RESTRICT scene, const Resources& resources) { TokenRendererBase::init(s_bindless_ubo, !!has_GL_NV_vertex_buffer_unified_memory); resources.usingUboProgram(true); m_scene = scene; fillDrawItems(m_drawItems,0,scene->m_objects.size(), true, true); TokenRendererBase::finalize(resources,false); for (int i = 0; i < NUM_SHADES; i++){ m_tokenStreams[i].resize(bufferSize); glNamedBufferData(m_tokenBuffers[i], bufferSize, 0, GL_DYNAMIC_DRAW); } } void RendererTokenStream::deinit() { TokenRendererBase::deinit(); m_drawItems.clear(); } void RendererTokenStream::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager) { const CadScene* NV_RESTRICT scene = m_scene; // do state setup (primarily for sake of state capturing) scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL); if (m_bindlessVboUbo){ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } else{ glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo); } 
captureState(resources); if (!USE_POLYOFFSETTOKEN && (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT)){ glPolygonOffset(1,1); } bool useSub = true; bool usePersistent = false; size_t begin = 0; while (begin < m_drawItems.size()) { NVPointerStream stream; GLuint buffer; void* bufferPtr = NULL; if (m_hwsupport && !useSub){ if (usePersistent){ // not ideal, best would be finding max frame usage and then keep * 4 the size to account for driver/gpu // race glCreateBuffers(1,&buffer); glNamedBufferStorage(buffer, bufferSize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_CLIENT_STORAGE_BIT); bufferPtr = glMapNamedBufferRange(buffer, 0, bufferSize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); } else{ buffer = m_tokenBuffers[shadetype]; bufferPtr = glMapNamedBufferRange(buffer, 0, bufferSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); } } else{ bufferPtr = &m_tokenStreams[shadetype][0]; } stream.init(bufferPtr,bufferSize); { nvh::Profiler::Section _tempTimer(profiler ,"Token"); begin = GenerateTokens(stream, m_drawItems, begin, shadetype, scene, resources); } if (useSub){ buffer = m_tokenBuffers[shadetype]; nvh::Profiler::Section _tempTimer(profiler ,"Send"); glInvalidateBufferData(buffer); glNamedBufferSubData(buffer,0,stream.size(), stream.m_begin); } { nvh::Profiler::Section _tempTimer(profiler ,"Draw"); if (m_hwsupport){ ShadeCommand & shade = m_shades[shadetype]; glDrawCommandsStatesNV(buffer, &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) ); } else{ ShadeCommand & shade = m_shades[shadetype]; renderShadeCommandSW(stream.m_begin, stream.size(), shade); } } if (m_hwsupport && !useSub){ if (usePersistent){ glDeleteBuffers(1,&buffer); } else{ glUnmapNamedBuffer(buffer); } } } profiler.accumulationSplit(); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 
0); glBindVertexBuffer(0,0,0,0); glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); if (m_bindlessVboUbo){ glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL); } } ================================================ FILE: rendereruborange.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include #include #include "renderer.hpp" #include "common.h" namespace csfviewer { ////////////////////////////////////////////////////////////////////////// class RendererUboRange: public Renderer { public: class Type : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "uborange"; } Renderer* create() const { RendererUboRange* renderer = new RendererUboRange(); return renderer; } unsigned int priority() const { return 0; } }; class TypeEmu : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "uborange_bindless"; } Renderer* create() const { RendererUboRange* renderer = new RendererUboRange(); renderer->m_vbum = true; return renderer; } unsigned int priority() const { return 0; } }; class TypeSort : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "uborange_sorted"; } Renderer* create() const { RendererUboRange* renderer = new RendererUboRange(); renderer->m_sort = true; return renderer; } unsigned int priority() const { return 1; } }; class TypeSortEmu : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "uborange_sorted_bindless"; } Renderer* create() const { RendererUboRange* renderer = new RendererUboRange(); renderer->m_vbum = true; renderer->m_sort = true; return renderer; } unsigned int priority() const { return 1; } }; public: void init(const CadScene* NV_RESTRICT scene, const Resources& resources); void deinit(); void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager); RendererUboRange() : m_vbum(false) , m_sort(false) { } bool m_vbum; bool m_sort; 
private: std::vector m_drawItems; }; static RendererUboRange::Type s_uborange; static RendererUboRange::TypeEmu s_uborange_emu; static RendererUboRange::TypeSort s_sortuborange; static RendererUboRange::TypeSortEmu s_sortuborange_emu; void RendererUboRange::init(const CadScene* NV_RESTRICT scene, const Resources& resources) { m_scene = scene; fillDrawItems(m_drawItems,0,scene->m_objects.size(), true, true); if (m_sort){ std::sort(m_drawItems.begin(),m_drawItems.end(),DrawItem_compare_groups); } } void RendererUboRange::deinit() { m_drawItems.clear(); } void RendererUboRange::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager) { const CadScene* NV_RESTRICT scene = m_scene; bool vbum = m_vbum; scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL); if (vbum){ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); if (s_bindless_ubo){ glEnableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_SCENE,resources.sceneAddr,sizeof(SceneData)); } else{ glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo); } } else{ glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE,resources.sceneUbo); } glUseProgram(resources.programUbo); SetWireMode(GL_FALSE); if (shadetype == SHADE_SOLIDWIRE || shadetype == SHADE_SOLIDWIRE_SPLIT){ glEnable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(1,1); } { int lastMaterial = -1; int lastGeometry = -1; int lastMatrix = -1; bool lastSolid = true; GLenum mode = GL_TRIANGLES; for (int i = 0; i < m_drawItems.size(); i++){ const DrawItem& di = m_drawItems[i]; if (shadetype == SHADE_SOLID && !di.solid){ if (m_sort) break; continue; } if (lastSolid != di.solid){ SetWireMode( di.solid ? GL_FALSE : GL_TRUE ); if (shadetype == SHADE_SOLIDWIRE_SPLIT){ glBindFramebuffer(GL_FRAMEBUFFER, di.solid ? 
resources.fbo : resources.fbo2); } } if (lastGeometry != di.geometryIndex){ const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex]; if (vbum){ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex)); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0, geo.iboADDR, (geo.numIndexSolid+geo.numIndexWire) * sizeof(GLuint)); } else{ glBindVertexBuffer(0, geo.vboGL, 0, sizeof(CadScene::Vertex)); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo.iboGL); } lastGeometry = di.geometryIndex; } if (lastMatrix != di.matrixIndex){ if (vbum && s_bindless_ubo){ glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATRIX, scene->m_matricesADDR + sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode)); } else{ glBindBufferRange(GL_UNIFORM_BUFFER,UBO_MATRIX, scene->m_matricesGL, sizeof(CadScene::MatrixNode) * di.matrixIndex, sizeof(CadScene::MatrixNode)); } lastMatrix = di.matrixIndex; } if (lastMaterial != di.materialIndex){ if (m_vbum && s_bindless_ubo){ glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATERIAL, scene->m_materialsADDR +sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material)); } else{ glBindBufferRange(GL_UNIFORM_BUFFER,UBO_MATERIAL, scene->m_materialsGL, sizeof(CadScene::Material) * di.materialIndex, sizeof(CadScene::Material)); } lastMaterial = di.materialIndex; } glDrawElements( di.solid ? 
GL_TRIANGLES : GL_LINES, di.range.count, GL_UNSIGNED_INT, (void*) di.range.offset); lastSolid = di.solid; } } glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindVertexBuffer(0,0,0,0); glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); if (vbum){ glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); if (s_bindless_ubo){ glDisableClientState(GL_UNIFORM_BUFFER_UNIFIED_NV); } } scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL); } } ================================================ FILE: rendererubosub.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include #include #include "renderer.hpp" #include "common.h" namespace csfviewer { ////////////////////////////////////////////////////////////////////////// class RendererUboSub: public Renderer { public: class Type : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "ubosub"; } Renderer* create() const { RendererUboSub* renderer = new RendererUboSub(); return renderer; } unsigned int priority() const { return 2; } }; class TypeVbum : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "ubosub_bindless"; } Renderer* create() const { RendererUboSub* renderer = new RendererUboSub(); renderer->m_vbum = true; return renderer; } unsigned int priority() const { return 2; } }; class TypeSort : public Renderer::Type { bool isAvailable() const { return true; } const char* name() const { return "ubosub_sorted"; } Renderer* create() const { RendererUboSub* renderer = new RendererUboSub(); renderer->m_sort = true; return renderer; } unsigned int priority() const { return 2; } }; class TypeSortVbum : public Renderer::Type { bool isAvailable() const { return !!has_GL_NV_vertex_buffer_unified_memory; } const char* name() const { return "ubosub_sorted_bindless"; } Renderer* create() const { RendererUboSub* renderer = new RendererUboSub(); renderer->m_vbum = true; renderer->m_sort = true; return renderer; } unsigned int priority() const { return 2; } }; public: void init(const CadScene* NV_RESTRICT scene, const Resources& resources); void deinit(); void draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager); bool m_sort; bool m_vbum; private: std::vector m_drawItems; GLuint m_streamMatrix; GLuint m_streamMaterial; 
RendererUboSub() : m_vbum(false) , m_sort(false) { } }; static RendererUboSub::Type s_ubosub; static RendererUboSub::TypeVbum s_ubosub_vbum; static RendererUboSub::TypeSort s_ubosub_sort; static RendererUboSub::TypeSortVbum s_ubosub_vbum_sort; void RendererUboSub::init(const CadScene* NV_RESTRICT scene, const Resources& resources) { resources.usingUboProgram(true); m_scene = scene; fillDrawItems(m_drawItems,0,scene->m_objects.size(), true, true); if (m_sort){ std::sort(m_drawItems.begin(),m_drawItems.end(),DrawItem_compare_groups); } m_scene = scene; glCreateBuffers(1,&m_streamMatrix); glCreateBuffers(1,&m_streamMaterial); glNamedBufferData( m_streamMatrix, sizeof(CadScene::MatrixNode), NULL, GL_STREAM_DRAW); glNamedBufferData( m_streamMaterial, sizeof(CadScene::Material), NULL, GL_STREAM_DRAW); } void RendererUboSub::deinit() { glDeleteBuffers(1,&m_streamMatrix); glDeleteBuffers(1,&m_streamMaterial); } void RendererUboSub::draw(ShadeType shadetype, const Resources& resources, nvh::Profiler& profiler, nvgl::ProgramManager &progManager) { const CadScene* NV_RESTRICT scene = m_scene; bool vbum = m_vbum; scene->enableVertexFormat(VERTEX_POS,VERTEX_NORMAL); glUseProgram(resources.programUbo); SetWireMode(GL_FALSE); if (shadetype == SHADE_SOLIDWIRE){ glEnable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(1,1); } if (vbum){ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); } glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, resources.sceneUbo); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, m_streamMatrix); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, m_streamMaterial); { int lastMaterial = -1; int lastGeometry = -1; int lastMatrix = -1; bool lastSolid = true; GLenum mode = GL_TRIANGLES; for (int i = 0; i < m_drawItems.size(); i++){ const DrawItem& di = m_drawItems[i]; if (shadetype == SHADE_SOLID && !di.solid){ if (m_sort) break; continue; } if (lastSolid != di.solid){ SetWireMode( di.solid ? 
GL_FALSE : GL_TRUE ); if (shadetype == SHADE_SOLIDWIRE_SPLIT){ glBindFramebuffer(GL_FRAMEBUFFER, di.solid ? resources.fbo : resources.fbo2); } } if (lastGeometry != di.geometryIndex){ const CadScene::Geometry &geo = scene->m_geometry[di.geometryIndex]; if (vbum){ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, geo.vboADDR, geo.numVertices * sizeof(CadScene::Vertex)); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0, geo.iboADDR, (geo.numIndexSolid+geo.numIndexWire) * sizeof(GLuint)); } else{ glBindVertexBuffer(0, geo.vboGL, 0, sizeof(CadScene::Vertex)); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo.iboGL); } lastGeometry = di.geometryIndex; } if (lastMatrix != di.matrixIndex){ glNamedBufferSubData(m_streamMatrix, 0, sizeof(CadScene::MatrixNode), &scene->m_matrices[di.matrixIndex]); lastMatrix = di.matrixIndex; } if (lastMaterial != di.materialIndex){ glNamedBufferSubData(m_streamMaterial, 0, sizeof(CadScene::Material), &scene->m_materials[di.materialIndex]); lastMaterial = di.materialIndex; } glDrawElements( di.solid ? GL_TRIANGLES : GL_LINES, di.range.count, GL_UNSIGNED_INT, (void*) di.range.offset); lastSolid = di.solid; } } glBindBufferBase(GL_UNIFORM_BUFFER,UBO_SCENE, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATRIX, 0); glBindBufferBase(GL_UNIFORM_BUFFER,UBO_MATERIAL, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindVertexBuffer(0,0,0,0); if (m_vbum){ glDisableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glDisableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); } if (shadetype == SHADE_SOLIDWIRE){ glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); } scene->disableVertexFormat(VERTEX_POS,VERTEX_NORMAL); } } ================================================ FILE: scan.comp.glsl ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


#version 430
/**/

// The shader is compiled in one of three flavors selected by TASK:
//   TASK_SUM     - inclusive scan of each batch of BATCH_SIZE elements
//   TASK_OFFSETS - scan of the last element of every batch (batch totals)
//   TASK_COMBINE - adds the scanned total of the preceding batch to each element
#define TASK_SUM      0
#define TASK_OFFSETS  1
#define TASK_COMBINE  2

#ifndef TASK
#define TASK TASK_SUM
#endif

// Each invocation handles one uvec4, so one work group covers BATCH_SIZE uints.
#define THREADBLOCK_SIZE 512
#define BATCH_SIZE (THREADBLOCK_SIZE*4)

uniform uint numElements;

///////////////////////////////////////////////////////
// based on CUDA Sample "scan.cu"

layout (local_size_x = THREADBLOCK_SIZE) in;

#if TASK != TASK_COMBINE

uint threadIdx = gl_LocalInvocationID.x;

#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable

#if GL_NV_shader_thread_group

  #define USESHUFFLE

  #define LOG2_WARP_SIZE 5U
  #define WARP_SIZE (1U << LOG2_WARP_SIZE)

  // Almost the same as naive scan1Inclusive but doesn't need barriers
  // nor shared memory
  // and works only for size <= WARP_SIZE
  #if GL_NV_shader_thread_shuffle
    // one slot per warp for the warp totals
    shared uint s_Data[(THREADBLOCK_SIZE / WARP_SIZE)];

    uint warpScanInclusive(uint idata, uint size){
      uint sum = idata;
      // doubling steps: add the value held 1, 2, 4, ... lanes below
      for (int STEP = 0; STEP < 5 && (1<<(STEP+1)) <= size; STEP++){
        bool valid = false;
        uint temp = shuffleUpNV(sum, 1 << STEP, 32, valid);
        if (valid) {
          sum += temp;
        }
      }
      return sum;
    }
  #else
    shared uint s_Data[THREADBLOCK_SIZE * 2];

    // Almost the same as naive scan1Inclusive but doesn't need barriers
    // and works only for size <= WARP_SIZE
    uint warpScanInclusive(uint idata, uint size){
      // each group of `size` threads owns 2*size slots: the lower half is zeroed
      // so the reads at pos - offset below need no bounds check
      uint pos = 2 * threadIdx.x - (threadIdx.x & (size - 1));
      s_Data[pos] = 0;
      pos += size;
      s_Data[pos] = idata;

      if(size >=  2) s_Data[pos] += s_Data[pos -  1];
      if(size >=  4) s_Data[pos] += s_Data[pos -  2];
      if(size >=  8) s_Data[pos] += s_Data[pos -  4];
      if(size >= 16) s_Data[pos] += s_Data[pos -  8];
      if(size >= 32) s_Data[pos] += s_Data[pos - 16];

      return s_Data[pos];
    }
  #endif

  // Exclusive result = inclusive result minus the thread's own input.
  uint warpScanExclusive(uint idata, uint size){
    return warpScanInclusive(idata, size) - idata;
  }

  // Inclusive scan across the work group: per-warp scan, then a scan of the
  // per-warp totals, then each thread adds its warp's exclusive offset.
  uint scan1Inclusive(uint idata, uint size){
    if(size > WARP_SIZE){
      //Bottom-level inclusive warp scan
      uint warpResult = warpScanInclusive(idata, WARP_SIZE);

      //Save top elements of each warp for exclusive warp scan
  #if !GL_NV_shader_thread_shuffle
      //sync to wait for warp scans to complete (because s_Data is being overwritten)
      memoryBarrierShared();
      barrier();
  #endif
      if( (threadIdx & (WARP_SIZE - 1)) == (WARP_SIZE - 1) )
        s_Data[threadIdx >> LOG2_WARP_SIZE] = warpResult;

      //wait for warp scans to complete
      memoryBarrierShared();
      barrier();
      if( threadIdx < (THREADBLOCK_SIZE / WARP_SIZE) ){
        //grab top warp elements
        uint val = s_Data[threadIdx];
        //calculate exclusive scan and write back to shared memory
        s_Data[threadIdx] = warpScanExclusive(val, size >> LOG2_WARP_SIZE);
      }

      //return updated warp scans with exclusive scan results
      memoryBarrierShared();
      barrier();
      return warpResult + s_Data[threadIdx >> LOG2_WARP_SIZE];
    }else{
      return warpScanInclusive(idata, size);
    }
  }

#else

  shared uint s_Data[THREADBLOCK_SIZE * 2];

  // Fallback without thread-group support: doubling scan in shared memory
  // with a barrier pair around every step.
  uint scan1Inclusive(uint idata, uint size)
  {
    uint pos = 2 * threadIdx.x - (threadIdx.x & (size - 1));
    s_Data[pos] = 0;
    pos += size;
    s_Data[pos] = idata;

    for (uint offset = 1; offset < size; offset <<= 1)
    {
      memoryBarrierShared();
      barrier();
      uint t = s_Data[pos] + s_Data[pos - offset];
      memoryBarrierShared();
      barrier();
      s_Data[pos] = t;
    }

    return s_Data[pos];
  }

#endif

uint scan1Exclusive(uint idata, uint size)
{
  return scan1Inclusive(idata, size) - idata;
}

// Inclusive scan over uvec4 data: scans the four components locally, then
// offsets them by the exclusive scan of the per-thread totals (.w).
uvec4 scan4Inclusive(uvec4 idata4, uint size)
{
  //Level-0 inclusive scan
  idata4.y += idata4.x;
  idata4.z += idata4.y;
  idata4.w += idata4.z;

  //Level-1 exclusive scan
  uint oval = scan1Exclusive(idata4.w, size / 4);

  idata4.x += oval;
  idata4.y += oval;
  idata4.z += oval;
  idata4.w += oval;

  return idata4;
}

//Exclusive vector scan: the array to be scanned is stored
//in local thread memory scope as uint4
uvec4 scan4Exclusive(uvec4 idata4, uint size)
{
  uvec4 odata4 = scan4Inclusive(idata4, size);
  odata4.x -= idata4.x;
  odata4.y -= idata4.y;
  odata4.z -= idata4.z;
  odata4.w -= idata4.w;
  return odata4;
}

#endif


#if TASK == TASK_SUM

layout (std430, binding=1) buffer inputBuffer {
  uvec4 indata[];
};

layout (std430, binding=0) buffer outputBuffer {
  uvec4 outdata[];
};

// Per-batch inclusive scan. Out-of-range invocations still take part in the
// scan (with zero input) because scan4Inclusive contains barriers.
void main()
{
  uint idx = gl_GlobalInvocationID.x;
  uint maxidx = ((numElements + 3) / 4);
  bool valid = idx < maxidx;

  //Load data
  uvec4 idata4 = valid ? indata[idx] : uvec4(0);

  // Calculate scan
  //uvec4 odata4 = scan4Inclusive(idata4, min(BATCH_SIZE, (maxidx-idx)*4));
  uvec4 odata4 = scan4Inclusive(idata4, BATCH_SIZE);

  //Write back
  if (valid) outdata[idx] = odata4;
}

#endif

#if TASK == TASK_OFFSETS

layout (std430, binding=1) buffer inputBuffer {
  uint indata[];
};

layout (std430, binding=0) buffer outputBuffer {
  uvec4 outdata[];
};

// Gathers the last element of four consecutive batches per invocation
// (the batch totals after TASK_SUM) and scans those totals.
void main()
{
  uint idx = gl_GlobalInvocationID.x;
  uint startIdx = (idx * BATCH_SIZE * 4);
  bool valid = false;

  //Load data
  uvec4 idata4 = uvec4(0);
  for (uint i = 0; i < 4; i++){
    uint readIdx = startIdx + (i+1)*BATCH_SIZE - 1u;
    if ( readIdx < numElements ){
      idata4[i] = indata[readIdx];
      valid = true;
    }
  }

  //Calculate scan
  uvec4 odata4 = scan4Inclusive(idata4, BATCH_SIZE);

  //Write back
  if (valid) outdata[idx] = odata4;
}

#endif

#if TASK == TASK_COMBINE

layout (std430, binding=1) buffer inputBuffer {
  uint indata[];
};

layout (std430, binding=0) buffer outputBuffer {
  uint outdata[];
};

// Adds the scanned total of the preceding batch to every element of
// batches 1..n; batch 0 needs no offset.
void main()
{
  uint idx = gl_GlobalInvocationID.x;
  bool valid = idx < numElements;
  uint batch = idx / BATCH_SIZE;

  if (valid && batch > 0) {
    outdata[idx] += indata[batch-1];
  }
}

#endif

================================================
FILE: scansystem.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION.
All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "scansystem.hpp" #include inline static GLuint snapdiv(GLuint input, GLuint align) { return (input + align - 1) / align; } size_t ScanSystem::getOffsetSize(GLuint elements) { GLuint groups = snapdiv(elements,BATCH_ELEMENTS); if (groups == 1) return 0; GLuint groupcombines = snapdiv(groups,BATCH_ELEMENTS); size_t size = groupcombines*BATCH_ELEMENTS*sizeof(GLuint); if (groupcombines > 1){ // add another layer GLuint combines = snapdiv(groupcombines,BATCH_ELEMENTS); size += combines*BATCH_ELEMENTS*sizeof(GLuint); } return size; } bool ScanSystem::scanData( GLuint elements, const Buffer& input, const Buffer& output, const Buffer& offsets ) { assert( (elements % 4) == 0 ); assert( elements < (GLuint64)BATCH_ELEMENTS*BATCH_ELEMENTS*BATCH_ELEMENTS); assert( elements * sizeof(GLuint) <= size_t(input.size) ); assert( input.size <= output.size ); glUseProgram(programs.prefixsum); glUniform1ui(0,elements); input.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1); output.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); GLuint groups = snapdiv(elements,BATCH_ELEMENTS); assert(groups <= maxGrpsPrefix); glDispatchCompute(groups,1,1); if (groups > 1){ GLuint groupcombines = snapdiv(groups,BATCH_ELEMENTS); 
assert( groupcombines <= BATCH_ELEMENTS ); assert( getOffsetSize(elements) <= size_t(offsets.size)); glUseProgram(programs.offsets); glUniform1ui(0,elements); output.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 1); offsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); assert(groupcombines <= maxGrpsOffsets); glDispatchCompute(groupcombines,1,1); if (groupcombines > 1){ glUniform1ui(0,groupcombines*BATCH_ELEMENTS); Buffer additionaloffsets = offsets; // derive from offsets GLintptr required = groupcombines*BATCH_ELEMENTS*sizeof(GLuint);; additionaloffsets.offset += required; additionaloffsets.size = offsets.size - required; offsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1); additionaloffsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(1,1,1); combineWithOffsets(groupcombines*BATCH_ELEMENTS, offsets, additionaloffsets); } } glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,1); return groups > 1; } void ScanSystem::combineWithOffsets(GLuint elements, const Buffer& output, const Buffer& offsets ) { //assert((elements % 4) == 0); assert(elements * sizeof(GLuint) <= size_t(output.size)); glUseProgram(programs.combine); glUniform1ui(0,elements); offsets.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 1); output.BindBufferRange(GL_SHADER_STORAGE_BUFFER, 0); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); GLuint groups = snapdiv(elements,GROUPSIZE); assert(groups < maxGrpsCombine); glDispatchCompute(groups,1,1); } void ScanSystem::init( const Programs& progs ) { update(progs); } void ScanSystem::update( const Programs& progs ) { GLuint maxGroups[3]; glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,(GLint*)&maxGroups[0]); //GLuint groupSize[3]; //glGetProgramiv(progs.combine, GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupSize); maxGrpsCombine = maxGroups[0]; //glGetProgramiv(progs.offsets, GL_COMPUTE_WORK_GROUP_SIZE, 
(GLint*)groupSize); maxGrpsOffsets = maxGroups[0]; //glGetProgramiv(progs.prefixsum, GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupSize); maxGrpsPrefix = maxGroups[0]; programs = progs; } void ScanSystem::test() { GLuint scanbuffers[3]; glCreateBuffers(3,scanbuffers); GLuint low = ScanSystem::BATCH_ELEMENTS/2; GLuint mid = ScanSystem::BATCH_ELEMENTS*ScanSystem::BATCH_ELEMENTS; GLuint high = ScanSystem::BATCH_ELEMENTS*ScanSystem::BATCH_ELEMENTS*2; size_t offsize = ScanSystem::getOffsetSize(high); GLuint* data = new GLuint[high]; for (GLuint i = 0; i < high; i++){ data[i] = 1; } glNamedBufferStorage(scanbuffers[0], high * sizeof(GLuint), &data[0], 0 ); glNamedBufferStorage(scanbuffers[1], high * sizeof(GLuint),0, GL_MAP_READ_BIT ); glNamedBufferStorage(scanbuffers[2], offsize,0,GL_MAP_READ_BIT); delete [] data; GLuint result; bool needcombine; // low needcombine = scanData(low, scanbuffers[0], scanbuffers[1], scanbuffers[2]); assert(needcombine == false); result = 0; glGetNamedBufferSubData(scanbuffers[1],sizeof(GLuint) * (low-1), sizeof(GLuint), &result); assert(result == low); // med needcombine = scanData(mid, scanbuffers[0], scanbuffers[1], scanbuffers[2]); assert(needcombine == true); result = 0; glGetNamedBufferSubData(scanbuffers[2],sizeof(GLuint) * (ScanSystem::BATCH_ELEMENTS-1), sizeof(GLuint), &result); assert(result == mid); combineWithOffsets(mid, scanbuffers[1], scanbuffers[2]); result = 0; glGetNamedBufferSubData(scanbuffers[1],sizeof(GLuint) * (mid-1), sizeof(GLuint), &result); assert(result == mid); // high needcombine = scanData(high, scanbuffers[0], scanbuffers[1], scanbuffers[2]); assert(needcombine == true); combineWithOffsets(high, scanbuffers[1], scanbuffers[2]); result = 0; glGetNamedBufferSubData(scanbuffers[1],sizeof(GLuint) * (high-1), sizeof(GLuint), &result); assert(result == high); glDeleteBuffers(3,scanbuffers); } ================================================ FILE: scansystem.hpp ================================================ /* * 
Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


/* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */

#ifndef SCANSYSTEM_H__
#define SCANSYSTEM_H__

// NOTE(review): the two header names below were lost in this dump
// (angle-bracket text stripped) — restore them from the repository.
#include
#include

// GPU prefix-sum helper built on three compute programs
// (per-batch scan, scan of batch totals, combine).
class ScanSystem {
public:
  // Work group size and the number of uints one work group processes
  // (each invocation handles four values).
  const static size_t GROUPSIZE = 512;
  const static size_t BATCH_ELEMENTS = GROUPSIZE*4;

  // Handles of the three compute programs used by scanData/combineWithOffsets.
  struct Programs {
    GLuint prefixsum;
    GLuint offsets;
    GLuint combine;
  };

  // A GL buffer name together with the byte range [offset, offset+size) to use.
  struct Buffer {
    GLuint buffer;
    GLintptr offset;
    GLsizeiptr size;

    // Creates immutable storage of `sizei` bytes and covers the whole buffer.
    void create(size_t sizei, const void* data, GLbitfield flags)
    {
      size = sizei;
      offset = 0;
      glCreateBuffers(1,&buffer);
      glNamedBufferStorage(buffer, size, data, flags);
    }

    // Wraps an existing buffer; the size is queried from GL with the query
    // variant matching the width of GLsizeiptr.
    Buffer(GLuint buffer)
      : buffer(buffer)
      , offset(0)
    {
      if (sizeof(GLsizeiptr) > 4)
        glGetNamedBufferParameteri64v(buffer,GL_BUFFER_SIZE, (GLint64*)&size);
      else
        glGetNamedBufferParameteriv(buffer, GL_BUFFER_SIZE, (GLint*)&size);
    }

    Buffer()
      : buffer(0)
      , offset(0)
      , size(0)
    {

    }

    // Binds the stored range to the indexed binding point.
    inline void BindBufferRange(GLenum target, GLuint index) const {
      glBindBufferRange(target, index, buffer, offset, size);
    }
    // Binds with both arguments ADDED to the stored offset and size.
    // NOTE(review): `size+sizea` treats sizea as a delta, not as the length of
    // the sub-range — confirm this is intended.
    inline void BindBufferRange(GLenum target, GLuint index, GLintptr offseta, GLsizeiptr sizea) const {
      glBindBufferRange(target, index, buffer, offset+offseta, size+sizea);
    }
    // Reads the stored range back into `data`.
    inline void GetNamedBufferSubData(void* data){
      glGetNamedBufferSubData(buffer,offset,size,data);
    }
  };

  void init(const Programs& progs);
  void update(const Programs& progs);
  void test();

  // returns true if offsets are needed
  // the offset value needs to be added using the BATCH_ELEMENTS
  bool scanData( GLuint elements, const Buffer& input, const Buffer& output, const Buffer& offsets);
  void combineWithOffsets(GLuint elements, const Buffer& output, const Buffer& offsets);

  // Byte size required for the offsets buffer passed to scanData.
  static size_t getOffsetSize(GLuint elements);

public:
  Programs programs;
  // Dispatch limits checked by the asserts in scanData/combineWithOffsets.
  GLuint maxGrpsPrefix;
  GLuint maxGrpsOffsets;
  GLuint maxGrpsCombine;
};

#endif

================================================
FILE: scene.frag.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


#version 430
/**/

#extension GL_ARB_shading_language_include : enable
#include "common.h"

// must match cadscene
struct Side {
  vec4 ambient;
  vec4 diffuse;
  vec4 specular;
  vec4 emissive;
};

// Two shaded sides plus two padding entries.
struct Material {
  Side  sides[2];
  Side  _pad[2];
};

// With USE_INDEXING the block holds 256 materials selected per draw,
// otherwise a single material.
layout(std140,binding=UBO_MATERIAL) uniform materialBuffer {
#if USE_INDEXING
  Material  materials[256];
#else
  Material  materials[1];
#endif
};

// Inputs from the vertex stage; members must mirror its `Interpolants` output block.
in Interpolants {
  vec3 wPos;
  vec3 wNormal;
#if USE_INDEXING
  flat ivec2 assigns;
#endif
#if !defined(WIREMODE)
  flat int wireMode;
#endif
} IN;

// wireMode is either a compile-time define or comes from the vertex stage.
#if !defined(WIREMODE)
  int wireMode = IN.wireMode;
#else
  int wireMode = WIREMODE;
#endif

layout(location=0,index=0) out vec4 out_Color;

// Lights one material side: ambient + emissive, a clamped N.L diffuse term and
// a half-vector specular term with exponent 16. The normal is flipped for
// back-facing fragments.
vec4 shade(const Side side)
{
  vec4 color = side.ambient + side.emissive;

  // eye position is read from the w components of viewMatrixIT's first three entries
  vec3 eyePos = vec3(scene.viewMatrixIT[0].w,scene.viewMatrixIT[1].w,scene.viewMatrixIT[2].w);

  vec3 lightDir = normalize( scene.wLightPos.xyz - IN.wPos);
  vec3 viewDir  = normalize( eyePos - IN.wPos);
  vec3 halfDir  = normalize(lightDir + viewDir);
  vec3 normal   = normalize(IN.wNormal) * (gl_FrontFacing ? 1 : -1);

  color += side.diffuse * max(dot(normal,lightDir),0);
  color += side.specular * pow(max(0,dot(normal,halfDir)),16);

  return color;
}

void main()
{
  int mi = 0;
#if USE_INDEXING
  // material index travels in the second component of assigns
  mi = IN.assigns.y;
#endif

  // front-facing fragments use sides[1], back-facing ones sides[0]
  out_Color = shade(materials[mi].sides[gl_FrontFacing ? 1 : 0]);
  if (wireMode != 0){
    // wireframe: unlit, brightened diffuse color of sides[0]
    out_Color = materials[mi].sides[0].diffuse*1.5 + 0.3;
  }
}

================================================
FILE: scene.vert.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */


#version 430
/**/

#extension GL_ARB_shading_language_include : enable
#include "common.h"

#if USE_INDEXING && USE_BASEINSTANCE
#extension GL_ARB_shader_draw_parameters : require
#endif

in layout(location=VERTEX_POS)    vec3 pos;
in layout(location=VERTEX_NORMAL) vec3 normal;

#if USE_INDEXING
  #if USE_BASEINSTANCE
    // both indices are packed into the base instance:
    // low 20 bits -> assigns.x, remaining upper bits -> assigns.y
    ivec2 assigns = ivec2( gl_BaseInstanceARB & 0xFFFFF, gl_BaseInstanceARB >> 20);
  #else
    in layout(location=VERTEX_ASSIGNS) ivec2 assigns;
  #endif
  #define matrixIndex assigns.x
#endif

#if !defined(WIREMODE)
in layout(location=VERTEX_WIREMODE) int wireMode;
#endif

// Outputs to the fragment stage; members must mirror its `Interpolants` input block.
out Interpolants {
  vec3 wPos;
  vec3 wNormal;
#if USE_INDEXING
  flat ivec2 assigns;
#endif
#if !defined(WIREMODE)
  flat int wireMode;
#endif
} OUT;

// Transforms position and normal to world space (matrices either fetched by
// index or taken from the bound `object` block) and projects the position.
void main()
{
#if USE_INDEXING || USE_MIX
  vec3 wPos     = (getIndexedMatrix(matrixIndex, NODE_MATRIX_WORLD) * vec4(pos,1)).xyz;
  vec3 wNormal  = mat3(getIndexedMatrix(matrixIndex, NODE_MATRIX_WORLDIT)) * normal;
#else
  vec3 wPos     = (object.worldMatrix   * vec4(pos,1)).xyz;
  vec3 wNormal  = mat3(object.worldMatrixIT) * normal;
#endif

  gl_Position   = scene.viewProjMatrix * vec4(wPos,1);
  OUT.wPos = wPos;
  OUT.wNormal = wNormal;
#if USE_INDEXING
  OUT.assigns = assigns;
#endif
#if !defined(WIREMODE)
  OUT.wireMode = wireMode;
#endif
}

================================================
FILE: statesystem.cpp
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "statesystem.hpp" #include // memcmp ////////////////////////////////////////////////////////////////////////// void StateSystem::ClipDistanceState::applyGL() const { for (GLuint i = 0; i < MAX_CLIPPLANES; i++){ if (isBitSet(enabled,i)) glEnable (GL_CLIP_DISTANCE0 + i); else glDisable (GL_CLIP_DISTANCE0 + i); } } void StateSystem::ClipDistanceState::getGL() { enabled = 0; for (GLuint i = 0; i < MAX_CLIPPLANES; i++){ setBitState(enabled,i,glIsEnabled(GL_CLIP_DISTANCE0 + i)); } } ////////////////////////////////////////////////////////////////////////// #if STATESYSTEM_USE_DEPRECATED void StateSystem::AlphaStateDepr::applyGL() const { glAlphaFunc(mode,refvalue); } void StateSystem::AlphaStateDepr::getGL() { glGetIntegerv(GL_ALPHA_TEST_FUNC,(GLint*)&mode); glGetFloatv(GL_ALPHA_TEST_REF, &refvalue); } #endif ////////////////////////////////////////////////////////////////////////// void StateSystem::StencilState::applyGL() const { glStencilFuncSeparate(GL_FRONT, funcs[FACE_FRONT].func, funcs[FACE_FRONT].refvalue, funcs[FACE_FRONT].mask); glStencilFuncSeparate(GL_BACK, funcs[FACE_BACK ].func, funcs[FACE_BACK ].refvalue, funcs[FACE_BACK ].mask); glStencilOpSeparate(GL_FRONT, ops[FACE_FRONT].fail, ops[FACE_FRONT].zfail, ops[FACE_FRONT].zpass); 
glStencilOpSeparate(GL_BACK, ops[FACE_BACK ].fail, ops[FACE_BACK ].zfail, ops[FACE_BACK ].zpass); } void StateSystem::StencilState::getGL() { glGetIntegerv(GL_STENCIL_FUNC, (GLint*)&funcs[FACE_FRONT].func); glGetIntegerv(GL_STENCIL_REF, (GLint*)&funcs[FACE_FRONT].refvalue); glGetIntegerv(GL_STENCIL_VALUE_MASK, (GLint*)&funcs[FACE_FRONT].mask); glGetIntegerv(GL_STENCIL_BACK_FUNC, (GLint*)&funcs[FACE_BACK].func); glGetIntegerv(GL_STENCIL_BACK_REF, (GLint*)&funcs[FACE_BACK].refvalue); glGetIntegerv(GL_STENCIL_BACK_VALUE_MASK, (GLint*)&funcs[FACE_BACK].mask); glGetIntegerv(GL_STENCIL_FAIL, (GLint*)&ops[FACE_FRONT].fail); glGetIntegerv(GL_STENCIL_PASS_DEPTH_FAIL, (GLint*)&ops[FACE_FRONT].zfail); glGetIntegerv(GL_STENCIL_PASS_DEPTH_PASS, (GLint*)&ops[FACE_FRONT].zpass); glGetIntegerv(GL_STENCIL_BACK_FAIL, (GLint*)&ops[FACE_BACK].fail); glGetIntegerv(GL_STENCIL_BACK_PASS_DEPTH_FAIL, (GLint*)&ops[FACE_BACK].zfail); glGetIntegerv(GL_STENCIL_BACK_PASS_DEPTH_PASS, (GLint*)&ops[FACE_BACK].zpass); } ////////////////////////////////////////////////////////////////////////// void StateSystem::BlendState::applyGL() const { if (separateEnable){ for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ if (isBitSet(separateEnable,i)) glEnablei(GL_BLEND,i); else glDisablei(GL_BLEND,i); } } if (useSeparate){ for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ glBlendFuncSeparatei(i,blends[i].rgb.srcw,blends[i].rgb.dstw,blends[i].alpha.srcw,blends[i].alpha.dstw); glBlendEquationSeparatei(i,blends[i].rgb.equ,blends[i].alpha.equ); } } else{ glBlendFuncSeparate(blends[0].rgb.srcw,blends[0].rgb.dstw,blends[0].alpha.srcw,blends[0].alpha.dstw); glBlendEquationSeparate(blends[0].rgb.equ,blends[0].alpha.equ); } //glBlendColor(color[0],color[1],color[2],color[3]); } void StateSystem::BlendState::getGL() { GLuint stateSet = 0; separateEnable = 0; for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ if (setBitState(separateEnable,i, glIsEnabledi( GL_BLEND, i))) stateSet++; } if (stateSet == MAX_DRAWBUFFERS){ 
separateEnable = 0; } GLuint numEqual = 1; for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ glGetIntegeri_v(GL_BLEND_SRC_RGB,i,(GLint*)&blends[i].rgb.srcw); glGetIntegeri_v(GL_BLEND_DST_RGB,i,(GLint*)&blends[i].rgb.dstw); glGetIntegeri_v(GL_BLEND_EQUATION_RGB,i,(GLint*)&blends[i].rgb.equ); glGetIntegeri_v(GL_BLEND_SRC_ALPHA,i,(GLint*)&blends[i].alpha.srcw); glGetIntegeri_v(GL_BLEND_DST_ALPHA,i,(GLint*)&blends[i].alpha.dstw); glGetIntegeri_v(GL_BLEND_EQUATION_ALPHA,i,(GLint*)&blends[i].alpha.equ); if (i > 1 && memcmp(&blends[i].rgb,&blends[i-1].rgb,sizeof(blends[i].rgb))==0 && memcmp(&blends[i].alpha,&blends[i-1].alpha,sizeof(blends[i].alpha))==0){ numEqual++; } } useSeparate = numEqual != MAX_DRAWBUFFERS; //glGetFloatv(GL_BLEND_COLOR,color); } ////////////////////////////////////////////////////////////////////////// void StateSystem::DepthState::applyGL() const { glDepthFunc(func); } void StateSystem::DepthState::getGL() { glGetIntegerv(GL_DEPTH_FUNC,(GLint*)&func); } ////////////////////////////////////////////////////////////////////////// void StateSystem::LogicState::applyGL() const { glLogicOp(op); } void StateSystem::LogicState::getGL() { glGetIntegerv(GL_LOGIC_OP_MODE,(GLint*)&op); } ////////////////////////////////////////////////////////////////////////// void StateSystem::RasterState::applyGL() const { //glFrontFace(frontFace); glCullFace(cullFace); //glPolygonOffset(polyOffsetFactor,polyOffsetUnits); glPolygonMode(GL_FRONT_AND_BACK,polyMode); //glLineWidth(lineWidth); glPointSize(pointSize); glPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE,pointFade); glPointParameteri(GL_POINT_SPRITE_COORD_ORIGIN,pointSpriteOrigin); } void StateSystem::RasterState::getGL() { //glGetIntegerv(GL_FRONT_FACE, (GLint*)&frontFace); glGetIntegerv(GL_CULL_FACE_MODE, (GLint*)&cullFace); //glGetFloatv(GL_POLYGON_OFFSET_FACTOR,&polyOffsetFactor); //glGetFloatv(GL_POLYGON_OFFSET_UNITS,&polyOffsetUnits); //glGetFloatv(GL_LINE_WIDTH,&lineWidth); glGetFloatv(GL_POINT_SIZE,&pointSize); 
glGetFloatv(GL_POINT_FADE_THRESHOLD_SIZE,&pointFade); glGetIntegerv(GL_POINT_SPRITE_COORD_ORIGIN,(GLint*)&pointSpriteOrigin); } ////////////////////////////////////////////////////////////////////////// #if STATESYSTEM_USE_DEPRECATED void StateSystem::RasterStateDepr::applyGL() const { glLineStipple(lineStippleFactor,lineStipplePattern); glShadeModel(shadeModel); } void StateSystem::RasterStateDepr::getGL() { GLint pattern; glGetIntegerv(GL_LINE_STIPPLE_PATTERN,&pattern); lineStipplePattern = pattern; glGetIntegerv(GL_LINE_STIPPLE_REPEAT,(GLint*)&lineStippleFactor); glGetIntegerv(GL_SHADE_MODEL,(GLint*)&shadeModel); } #endif ////////////////////////////////////////////////////////////////////////// void StateSystem::PrimitiveState::applyGL() const { glPrimitiveRestartIndex(restartIndex); glProvokingVertex(provokingVertex); glPatchParameteri(GL_PATCH_VERTICES,patchVertices); } void StateSystem::PrimitiveState::getGL() { glGetIntegerv(GL_PRIMITIVE_RESTART_INDEX, (GLint*)&restartIndex); glGetIntegerv(GL_PROVOKING_VERTEX, (GLint*)&provokingVertex); glGetIntegerv(GL_PATCH_VERTICES, (GLint*)&patchVertices); } ////////////////////////////////////////////////////////////////////////// void StateSystem::SampleState::applyGL() const { glSampleCoverage(coverage,invert); glSampleMaski(0,mask); } void StateSystem::SampleState::getGL() { glGetIntegerv(GL_SAMPLE_COVERAGE_VALUE,(GLint*)&coverage); glGetIntegerv(GL_SAMPLE_COVERAGE_INVERT,(GLint*)&invert); glGetIntegeri_v(GL_SAMPLE_MASK_VALUE,0,(GLint*)&mask); } ////////////////////////////////////////////////////////////////////////// /* void StateSystem::ViewportState::applyGL() const { if (useSeparate){ glViewportArrayv(0,MAX_VIEWPORTS, &viewports[0].x); } else{ glViewport(GLint(viewports[0].x),GLint(viewports[0].y),GLsizei(viewports[0].width),GLsizei(viewports[0].height)); } } void StateSystem::ViewportState::getGL() { int numEqual = 1; for (GLuint i = 0; i < MAX_VIEWPORTS; i++){ glGetFloati_v(GL_VIEWPORT,i,&viewports[i].x); if 
(i > 0 && memcmp(&viewports[i],&viewports[i-1],sizeof(viewports[i]))==0){ numEqual++; } } useSeparate = (numEqual != MAX_VIEWPORTS); } */ ////////////////////////////////////////////////////////////////////////// void StateSystem::DepthRangeState::applyGL() const { if (useSeparate){ glDepthRangeArrayv(0,MAX_VIEWPORTS, &depths[0].nearPlane); } else{ glDepthRange(depths[0].nearPlane,depths[0].farPlane); } } void StateSystem::DepthRangeState::getGL() { GLuint numEqual = 1; for (GLuint i = 0; i < MAX_VIEWPORTS; i++){ glGetDoublei_v(GL_DEPTH_RANGE,i,&depths[i].nearPlane); if (i > 0 && memcmp(&depths[i],&depths[i-1],sizeof(depths[i]))==0){ numEqual++; } } useSeparate = (numEqual != MAX_VIEWPORTS); } ////////////////////////////////////////////////////////////////////////// /* void StateSystem::ScissorState::applyGL() const { if (useSeparate){ glScissorArrayv(0,MAX_VIEWPORTS, &scissor[0].x); } else{ glScissor(scissor[0].x,scissor[0].y,scissor[0].width,scissor[0].height); } } void StateSystem::ScissorState::getGL() { GLuint numEqual = 1; for (GLuint i = 0; i < MAX_VIEWPORTS; i++){ glGetIntegeri_v(GL_SCISSOR_BOX,i,&scissor[i].x); if (i > 0 && memcmp(&scissor[i],&scissor[i-1],sizeof(scissor[i]))==0){ numEqual++; } } useSeparate = (numEqual != MAX_VIEWPORTS); } */ ////////////////////////////////////////////////////////////////////////// void StateSystem::ScissorEnableState::applyGL() const { if (separateEnable){ for (GLuint i = 0; i < MAX_VIEWPORTS; i++){ if (isBitSet(separateEnable,i)) glEnablei (GL_SCISSOR_TEST,i); else glDisablei(GL_SCISSOR_TEST,i); } } } void StateSystem::ScissorEnableState::getGL() { GLuint stateSet = 0; separateEnable = 0; for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ if (setBitState(separateEnable,i, glIsEnabledi( GL_BLEND, i))) stateSet++; } if (stateSet == MAX_DRAWBUFFERS){ separateEnable = 0; } } ////////////////////////////////////////////////////////////////////////// void StateSystem::MaskState::applyGL() const { if (colormaskUseSeparate){ for 
(GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ glColorMaski(i, colormask[i][0],colormask[i][1],colormask[i][2],colormask[i][3]); } } else{ glColorMask( colormask[0][0],colormask[0][1],colormask[0][2],colormask[0][3] ); } glDepthMask(depth); glStencilMaskSeparate(GL_FRONT, stencil[FACE_FRONT]); glStencilMaskSeparate(GL_BACK, stencil[FACE_BACK]); } void StateSystem::MaskState::getGL() { glGetBooleanv(GL_DEPTH_WRITEMASK,&depth); glGetIntegerv(GL_STENCIL_WRITEMASK, (GLint*)&stencil[FACE_FRONT]); glGetIntegerv(GL_STENCIL_BACK_WRITEMASK, (GLint*)&stencil[FACE_BACK]); int numEqual = 1; for (GLuint i = 0; i < MAX_DRAWBUFFERS; i++){ glGetBooleani_v(GL_COLOR_WRITEMASK, i, colormask[i]); if ( i > 0 && memcmp(colormask[i],colormask[i-1],sizeof(colormask[i]))==0){ numEqual++; } } colormaskUseSeparate = numEqual != MAX_DRAWBUFFERS; } ////////////////////////////////////////////////////////////////////////// void StateSystem::FBOState::applyGL(bool skipFboBinding) const { if (!skipFboBinding){ glBindFramebuffer(GL_DRAW_FRAMEBUFFER,fboDraw); glBindFramebuffer(GL_READ_FRAMEBUFFER,fboRead); } glDrawBuffers(numBuffers,drawBuffers); glReadBuffer(readBuffer); } void StateSystem::FBOState::getGL() { glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING,(GLint*)&fboDraw); glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING,(GLint*)&fboRead); glGetIntegerv(GL_READ_BUFFER,(GLint*)&readBuffer); for (int i = 0; i < MAX_DRAWBUFFERS; i++){ glGetIntegerv(GL_DRAW_BUFFER0 + i,(GLint*)&drawBuffers[i]); if (drawBuffers[i] != GL_NONE){ numBuffers = i+1; } } } ////////////////////////////////////////////////////////////////////////// void StateSystem::VertexEnableState::applyGL(GLbitfield changed) const { for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){ if (isBitSet(changed,i)){ if (isBitSet(enabled,i)) glEnableVertexAttribArray(i); else glDisableVertexAttribArray(i); } } } void StateSystem::VertexEnableState::getGL() { enabled = 0; for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){ GLint status; 
glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_ENABLED, (GLint*)&status); setBitState(enabled,i, status); } } ////////////////////////////////////////////////////////////////////////// void StateSystem::VertexFormatState::applyGL(GLbitfield changedFormat, GLbitfield changedBinding) const { for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){ if (!isBitSet(changedFormat,i)) continue; switch(formats[i].mode){ case VERTEXMODE_FLOAT: glVertexAttribFormat(i, formats[i].size, formats[i].type, formats[i].normalized, formats[i].relativeoffset); break; case VERTEXMODE_INT: case VERTEXMODE_UINT: glVertexAttribIFormat(i, formats[i].size, formats[i].type, formats[i].relativeoffset); break; } glVertexAttribBinding(i,formats[i].binding); } for (GLuint i = 0; i < MAX_VERTEXBINDINGS; i++){ if (!isBitSet(changedBinding,i)) continue; glVertexBindingDivisor(i,bindings[i].divisor); glBindVertexBuffer(i,0,0,bindings[i].stride); } } void StateSystem::VertexFormatState::getGL() { for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){ GLint status = 0; glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_RELATIVE_OFFSET, (GLint*)&formats[i].relativeoffset); glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_SIZE, (GLint*)&formats[i].size); glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_TYPE, (GLint*)&formats[i].type); glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_NORMALIZED, (GLint*)&status); formats[i].normalized = status; glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_ARRAY_INTEGER, (GLint*)&status); if (status){ formats[i].mode = VERTEXMODE_INT; } else{ formats[i].mode = VERTEXMODE_FLOAT; } glGetVertexAttribiv(i,GL_VERTEX_ATTRIB_BINDING, (GLint*)&formats[i].binding); } for (GLuint i = 0; i < MAX_VERTEXBINDINGS; i++){ glGetIntegeri_v(GL_VERTEX_BINDING_DIVISOR,i,(GLint*)&bindings[i].divisor); glGetIntegeri_v(GL_VERTEX_BINDING_STRIDE, i,(GLint*)&bindings[i].stride); } } ////////////////////////////////////////////////////////////////////////// void StateSystem::VertexImmediateState::applyGL(GLbitfield changed) const { for (GLuint i 
= 0; i < MAX_VERTEXATTRIBS; i++){ if (!isBitSet(changed,i)) continue; switch(data[i].mode){ case VERTEXMODE_FLOAT: glVertexAttrib4fv(i,data[i].floats); break; case VERTEXMODE_INT: glVertexAttribI4iv(i,data[i].ints); break; case VERTEXMODE_UINT: glVertexAttribI4uiv(i,data[i].uints); break; } } } void StateSystem::VertexImmediateState::getGL() { for (GLuint i = 0; i < MAX_VERTEXATTRIBS; i++){ switch(data[i].mode){ case VERTEXMODE_FLOAT: glGetVertexAttribfv(i,GL_CURRENT_VERTEX_ATTRIB,data[i].floats); break; case VERTEXMODE_INT: glGetVertexAttribIiv(i,GL_CURRENT_VERTEX_ATTRIB,data[i].ints); break; case VERTEXMODE_UINT: glGetVertexAttribIuiv(i,GL_CURRENT_VERTEX_ATTRIB,data[i].uints); break; } } } ////////////////////////////////////////////////////////////////////////// void StateSystem::ProgramState::applyGL() const { glUseProgram(program); } void StateSystem::ProgramState::getGL() { glGetIntegerv(GL_CURRENT_PROGRAM, (GLint*)&program); } ////////////////////////////////////////////////////////////////////////// // keep in sync! 
static GLenum s_stateEnums[StateSystem::NUM_STATEBITS] = { GL_BLEND, GL_COLOR_LOGIC_OP, GL_CULL_FACE, GL_DEPTH_CLAMP, GL_DEPTH_TEST, GL_DITHER, GL_FRAMEBUFFER_SRGB, GL_LINE_SMOOTH, GL_MULTISAMPLE, GL_POLYGON_OFFSET_FILL, GL_POLYGON_OFFSET_LINE, GL_POLYGON_OFFSET_POINT, GL_POLYGON_SMOOTH, GL_PRIMITIVE_RESTART, GL_PRIMITIVE_RESTART_FIXED_INDEX, GL_RASTERIZER_DISCARD, GL_SAMPLE_ALPHA_TO_COVERAGE, GL_SAMPLE_ALPHA_TO_ONE, GL_SAMPLE_COVERAGE, GL_SAMPLE_SHADING, GL_SAMPLE_MASK, GL_STENCIL_TEST, GL_SCISSOR_TEST, GL_TEXTURE_CUBE_MAP_SEAMLESS, GL_PROGRAM_POINT_SIZE, }; void StateSystem::EnableState::applyGL(GLbitfield changedBits) const { for (GLuint i = 0; i < NUM_STATEBITS; i++){ if (isBitSet(changedBits,i)){ if (isBitSet(stateBits,i)) glEnable (s_stateEnums[i]); else glDisable (s_stateEnums[i]); } } } void StateSystem::EnableState::getGL() { for (GLuint i = 0; i < NUM_STATEBITS; i++){ setBitState(stateBits,i, glIsEnabled(s_stateEnums[i])); } } ////////////////////////////////////////////////////////////////////////// #if STATESYSTEM_USE_DEPRECATED static GLenum s_stateEnumsDepr[StateSystem::NUM_STATEBITSDEPR] = { GL_ALPHA_TEST, GL_LINE_STIPPLE, GL_POINT_SMOOTH, GL_POINT_SPRITE, GL_POLYGON_STIPPLE, }; void StateSystem::EnableStateDepr::applyGL(GLbitfield changedBits) const { for (GLuint i = 0; i < NUM_STATEBITSDEPR; i++){ if (isBitSet(changedBits,i)){ if (isBitSet(stateBitsDepr,i)) glEnable (s_stateEnumsDepr[i]); else glDisable (s_stateEnumsDepr[i]); } } } void StateSystem::EnableStateDepr::getGL() { for (GLuint i = 0; i < NUM_STATEBITSDEPR; i++){ setBitState(stateBitsDepr,i, glIsEnabled(s_stateEnumsDepr[i])); } } #endif ////////////////////////////////////////////////////////////////////////// void StateSystem::State::applyGL(bool coreonly, bool skipFboBinding) const { enable.applyGL(); #if STATESYSTEM_USE_DEPRECATED if (!coreonly) enableDepr.applyGL(); #endif program.applyGL(); clip.applyGL(); #if STATESYSTEM_USE_DEPRECATED if (!coreonly) alpha.applyGL(); #endif 
blend.applyGL(); depth.applyGL(); stencil.applyGL(); logic.applyGL(); primitive.applyGL(); sample.applyGL(); raster.applyGL(); #if STATESYSTEM_USE_DEPRECATED if (!coreonly) rasterDepr.applyGL(); #endif /*if (!isBitSet(dynamicState,DYNAMIC_VIEWPORT)){ viewport.applyGL(); }*/ depthrange.applyGL(); /*if (!isBitSet(dynamicState,DYNAMIC_SCISSOR)){ scissor.applyGL(); }*/ scissorenable.applyGL(); mask.applyGL(); fbo.applyGL(skipFboBinding); vertexenable.applyGL(); vertexformat.applyGL(); verteximm.applyGL(); } void StateSystem::State::getGL(bool coreonly) { enable.getGL(); #if STATESYSTEM_USE_DEPRECATED if (!coreonly) enableDepr.applyGL(); #endif program.getGL(); clip.getGL(); #if STATESYSTEM_USE_DEPRECATED if (!coreonly) alpha.applyGL(); #endif blend.getGL(); depth.getGL(); stencil.getGL(); logic.getGL(); primitive.getGL(); sample.getGL(); raster.getGL(); #if STATESYSTEM_USE_DEPRECATED if (!coreonly) rasterDepr.applyGL(); #endif //viewport.getGL(); depthrange.getGL(); //scissor.getGL(); scissorenable.getGL(); mask.getGL(); fbo.getGL(); vertexenable.getGL(); vertexformat.getGL(); verteximm.getGL(); } ////////////////////////////////////////////////////////////////////////// void StateSystem::init(bool coreonly) { m_coreonly = coreonly; } void StateSystem::deinit() { m_states.resize(0); m_freeIDs.resize(0); } void StateSystem::generate( GLuint num, StateID* objects ) { GLuint i; for ( i = 0; i < num && !m_freeIDs.empty(); i++){ objects[i] = m_freeIDs.back(); m_freeIDs.pop_back(); } GLuint begin = GLuint(m_states.size()); if ( i < num){ m_states.resize( begin + num - i); } for ( i = i; i < num; i++){ objects[i] = begin + i; } } void StateSystem::destroy( GLuint num, const StateID* objects ) { for (GLuint i = 0; i < num; i++){ m_freeIDs.push_back(objects[i]); } } void StateSystem::set( StateID id, const State& state, GLenum basePrimitiveMode ) { StateInternal& intstate = m_states[id]; intstate.changeID++; intstate.state = state; intstate.state.basePrimitiveMode = 
basePrimitiveMode; intstate.usedDiff = 0; for (int i = 0; i < MAX_DIFFS; i++){ intstate.others[i].state = INVALID_ID; } } const StateSystem::State& StateSystem::get( StateID id ) const { return m_states[id].state; } int inline StateSystem::prepareTransitionCache(StateID prev, StateInternal& to ) { StateInternal& from = m_states[prev]; int index = -1; for (int i = 0; i < MAX_DIFFS; i++){ if ( to.others[i].state == prev && to.others[i].changeID == from.changeID) { index = i; break; } } if (index < 0){ index = to.usedDiff; to.usedDiff = (to.usedDiff + 1) % MAX_DIFFS; to.others[index].state = prev; to.others[index].changeID = from.changeID; makeDiff(to.diffs[index], from, to); } return index; } void StateSystem::applyGL( StateID id, bool skipFboBinding ) const { m_states[id].state.applyGL( m_coreonly, skipFboBinding ); } void StateSystem::applyGL( StateID id, StateID prev, bool skipFboBinding ) { StateInternal& to = m_states[id]; if (prev == INVALID_ID){ applyGL(id, skipFboBinding); return; } int index = prepareTransitionCache(prev, to); applyDiffGL( to.diffs[index], to.state, skipFboBinding ); } void StateSystem::applyDiffGL( const StateDiff& diff, const State &state, bool skipFboBinding ) { if (isBitSet(diff.changedContentBits,StateDiff::ENABLE)) state.enable.applyGL(diff.changedStateBits); #if STATESYSTEM_USE_DEPRECATED if (!m_coreonly && isBitSet(diff.changedContentBits,StateDiff::ENABLE_DEPR)) state.enableDepr.applyGL(diff.changedStateDeprBits); #endif if (isBitSet(diff.changedContentBits,StateDiff::PROGRAM)) state.program.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::CLIP)) state.clip.applyGL(); #if STATESYSTEM_USE_DEPRECATED if (!m_coreonly && isBitSet(diff.changedContentBits,StateDiff::ALPHA_DEPR)) state.alpha.applyGL(); #endif if (isBitSet(diff.changedContentBits,StateDiff::BLEND)) state.blend.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::DEPTH)) state.depth.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::STENCIL)) 
state.stencil.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::LOGIC)) state.logic.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::PRIMITIVE)) state.primitive.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::RASTER)) state.raster.applyGL(); #if STATESYSTEM_USE_DEPRECATED if (!m_coreonly && isBitSet(diff.changedContentBits,StateDiff::RASTER_DEPR)) state.rasterDepr.applyGL(); #endif /*if (isBitSet(diff.changedContentBits,StateDiff::VIEWPORT)) state.viewport.applyGL();*/ if (isBitSet(diff.changedContentBits,StateDiff::DEPTHRANGE)) state.depthrange.applyGL(); /*if (isBitSet(diff.changedContentBits,StateDiff::SCISSOR)) state.scissor.applyGL();*/ if (isBitSet(diff.changedContentBits,StateDiff::SCISSORENABLE)) state.scissorenable.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::MASK)) state.mask.applyGL(); if (isBitSet(diff.changedContentBits,StateDiff::FBO)) state.fbo.applyGL(skipFboBinding); if (isBitSet(diff.changedContentBits,StateDiff::VERTEXENABLE)) state.vertexenable.applyGL(diff.changedVertexEnable); if (isBitSet(diff.changedContentBits,StateDiff::VERTEXFORMAT)) state.vertexformat.applyGL(diff.changedVertexFormat, diff.changedVertexBinding); if (isBitSet(diff.changedContentBits,StateDiff::VERTEXIMMEDIATE)) state.verteximm.applyGL(diff.changedVertexImm); } void StateSystem::makeDiff( StateDiff& diff, const StateInternal &fromInternal, const StateInternal &toInternal ) { const State &from = fromInternal.state; const State &to = toInternal.state; diff.changedStateBits = from.enable.stateBits ^ to.enable.stateBits; #if STATESYSTEM_USE_DEPRECATED diff.changedStateDeprBits = from.enableDepr.stateBitsDepr ^ to.enableDepr.stateBitsDepr; #endif diff.changedContentBits = 0; if (memcmp(&from.enable ,&to.enable ,sizeof(from.enable )) != 0) setBit(diff.changedContentBits,StateDiff::ENABLE); #if STATESYSTEM_USE_DEPRECATED if (memcmp(&from.enableDepr ,&to.enableDepr ,sizeof(from.enableDepr )) != 0) 
setBit(diff.changedContentBits,StateDiff::ENABLE_DEPR); #endif if (memcmp(&from.program ,&to.program ,sizeof(from.program )) != 0) setBit(diff.changedContentBits,StateDiff::PROGRAM); if (memcmp(&from.clip ,&to.clip ,sizeof(from.clip )) != 0) setBit(diff.changedContentBits,StateDiff::CLIP); #if STATESYSTEM_USE_DEPRECATED if (memcmp(&from.alpha ,&to.alpha ,sizeof(from.alpha )) != 0) setBit(diff.changedContentBits,StateDiff::ALPHA_DEPR); #endif if (memcmp(&from.blend ,&to.blend ,sizeof(from.blend )) != 0) setBit(diff.changedContentBits,StateDiff::BLEND); if (memcmp(&from.depth ,&to.depth ,sizeof(from.depth )) != 0) setBit(diff.changedContentBits,StateDiff::DEPTH); if (memcmp(&from.stencil ,&to.stencil ,sizeof(from.stencil )) != 0) setBit(diff.changedContentBits,StateDiff::STENCIL); if (memcmp(&from.logic ,&to.logic ,sizeof(from.logic )) != 0) setBit(diff.changedContentBits,StateDiff::LOGIC); if (memcmp(&from.primitive ,&to.primitive ,sizeof(from.primitive )) != 0) setBit(diff.changedContentBits,StateDiff::PRIMITIVE); if (memcmp(&from.raster ,&to.raster ,sizeof(from.raster )) != 0) setBit(diff.changedContentBits,StateDiff::RASTER); #if STATESYSTEM_USE_DEPRECATED if (memcmp(&from.rasterDepr ,&to.rasterDepr ,sizeof(from.rasterDepr )) != 0) setBit(diff.changedContentBits,StateDiff::RASTER_DEPR); #endif //if (memcmp(&from.viewport ,&to.viewport ,sizeof(from.viewport )) != 0) setBit(diff.changedContentBits,StateDiff::VIEWPORT); if (memcmp(&from.depth ,&to.depth ,sizeof(from.depth )) != 0) setBit(diff.changedContentBits,StateDiff::DEPTHRANGE); //if (memcmp(&from.scissor ,&to.scissor ,sizeof(from.scissor )) != 0) setBit(diff.changedContentBits,StateDiff::SCISSOR); if (memcmp(&from.scissorenable ,&to.scissorenable ,sizeof(from.scissorenable )) != 0) setBit(diff.changedContentBits,StateDiff::SCISSORENABLE); if (memcmp(&from.mask ,&to.mask ,sizeof(from.mask )) != 0) setBit(diff.changedContentBits,StateDiff::MASK); if (memcmp(&from.fbo ,&to.fbo ,sizeof(from.fbo )) != 0) 
setBit(diff.changedContentBits,StateDiff::FBO); // special case vertex stuff, more likely to change then rest diff.changedVertexEnable = from.vertexenable.enabled ^ to.vertexenable.enabled; diff.changedVertexImm = 0; diff.changedVertexFormat = 0; for (GLint i = 0; i < MAX_VERTEXATTRIBS; i++){ if (memcmp(&from.vertexformat.formats[i], &to.vertexformat.formats[i], sizeof(to.vertexformat.formats[i])) != 0) setBit(diff.changedVertexFormat,i); if (memcmp(&from.verteximm.data[i], &to.verteximm.data[i], sizeof(to.verteximm.data[i])) != 0) setBit(diff.changedVertexImm,i); } diff.changedVertexBinding = 0; for (GLint i = 0; i < MAX_VERTEXBINDINGS; i++){ if (memcmp(&from.vertexformat.bindings[i], &to.vertexformat.bindings[i], sizeof(to.vertexformat.bindings[i])) != 0) setBit(diff.changedVertexBinding,i); } if (diff.changedVertexEnable) setBit(diff.changedContentBits,StateDiff::VERTEXENABLE); if (diff.changedVertexBinding || diff.changedVertexFormat) setBit(diff.changedContentBits,StateDiff::VERTEXFORMAT); if (diff.changedVertexImm) setBit(diff.changedContentBits,StateDiff::VERTEXIMMEDIATE); } void StateSystem::prepareTransition( StateID id, StateID prev ) { StateInternal& to = m_states[id]; prepareTransitionCache(prev,to); } ================================================ FILE: statesystem.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #ifndef STATESYSTEM_H__ #define STATESYSTEM_H__ #include #include class StateSystem { public: static inline bool isBitSet(GLbitfield bits, GLuint key) { return (bits & (1< m_states; std::vector m_freeIDs; void makeDiff(StateDiff& diff, const StateInternal &fromInternal, const StateInternal &toInternal); void applyDiffGL(const StateDiff& diff, const State &to, bool skipFboBinding); int prepareTransitionCache(StateID prev, StateInternal& to ); }; #endif ================================================ FILE: tokenbase.cpp ================================================ /* * Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include "tokenbase.hpp" using namespace nvtoken; #include "common.h" namespace csfviewer { bool TokenRendererBase::hasNativeCommandList() { return !!has_GL_NV_command_list; } void TokenRendererBase::init(bool bindlessUbo, bool bindlessVbo) { m_bindlessVboUbo = bindlessVbo && bindlessUbo; m_hwsupport = hasNativeCommandList() && !m_emulate; for (int i = 0; i < NUM_SHADES; i++){ m_tokenAddresses[i] = 0; } if (m_hwsupport){ glCreateStatesNV(NUM_STATES,m_stateObjects); if (m_uselist){ glCreateCommandListsNV(NUM_SHADES,m_commandLists); } } else{ // we use a fast mode for glBufferAddressRangeNV where we ignore precise buffer boundaries // this will trigger the driver to throw warnings, which may cause a crash #if !defined (NDEBUG) if (m_bindlessVboUbo){ glDisable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glDisable(GL_DEBUG_OUTPUT); } #endif m_stateSystem.init(false); m_stateSystem.generate(NUM_STATES,m_stateIDs); for (int i = 0; i < NUM_STATES; i++){ m_stateObjects[i] = m_stateIDs[i]; } } nvtokenInitInternals(m_hwsupport, m_bindlessVboUbo); } void TokenRendererBase::printStats( ShadeType shadeType ) { int stats[NVTOKEN_TYPES] = {0}; ShadeCommand& sc = m_shades[shadeType]; size_t num = sc.states.size(); size_t size = sc.offsets[num-1] + sc.sizes[num-1] - sc.offsets[0]; nvtokenGetStats(&m_tokenStreams[shadeType][sc.offsets[0]], size, stats); LOGI("type: %s\n",toString(shadeType)); LOGI("commandsize: %zu\n",size); LOGI("state toggles: %zu\n", num); LOGI("tokens:\n"); for (int i = 0; i < NVTOKEN_TYPES; i++){ const char* what = nvtokenCommandToString(i); if (what && stats[i]){ LOGI("%s:\t %6d\n", what,stats[i]); } } LOGI("\n"); } void TokenRendererBase::finalize(const Resources &resources, bool fillBuffers) { { m_tokenStreams[SHADE_SOLIDWIRE_SPLIT] = m_tokenStreams[SHADE_SOLIDWIRE]; 
m_shades[SHADE_SOLIDWIRE_SPLIT] = m_shades[SHADE_SOLIDWIRE]; if (USE_STATEFBO_SPLIT){ ShadeCommand& sc = m_shades[SHADE_SOLIDWIRE_SPLIT]; for (size_t i = 0; i < sc.sizes.size(); i++){ if (sc.states[i] == m_stateObjects[STATE_LINES]){ sc.states[i] = m_stateObjects[STATE_LINES_SPLIT]; } } } else{ ShadeCommand& sc = m_shades[SHADE_SOLIDWIRE_SPLIT]; for (size_t i = 0; i < sc.sizes.size(); i++) { if (sc.states[i] == m_stateObjects[STATE_LINES]){ sc.fbos[i] = resources.fbo2; } else{ sc.fbos[i] = resources.fbo; } } } } glCreateBuffers(NUM_SHADES,m_tokenBuffers); if (m_hwsupport && fillBuffers){ for (int i = 0; i < NUM_SHADES; i++){ glNamedBufferStorage(m_tokenBuffers[i],m_tokenStreams[i].size(), &m_tokenStreams[i][0], 0); if (m_useaddress){ glGetNamedBufferParameterui64vNV(m_tokenBuffers[i], GL_BUFFER_GPU_ADDRESS_NV, &m_tokenAddresses[i]); glMakeNamedBufferResidentNV(m_tokenBuffers[i], GL_READ_ONLY); ShadeCommand& sc = m_shades[i]; sc.addresses.clear(); sc.addresses.reserve( sc.offsets.size() ); for (size_t n = 0; n < sc.offsets.size(); n++){ sc.addresses.push_back( m_tokenAddresses[i] + sc.offsets[n] ); } } } } } void TokenRendererBase::deinit() { if (m_useaddress){ for (int i = 0; i < NUM_SHADES; i++){ if (m_tokenAddresses[i]){ glMakeNamedBufferNonResidentNV( m_tokenBuffers[i] ); } } } glDeleteBuffers(NUM_SHADES,m_tokenBuffers); if (m_hwsupport){ glDeleteStatesNV(NUM_STATES,m_stateObjects); if (m_uselist){ glDeleteCommandListsNV(NUM_SHADES,m_commandLists); } } else { #if !defined (NDEBUG) if (m_bindlessVboUbo){ glEnable(GL_DEBUG_OUTPUT); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); } #endif } m_stateSystem.deinit(); } void TokenRendererBase::captureState( const Resources &resources ) { bool stateChanged = m_stateChangeID != resources.stateChangeID; bool fboTexChanged = m_fboStateChangeID != resources.fboTextureChangeID; m_stateChangeID = resources.stateChangeID; m_fboStateChangeID = resources.fboTextureChangeID; if (stateChanged){ StateSystem::State state; 
state.verteximm.data[VERTEX_WIREMODE].mode = StateSystem::VERTEXMODE_INT; // need to set this properly if (m_bindlessVboUbo){ // temp workaround #if USE_RESETADDRESSES glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV,0,0,0); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV,0,0,0); glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATERIAL,0,0); glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_MATRIX,0,0); glBufferAddressRangeNV(GL_UNIFORM_BUFFER_ADDRESS_NV,UBO_SCENE,0,0); #endif } // we will do a series of state captures glBindFramebuffer(GL_FRAMEBUFFER, resources.fbo); glUseProgram(resources.programUsed); SetWireMode(GL_FALSE); if (m_hwsupport){ glStateCaptureNV(m_stateObjects[STATE_TRIS],GL_TRIANGLES); } else { state.getGL(); // very costly, smarter would be setting this manually m_stateSystem.set(m_stateIDs[STATE_TRIS], state, GL_TRIANGLES); } glEnable(GL_POLYGON_OFFSET_FILL); // glPolygonOffset(1,1); //not captured if (m_hwsupport){ glStateCaptureNV(m_stateObjects[STATE_TRISOFFSET],GL_TRIANGLES); } else { state.getGL(); // very costly, smarter would be setting this manually m_stateSystem.set(m_stateIDs[STATE_TRISOFFSET], state, GL_TRIANGLES); } SetWireMode(GL_TRUE); if (m_hwsupport){ glStateCaptureNV(m_stateObjects[STATE_LINES],GL_LINES); } else { state.getGL(); // very costly, smarter would be setting this manually m_stateSystem.set(m_stateIDs[STATE_LINES], state, GL_LINES); } glBindFramebuffer(GL_FRAMEBUFFER, resources.fbo2); if (m_hwsupport){ glStateCaptureNV(m_stateObjects[STATE_LINES_SPLIT], GL_LINES); } else { state.getGL(); // very costly, smarter would be setting this manually m_stateSystem.set(m_stateIDs[STATE_LINES_SPLIT], state, GL_LINES); } if (!m_hwsupport){ m_stateSystem.prepareTransition(m_stateIDs[STATE_TRISOFFSET], m_stateObjects[STATE_LINES]); m_stateSystem.prepareTransition(m_stateIDs[STATE_LINES], m_stateObjects[STATE_TRISOFFSET]); m_stateSystem.prepareTransition(m_stateIDs[STATE_TRISOFFSET], 
m_stateObjects[STATE_LINES_SPLIT]); m_stateSystem.prepareTransition(m_stateIDs[STATE_LINES_SPLIT],m_stateObjects[STATE_TRISOFFSET]); } // reset, stored in stateobjects glUseProgram(0); glDisable(GL_POLYGON_OFFSET_FILL); glPolygonOffset(0,0); #if 1 // workaround glBindFramebuffer(GL_FRAMEBUFFER, resources.fbo); #else glBindFramebuffer(GL_FRAMEBUFFER, 0); #endif } if (m_hwsupport && m_uselist && (stateChanged || fboTexChanged)){ for (int i = 0; i < NUM_SHADES; i++){ ShadeCommand& shade = m_shades[i]; std::vector ptrs; ptrs.reserve(shade.offsets.size()); for (size_t p = 0; p < shade.offsets.size(); p++){ ptrs.push_back(&m_tokenStreams[i][shade.offsets[p]]); } glCommandListSegmentsNV(m_commandLists[i],1); glListDrawCommandsStatesClientNV(m_commandLists[i],0, &ptrs[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], int(shade.states.size()) ); glCompileCommandListNV(m_commandLists[i]); } } } void TokenRendererBase::renderShadeCommandSW( const void* NV_RESTRICT stream, size_t streamSize, ShadeCommand &shade ) { nvtokenDrawCommandsStatesSW(stream, streamSize, &shade.offsets[0], &shade.sizes[0], &shade.states[0], &shade.fbos[0], GLuint(shade.states.size()), m_stateSystem); } } ================================================ FILE: tokenbase.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ // a few performance tests // only affect TOKEN techniques #define USE_RESETADDRESSES 1 #define USE_FASTDRAWS 1 #define USE_STATEFBO_SPLIT 0 //otherwise fbo[] as used #define USE_POLYOFFSETTOKEN 1 // only affects TOKEN #define USE_STATEOBJ_REBUILD 0 // does 100 statecaptures per frame #define USE_NOFILTER 0 // only affects TOKENSORT #define USE_PERFRAMEBUILD 0 #include #include #include "renderer.hpp" #include "nvtoken.hpp" using namespace nvtoken; namespace csfviewer { #define UBOSTAGE_VERTEX (nvtoken::s_nvcmdlist_stages[NVTOKEN_STAGE_VERTEX]) #define UBOSTAGE_FRAGMENT (nvtoken::s_nvcmdlist_stages[NVTOKEN_STAGE_FRAGMENT]) #if USE_FASTDRAWS #define NVTokenDrawElemsUsed NVTokenDrawElems #else #define NVTokenDrawElemsUsed NVTokenDrawElemsInstanced #endif class TokenRendererBase { public: enum StateType { STATE_TRIS, STATE_TRISOFFSET, STATE_LINES, STATE_LINES_SPLIT, NUM_STATES, }; struct ShadeCommand { std::vector addresses; std::vector offsets; std::vector sizes; std::vector states; std::vector fbos; }; bool m_emulate; bool m_sort; bool m_uselist; bool m_useaddress; TokenRendererBase() : m_hwsupport(false) , m_bindlessVboUbo(false) , m_useaddress(false) , m_emulate(false) , m_uselist(false) , m_sort(false) , m_stateChangeID(~0) , m_fboStateChangeID(~0) { } static bool hasNativeCommandList(); protected: bool m_hwsupport; bool m_bindlessVboUbo; GLuint m_tokenBuffers[NUM_SHADES]; GLuint64 m_tokenAddresses[NUM_SHADES]; std::string m_tokenStreams[NUM_SHADES]; GLuint m_commandLists[NUM_SHADES]; ShadeCommand m_shades[NUM_SHADES]; size_t m_stateChangeID; size_t m_fboStateChangeID; StateSystem m_stateSystem; StateSystem::StateID m_stateIDs[NUM_STATES]; GLuint m_stateObjects[NUM_STATES]; void init(bool bindlessUbo, bool bindlessVbo); void printStats(ShadeType shadeType); void finalize(const Resources 
&resources, bool fillBuffers=true); void deinit(); void captureState(const Resources &resources); void renderShadeCommandSW( const void* NV_RESTRICT stream, size_t streamSize, ShadeCommand &shade ); }; } ================================================ FILE: transform-leaves.comp.glsl ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ #version 430 /**/ #ifndef USE_COMPUTE #define USE_COMPUTE 1 #endif #define MAX_LEVELS 10 #define LEVELBITS 8 #define MATRIX_BASE 0 #define MATRIX_INVTRANS 1 #define MATRIX_BEGIN_WORLD 0 #define MATRIX_BEGIN_OBJECT 2 #define MATRICES 4 #if USE_COMPUTE layout (local_size_x = 256) in; layout(std430,binding=2) buffer scratchBuffer { int nodes[]; }; layout(location=0) uniform int count; layout(location=1) uniform int levelcap; // must be >= 1 #define BAILOUT gl_GlobalInvocationID.x >= count int self = nodes[gl_GlobalInvocationID.x]; #else layout(location=0) uniform int levelcap; // must be >= 1 #define BAILOUT false layout(location=0) in int self; #endif layout(binding=0) uniform isamplerBuffer parentsBuffer; layout(std430,binding=0) restrict buffer worldMatricesBuffer { mat4 worldMatrices[]; }; layout(binding=1) uniform samplerBuffer texWorldMatrices; layout(binding=2) uniform samplerBuffer texObjectMatrices; mat4 
getMatrix(samplerBuffer texbuffer, int idx) { return mat4(texelFetch(texbuffer,idx*4 + 0), texelFetch(texbuffer,idx*4 + 1), texelFetch(texbuffer,idx*4 + 2), texelFetch(texbuffer,idx*4 + 3)); } mat4 getObjectMatrix(int idx, int what){ return getMatrix(texObjectMatrices,idx*MATRICES + what + MATRIX_BEGIN_OBJECT); }; mat4 getWorldMatrix(int idx, int what){ return getMatrix(texWorldMatrices,idx*MATRICES + what + MATRIX_BEGIN_WORLD); }; void main() { if (BAILOUT){ return; } int levels[MAX_LEVELS]; int curlevel = 0; // build path to root while (curlevel < MAX_LEVELS){ levels[curlevel++] = self; int info = texelFetch(parentsBuffer,self).x; self = info >> LEVELBITS; int lvl = info & ((1< 0) { self = levels[curlevel]; // walk downwards, save matrix in registers & save at end // never read worldmatrices due to read/write hazards parentBase = parentBase * getObjectMatrix(self,MATRIX_BASE); worldMatrices[self*MATRICES + MATRIX_BEGIN_WORLD + MATRIX_BASE] = parentBase; worldMatrices[self*MATRICES + MATRIX_BEGIN_WORLD + MATRIX_INVTRANS] = transpose(inverse(parentBase)); } } ================================================ FILE: transform-level.comp.glsl ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */

#version 430
/**/

#ifndef USE_COMPUTE
#define USE_COMPUTE 1
#endif

#define LEVELBITS 8

#define MATRIX_BASE         0
#define MATRIX_INVTRANS     1

#define MATRIX_BEGIN_WORLD  0
#define MATRIX_BEGIN_OBJECT 2
#define MATRICES            4

#if USE_COMPUTE
  layout (local_size_x = 256) in;

  // list of node indices to process, one per invocation
  layout(std430,binding=2) buffer scratchBuffer {
    int nodes[];
  };

  layout(location=0) uniform int count;

  #define BAILOUT gl_GlobalInvocationID.x >= count

  int self = nodes[gl_GlobalInvocationID.x];
#else
  #define BAILOUT false
  layout(location=0) in int self;
#endif

layout(binding=0) uniform isamplerBuffer parentsBuffer;

layout(std430,binding=0) restrict buffer worldMatricesBuffer {
  mat4 worldMatrices[];
};

layout(binding=1) uniform samplerBuffer texWorldMatrices;
layout(binding=2) uniform samplerBuffer texObjectMatrices;

// assembles matrix number idx from four consecutive texels of texbuffer
mat4 getMatrix(samplerBuffer texbuffer, int idx)
{
  int   first = idx*4;
  vec4  col0  = texelFetch(texbuffer,first + 0);
  vec4  col1  = texelFetch(texbuffer,first + 1);
  vec4  col2  = texelFetch(texbuffer,first + 2);
  vec4  col3  = texelFetch(texbuffer,first + 3);
  return mat4(col0, col1, col2, col3);
}

// object-space matrix 'what' of node idx
mat4 getObjectMatrix(int idx, int what){
  return getMatrix(texObjectMatrices,idx*MATRICES + what + MATRIX_BEGIN_OBJECT);
}

// world-space matrix 'what' of node idx
mat4 getWorldMatrix(int idx, int what){
  return getMatrix(texWorldMatrices,idx*MATRICES + what + MATRIX_BEGIN_WORLD);
}

void main()
{
  if (BAILOUT){
    return;
  }

  // parent index lives in the bits above LEVELBITS
  int parentNode = texelFetch(parentsBuffer,self).x >> LEVELBITS;

  // world base matrix
  mat4 parentWorld = getWorldMatrix(parentNode,MATRIX_BASE);
  mat4 selfObject  = getObjectMatrix(self,MATRIX_BASE);
  mat4 world       = parentWorld * selfObject;

#if 0
  // world inv trans matrix
  mat4 parentInv = transpose(getWorldMatrix(parent,MATRIX_INVTRANS));
  mat4 objectInv = transpose(getObjectMatrix(self, MATRIX_INVTRANS));
  mat4 worldInv = objectInv * parentInv;
#else
  mat4 worldInv = inverse(world);
#endif

  int outBase = self*MATRICES + MATRIX_BEGIN_WORLD;
  worldMatrices[outBase + MATRIX_BASE]     = world;
  worldMatrices[outBase + MATRIX_INVTRANS] = transpose(worldInv);
}

================================================ FILE: transformsystem.cpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #include #include "transformsystem.hpp" #include void TransformSystem::process(const NodeTree& nodeTree, Buffer& ids, Buffer& matricesObject, Buffer& matricesWorld ) { glUseProgram(m_programs.transform_leaves); glBindBuffer (GL_SHADER_STORAGE_BUFFER, m_scratchGL); glBufferData (GL_SHADER_STORAGE_BUFFER, sizeof(GLuint)*nodeTree.getNumActiveNodes(),NULL,GL_STREAM_DRAW); #if 0 // APIC hack glTextureBufferEXT(m_texsGL[TEXTURE_IDS], GL_TEXTURE_BUFFER, GL_R32I, ids.buffer); glTextureBufferEXT(m_texsGL[TEXTURE_OBJECT],GL_TEXTURE_BUFFER, GL_RGBA32F, matricesObject.buffer); glTextureBufferEXT(m_texsGL[TEXTURE_WORLD], GL_TEXTURE_BUFFER, GL_RGBA32F, matricesWorld.buffer); #else glTextureBufferRange(m_texsGL[TEXTURE_IDS], GL_R32I, ids.buffer, ids.offset, ids.size); glTextureBufferRange(m_texsGL[TEXTURE_OBJECT], GL_RGBA32F, matricesObject.buffer, matricesObject.offset, matricesObject.size); glTextureBufferRange(m_texsGL[TEXTURE_WORLD], GL_RGBA32F, matricesWorld.buffer, matricesWorld.offset, matricesWorld.size); #endif for (int i = 0; i < TEXTURES; i++){ 
nvgl::bindMultiTexture(GL_TEXTURE0 + i, GL_TEXTURE_BUFFER, m_texsGL[i]); } matricesWorld.BindBufferRange(GL_SHADER_STORAGE_BUFFER,0); matricesObject.BindBufferRange(GL_SHADER_STORAGE_BUFFER,1); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,2,m_scratchGL); const int maxshaderlevels = 10; int maxlevels = maxshaderlevels; int totalNodes = 0; bool useLeaves = true; int currentDepth = 1; const NodeTree::Level* level = nodeTree.getUsedLevel(currentDepth); // TODO: // // This code lacks a proper heuristic for switching between level and leaves based processing. // One should prefer level if there is enough nodes per level, otherwise descend and gather // many leaves from multiple levels. // while (level){ // dispatch on last level, or if we have reached maxlevels bool willdispatch = currentDepth && (!nodeTree.getUsedLevel(currentDepth+1) || currentDepth+1 % maxlevels == 0); // the last level in leaf mode, must use all level nodes, and not just the leaves of this level // as subsequent leaves operate in level mode const std::vector& nodes = useLeaves && !willdispatch ? level->leaves : level->nodes; if (!nodes.empty()){ glBufferSubData(GL_SHADER_STORAGE_BUFFER,totalNodes*sizeof(GLuint),sizeof(GLuint)*nodes.size(),&nodes[0]); totalNodes += (int)nodes.size(); } currentDepth++; level = nodeTree.getUsedLevel(currentDepth); if (willdispatch){ int groupsize = useLeaves ? 
m_leavesGroup : m_levelsGroup; if (useLeaves){ glUniform1i(0,totalNodes); glUniform1i(1,1); } else{ glUniform1i(0,totalNodes); } glDispatchCompute((totalNodes+groupsize-1)/groupsize,1,1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); if (useLeaves){ // switch to per-level mode after first batch of leaves is over (tip of hierarchy) glUseProgram(m_programs.transform_level); useLeaves = false; maxlevels = 1; // assure we dispatch every level } totalNodes = 0; } } glUseProgram(0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,1,0); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,2,0); for (int i = 0; i < TEXTURES; i++){ nvgl::bindMultiTexture(GL_TEXTURE0 + i, GL_TEXTURE_BUFFER, 0); } } void TransformSystem::init( const Programs &programs ) { m_programs = programs; glCreateBuffers(1,&m_scratchGL); glCreateTextures(GL_TEXTURE_BUFFER, TEXTURES, m_texsGL); } void TransformSystem::deinit() { glDeleteBuffers(1,&m_scratchGL); glDeleteTextures(TEXTURES,m_texsGL); } void TransformSystem::update( const Programs &programs ) { m_programs = programs; GLuint groupsizes[3]; glGetProgramiv(programs.transform_leaves, GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupsizes); m_leavesGroup = groupsizes[0]; glGetProgramiv(programs.transform_level, GL_COMPUTE_WORK_GROUP_SIZE, (GLint*)groupsizes); m_levelsGroup = groupsizes[0]; } ================================================ FILE: transformsystem.hpp ================================================ /* * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION * SPDX-License-Identifier: Apache-2.0 */ /* Contact ckubisch@nvidia.com (Christoph Kubisch) for feedback */ #ifndef TRANSFORMSYSTEM_H__ #define TRANSFORMSYSTEM_H__ #include #include #include "nodetree.hpp" class TransformSystem { public: struct Programs { GLuint transform_level; GLuint transform_leaves; }; struct Buffer { GLuint buffer; GLintptr offset; GLsizeiptr size; Buffer(GLuint buffer, size_t sizei=0) : buffer(buffer) , offset(0) { glBindBuffer(GL_COPY_READ_BUFFER, buffer); if (!sizei){ if (sizeof(GLsizeiptr) > 4) glGetBufferParameteri64v(GL_COPY_READ_BUFFER,GL_BUFFER_SIZE, (GLint64*)&size); else glGetBufferParameteriv(GL_COPY_READ_BUFFER, GL_BUFFER_SIZE, (GLint*)&size); glBindBuffer(GL_COPY_READ_BUFFER, 0); } else{ size = sizei; } } Buffer() : buffer(0) , offset(0) , size(0) { } inline void BindBufferRange(GLenum target, GLuint index) const { glBindBufferRange(target, index, buffer, offset, size); } inline void TexBuffer(GLenum target, GLenum internalformat) const { glTexBufferRange(target, internalformat, buffer, offset, size); } }; void init( const Programs &programs ); void deinit(); void update( const Programs &programs ); void process(const NodeTree&, Buffer& ids, Buffer& matricesObject, Buffer& matricesWorld ); private: enum Textures { TEXTURE_IDS, TEXTURE_WORLD, TEXTURE_OBJECT, TEXTURES, }; GLuint m_leavesGroup; GLuint m_levelsGroup; Programs m_programs; GLuint m_scratchGL; GLuint m_texsGL[TEXTURES]; }; #endif ================================================ FILE: 
xplode-animation.comp.glsl
================================================
/*
 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#version 430
/**/

// Per-node pass: reads the original object matrix and its inverse-transpose
// from a buffer texture, applies `scale` to the translation part, and writes
// both matrices into the `matrices` storage buffer.

// Builds as a compute shader by default; with USE_COMPUTE 0 the node index
// comes from gl_VertexID instead.
#ifndef USE_COMPUTE
#define USE_COMPUTE 1
#endif

// Layout of the MATRICES mat4 entries stored per node:
//   [MATRIX_BEGIN_WORLD  + MATRIX_BASE]      index 0
//   [MATRIX_BEGIN_WORLD  + MATRIX_INVTRANS]  index 1
//   [MATRIX_BEGIN_OBJECT + MATRIX_BASE]      index 2
//   [MATRIX_BEGIN_OBJECT + MATRIX_INVTRANS]  index 3
#define MATRIX_BASE 0
#define MATRIX_INVTRANS 1
#define MATRIX_BEGIN_WORLD 0
#define MATRIX_BEGIN_OBJECT 2
#define MATRICES 4

// factor applied to the translation column of the object matrix
layout(location=0) uniform float scale;

#if USE_COMPUTE
layout (local_size_x = 256) in;
// number of nodes to process; invocations at or beyond it exit early
layout(location=1) uniform int count;
#define BAILOUT gl_GlobalInvocationID.x >= count
// index of the node handled by this invocation
int self = int(gl_GlobalInvocationID.x);
#else
#define BAILOUT false
int self = int(gl_VertexID);
#endif

// output matrices, MATRICES entries per node
layout(std430,binding=0) restrict buffer matricesBuffer {
  mat4 matrices[];
};

// source matrices, same per-node layout, fetched as 4 texels per mat4
layout(binding=0) uniform samplerBuffer texMatricesOrig;

// Assembles mat4 number `idx` from four consecutive texels (one per column).
mat4 getMatrix(samplerBuffer texbuffer, int idx)
{
  return mat4(texelFetch(texbuffer,idx*4 + 0),
              texelFetch(texbuffer,idx*4 + 1),
              texelFetch(texbuffer,idx*4 + 2),
              texelFetch(texbuffer,idx*4 + 3));
}

// Original object-space matrix `what` (MATRIX_BASE or MATRIX_INVTRANS) of node `idx`.
mat4 getObjectMatrixOrig(int idx, int what){
  return getMatrix(texMatricesOrig,idx*MATRICES + what + MATRIX_BEGIN_OBJECT);
};

// Original world-space matrix `what` of node `idx` (not called by main() below).
mat4 getWorldMatrixOrig(int idx, int what){
  return getMatrix(texMatricesOrig,idx*MATRICES + what + MATRIX_BEGIN_WORLD);
};

void main()
{
  if (BAILOUT){
    return;
  }

  mat4 matrixOrig   = getObjectMatrixOrig(self,MATRIX_BASE);
  mat4 matrixITOrig = getObjectMatrixOrig(self,MATRIX_INVTRANS);

#if 0 // compiler bug
  // straightforward formulation, disabled by the author because of a compiler bug
  mat4 matrixBase = matrixOrig;
  mat4 matrixIT   = matrixITOrig;
  matrixBase[3].xyz *= scale;
  matrixIT[0].w /= scale;
  matrixIT[1].w /= scale;
  matrixIT[2].w /= scale;
#else
  // same result expressed through component-wise column multiplies:
  // column 3 xyz of the base matrix is multiplied by scale (w untouched),
  // the w component of columns 0..2 of the inverse-transpose is divided by scale
  vec4 basescale = vec4(scale,scale,scale,1);
  vec4 itscale   = vec4(1,1,1,1/scale);
  mat4 matrixBase = mat4(matrixOrig[0], matrixOrig[1], matrixOrig[2], matrixOrig[3]*basescale);
  mat4 matrixIT   = mat4(matrixITOrig[0]*itscale,matrixITOrig[1]*itscale,matrixITOrig[2]*itscale,matrixITOrig[3]);
#endif

  // only the object-space pair of this node is written; the world entries are left as they are
  matrices[self*MATRICES + MATRIX_BEGIN_OBJECT + MATRIX_BASE]     = matrixBase;
  matrices[self*MATRICES + MATRIX_BEGIN_OBJECT + MATRIX_INVTRANS] = matrixIT;
}