Full Code of anilshanbhag/gpu-topk for AI

master eb53fe33e7c0 cached
165 files
3.0 MB
801.3k tokens
37 symbols
1 requests
Download .txt
Showing preview only (3,204K chars total). Download the full file or copy to clipboard to get everything.
Repository: anilshanbhag/gpu-topk
Branch: master
Commit: eb53fe33e7c0
Files: 165
Total size: 3.0 MB

Directory structure:
gitextract_eoi4a78r/

├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── external/
│   └── cub/
│       ├── .cproject
│       ├── .project
│       ├── .settings/
│       │   ├── .gitignore
│       │   ├── org.eclipse.cdt.codan.core.prefs
│       │   ├── org.eclipse.cdt.core.prefs
│       │   ├── org.eclipse.cdt.ui.prefs
│       │   └── org.eclipse.core.runtime.prefs
│       ├── CHANGE_LOG.TXT
│       ├── LICENSE.TXT
│       ├── README.md
│       ├── common.mk
│       ├── cub/
│       │   ├── agent/
│       │   │   ├── agent_histogram.cuh
│       │   │   ├── agent_radix_sort_downsweep.cuh
│       │   │   ├── agent_radix_sort_upsweep.cuh
│       │   │   ├── agent_reduce.cuh
│       │   │   ├── agent_reduce_by_key.cuh
│       │   │   ├── agent_rle.cuh
│       │   │   ├── agent_scan.cuh
│       │   │   ├── agent_segment_fixup.cuh
│       │   │   ├── agent_select_if.cuh
│       │   │   ├── agent_spmv_orig.cuh
│       │   │   └── single_pass_scan_operators.cuh
│       │   ├── block/
│       │   │   ├── block_adjacent_difference.cuh
│       │   │   ├── block_discontinuity.cuh
│       │   │   ├── block_exchange.cuh
│       │   │   ├── block_histogram.cuh
│       │   │   ├── block_load.cuh
│       │   │   ├── block_radix_rank.cuh
│       │   │   ├── block_radix_sort.cuh
│       │   │   ├── block_raking_layout.cuh
│       │   │   ├── block_reduce.cuh
│       │   │   ├── block_scan.cuh
│       │   │   ├── block_shuffle.cuh
│       │   │   ├── block_store.cuh
│       │   │   └── specializations/
│       │   │       ├── block_histogram_atomic.cuh
│       │   │       ├── block_histogram_sort.cuh
│       │   │       ├── block_reduce_raking.cuh
│       │   │       ├── block_reduce_raking_commutative_only.cuh
│       │   │       ├── block_reduce_warp_reductions.cuh
│       │   │       ├── block_scan_raking.cuh
│       │   │       ├── block_scan_warp_scans.cuh
│       │   │       ├── block_scan_warp_scans2.cuh
│       │   │       └── block_scan_warp_scans3.cuh
│       │   ├── cub.cuh
│       │   ├── device/
│       │   │   ├── device_histogram.cuh
│       │   │   ├── device_partition.cuh
│       │   │   ├── device_radix_sort.cuh
│       │   │   ├── device_reduce.cuh
│       │   │   ├── device_run_length_encode.cuh
│       │   │   ├── device_scan.cuh
│       │   │   ├── device_segmented_radix_sort.cuh
│       │   │   ├── device_segmented_reduce.cuh
│       │   │   ├── device_select.cuh
│       │   │   ├── device_spmv.cuh
│       │   │   └── dispatch/
│       │   │       ├── dispatch_histogram.cuh
│       │   │       ├── dispatch_radix_sort.cuh
│       │   │       ├── dispatch_reduce.cuh
│       │   │       ├── dispatch_reduce_by_key.cuh
│       │   │       ├── dispatch_rle.cuh
│       │   │       ├── dispatch_scan.cuh
│       │   │       ├── dispatch_select_if.cuh
│       │   │       └── dispatch_spmv_orig.cuh
│       │   ├── grid/
│       │   │   ├── grid_barrier.cuh
│       │   │   ├── grid_even_share.cuh
│       │   │   ├── grid_mapping.cuh
│       │   │   └── grid_queue.cuh
│       │   ├── host/
│       │   │   └── mutex.cuh
│       │   ├── iterator/
│       │   │   ├── arg_index_input_iterator.cuh
│       │   │   ├── cache_modified_input_iterator.cuh
│       │   │   ├── cache_modified_output_iterator.cuh
│       │   │   ├── constant_input_iterator.cuh
│       │   │   ├── counting_input_iterator.cuh
│       │   │   ├── discard_output_iterator.cuh
│       │   │   ├── tex_obj_input_iterator.cuh
│       │   │   ├── tex_ref_input_iterator.cuh
│       │   │   └── transform_input_iterator.cuh
│       │   ├── thread/
│       │   │   ├── thread_load.cuh
│       │   │   ├── thread_operators.cuh
│       │   │   ├── thread_reduce.cuh
│       │   │   ├── thread_scan.cuh
│       │   │   ├── thread_search.cuh
│       │   │   └── thread_store.cuh
│       │   ├── util_allocator.cuh
│       │   ├── util_arch.cuh
│       │   ├── util_debug.cuh
│       │   ├── util_device.cuh
│       │   ├── util_macro.cuh
│       │   ├── util_namespace.cuh
│       │   ├── util_ptx.cuh
│       │   ├── util_type.cuh
│       │   └── warp/
│       │       ├── specializations/
│       │       │   ├── warp_reduce_shfl.cuh
│       │       │   ├── warp_reduce_smem.cuh
│       │       │   ├── warp_scan_shfl.cuh
│       │       │   └── warp_scan_smem.cuh
│       │       ├── warp_reduce.cuh
│       │       └── warp_scan.cuh
│       ├── eclipse code style profile.xml
│       ├── examples/
│       │   ├── block/
│       │   │   ├── .gitignore
│       │   │   ├── Makefile
│       │   │   ├── example_block_radix_sort.cu
│       │   │   ├── example_block_reduce.cu
│       │   │   ├── example_block_scan.cu
│       │   │   └── reduce_by_key.cu
│       │   └── device/
│       │       ├── .gitignore
│       │       ├── Makefile
│       │       ├── example_device_partition_flagged.cu
│       │       ├── example_device_partition_if.cu
│       │       ├── example_device_radix_sort.cu
│       │       ├── example_device_reduce.cu
│       │       ├── example_device_scan.cu
│       │       ├── example_device_select_flagged.cu
│       │       ├── example_device_select_if.cu
│       │       ├── example_device_select_unique.cu
│       │       └── example_device_sort_find_non_trivial_runs.cu
│       ├── experimental/
│       │   ├── .gitignore
│       │   ├── Makefile
│       │   ├── defunct/
│       │   │   ├── example_coo_spmv.cu
│       │   │   └── test_device_seg_reduce.cu
│       │   ├── histogram/
│       │   │   ├── histogram_cub.h
│       │   │   ├── histogram_gmem_atomics.h
│       │   │   └── histogram_smem_atomics.h
│       │   ├── histogram_compare.cu
│       │   ├── sparse_matrix.h
│       │   ├── spmv_compare.cu
│       │   └── spmv_script.sh
│       ├── test/
│       │   ├── .gitignore
│       │   ├── Makefile
│       │   ├── link_a.cu
│       │   ├── link_b.cu
│       │   ├── link_main.cpp
│       │   ├── mersenne.h
│       │   ├── test_allocator.cu
│       │   ├── test_block_histogram.cu
│       │   ├── test_block_load_store.cu
│       │   ├── test_block_radix_sort.cu
│       │   ├── test_block_reduce.cu
│       │   ├── test_block_scan.cu
│       │   ├── test_device_histogram.cu
│       │   ├── test_device_radix_sort.cu
│       │   ├── test_device_reduce.cu
│       │   ├── test_device_reduce_by_key.cu
│       │   ├── test_device_run_length_encode.cu
│       │   ├── test_device_scan.cu
│       │   ├── test_device_select_if.cu
│       │   ├── test_device_select_unique.cu
│       │   ├── test_grid_barrier.cu
│       │   ├── test_iterator.cu
│       │   ├── test_util.h
│       │   ├── test_warp_reduce.cu
│       │   └── test_warp_scan.cu
│       └── tune/
│           ├── .gitignore
│           ├── Makefile
│           └── tune_device_reduce.cu
├── src/
│   ├── bitonicTopK.cuh
│   ├── radixSelectTopK.cuh
│   ├── sharedmem.cuh
│   ├── sortTopK.cuh
│   └── sortingNetwork.cuh
└── test/
    ├── compareTopKAlgorithms.cu
    ├── generateProblems.cuh
    └── printFunctions.cuh

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.o
compareTopKAlgorithms


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2016 Anil Shanbhag

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: Makefile
================================================
# Build script for the compareTopKAlgorithms benchmark.
# Run `make compareTopKAlgorithms` from the repository root.
# Variables assigned with ?= can be overridden from the environment or the
# make command line, e.g. `make CUDA_PATH=/opt/cuda compareTopKAlgorithms`.

# 32 or 64, derived from the machine name reported by `uname -m`.
OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/")

CUDA_PATH       ?= /usr/local/cuda-10.0
CUDA_INC_PATH   ?= $(CUDA_PATH)/include
CUDA_BIN_PATH   ?= $(CUDA_PATH)/bin

ifeq ($(OS_SIZE),32)
    CUDA_LIB_PATH  ?= $(CUDA_PATH)/lib
else
    CUDA_LIB_PATH  ?= $(CUDA_PATH)/lib64
endif

NVCC            ?= $(CUDA_BIN_PATH)/nvcc

# Make sure to choose the right SM for your device.
# GENCODE_FLAGS is passed to nvcc in the build rule below, so the binary only
# contains code for the architecture selected here. It can also be overridden
# on the command line, e.g.
#   make GENCODE_FLAGS="-gencode arch=compute_70,code=sm_70" compareTopKAlgorithms
GENCODE_SM70    := -gencode arch=compute_70,code=sm_70
GENCODE_SM50    := -gencode arch=compute_50,code=sm_50
GENCODE_SM35    := -gencode arch=compute_35,code=sm_35
GENCODE_FLAGS   := $(GENCODE_SM50)

LDFLAGS   := -lcudart -lrt -lcurand -lm
CFLAGS := -O3 -lineinfo -Xptxas="-dlcm=ca -v"
CUB_DIR = ./external/cub/
SOURCE_DIR = ./src/
TEST_DIR = ./test/
INCLUDES := -I$(CUB_DIR) -I$(SOURCE_DIR) -I$(TEST_DIR)

#obj/%.o: src/%.cu $(DEPS)
#	$(NVCC) $(CFLAGS) -I. $(INCLUDES) -g $(GENCODE_FLAGS) $< -o $@

# `clean` does not produce a file of that name; mark it phony so a stray file
# called "clean" can never mask the target.
.PHONY: clean

# The benchmark is a single translation unit. Every project header under src/
# and test/ is listed as a prerequisite so that editing any of them triggers a
# rebuild.
compareTopKAlgorithms: test/compareTopKAlgorithms.cu src/bitonicTopK.cuh src/radixSelectTopK.cuh src/sortTopK.cuh src/sortingNetwork.cuh src/sharedmem.cuh test/generateProblems.cuh test/printFunctions.cuh
	$(NVCC) $(CFLAGS) $(GENCODE_FLAGS) $(INCLUDES) test/compareTopKAlgorithms.cu $(LDFLAGS) -o compareTopKAlgorithms

clean:
	rm -rf *.o compareTopKAlgorithms


================================================
FILE: README.md
================================================
GPU-TopK
========

GPU-TopK implements efficient top-k algorithms for GPUs. The specific problem solved is: given an array of entries (key-only or key-value), find the top-k entries based on the value of the key.

The package implements the following routines:

* Bitonic Top-K: reduction algorithm based on bitonic sort
* Radix Select Top-K: reduction of radix sort to compute top-k
* Sort Top-K: sorts the entire array and selects the top-k entries

For full details of the algorithms, see our [paper](http://anilshanbhag.in/static/papers/gputopk_sigmod18.pdf)

```
@inproceedings{shanbhag2018efficient,
  title={Efficient Top-K query processing on massively parallel hardware},
  author={Shanbhag, Anil and Pirk, Holger and Madden, Samuel},
  booktitle={Proceedings of the 2018 International Conference on Management of Data},
  pages={1557--1570},
  year={2018},
  organization={ACM}
}
```

Usage
----

The individual implementations can be used directly as standalone header files. For example, to use `RadixSelectTopK`:

```
#include "radixSelectTopK.cuh"
...
float* d_keys_in; // device pointer to the array
uint num_items;   // number of entries in the array
uint k;           // number of top entries to select
float* d_keys_out;// device pointer to the result array (must hold at least k entries)
CachingDeviceAllocator&  g_allocator // Cub memory allocator 
radixSelectTopK<float>(d_keys_in, num_items, k, d_keys_out, g_allocator);
```

We have implemented a testutil called `compareTopKAlgorithms` that can be used to benchmark the different algorithms. The testutil lets you test performance of the algorithms across standard data types and certain pre-defined distributions. To run the testutil:

```
# Edit Makefile to select the right Gencode for your GPU
# For example: for V100 GPU set GENCODE_FLAGS to use GENCODE_SM70

make compareTopKAlgorithms
./compareTopKAlgorithms
```

Here is an example tracelog:
```
$ ./compareTopKAlgorithms
Please enter the type of value you want to test:
1-float
2-double
3-uint
1
Please enter distribution type: 0
Please enter K: 32
Please enter number of tests to run per K: 3
Please enter start power (dataset size starts at 2^start)(max val: 29): 29
Please enter stop power (dataset size stops at 2^stop)(max val: 29): 29
NOW STARTING A NEW K

The distribution is: UNIFORM FLOATS
Running test 1 of 3 for size: 536870912 and k: 32
TESTING: 0 Sort
TESTING: 2 Bitonic TopK
TESTING: 1 Radix Select
Running test 2 of 3 for size: 536870912 and k: 32
TESTING: 2 Bitonic TopK
TESTING: 0 Sort
TESTING: 1 Radix Select
Running test 3 of 3 for size: 536870912 and k: 32
TESTING: 0 Sort
TESTING: 1 Radix Select
TESTING: 2 Bitonic TopK


Sort                 averaged: 219.273071 ms
Radix Select         averaged: 132.391724 ms
Bitonic TopK         averaged: 134.959854 ms
Sort                 minimum: 215.583801 ms
Radix Select         minimum: 63.751999 ms
Bitonic TopK         minimum: 28.718592 ms
Sort won 0 times
Radix Select won 1 times
Bitonic TopK won 2 times
```

For benchmarking, it is advisable to run the suite more than once in order to warm up the GPU. To see the full set of distributions implemented, check test/generateProblems.cuh.

Known Issues
-----------
1. Currently only key-only inputs are supported; key-value support will be added soon.
2. Works for data set sizes up to 2^29. This is due to inherent limitations of maximum array size on GPUs.
3. Tested to work well on Nvidia Maxwell architecture and upwards (may not work on K80 GPU - you are welcome to submit a patch).
4. BitonicTopK works only for K <= 256. If you are testing K > 256, make sure to comment out BitonicTopK.


================================================
FILE: external/cub/.cproject
================================================
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
	<storageModule moduleId="org.eclipse.cdt.core.settings">
		<cconfiguration id="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311">
			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311" moduleId="org.eclipse.cdt.core.settings" name="Default">
				<externalSettings>
					<externalSetting languages="cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.1945715073"/>
				</externalSettings>
				<extensions>
					<extension id="org.eclipse.cdt.core.Cygwin_PE" point="org.eclipse.cdt.core.BinaryParser"/>
					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
				</extensions>
			</storageModule>
			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
				<configuration artifactName="B40CTrunk" buildProperties="" description="" id="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311" name="Default" parent="org.eclipse.cdt.build.core.emptycfg">
					<folderInfo id="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113" name="/" resourcePath="">
						<toolChain id="cdt.managedbuild.toolchain.gnu.cygwin.base.481495889" name="Cygwin GCC" superClass="cdt.managedbuild.toolchain.gnu.cygwin.base">
							<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.Cygwin_PE" id="cdt.managedbuild.target.gnu.platform.cygwin.base.100038061" name="Debug Platform" osList="win32" superClass="cdt.managedbuild.target.gnu.platform.cygwin.base"/>
							<builder buildPath="${workspace_loc:/PrivateCub}/Default" id="cdt.managedbuild.target.gnu.builder.cygwin.base.412463247" keepEnvironmentInBuildfile="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.cygwin.base"/>
							<tool id="cdt.managedbuild.tool.gnu.assembler.cygwin.base.996758685" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.cygwin.base">
								<option id="gnu.both.asm.option.include.paths.900454792" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath">
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include/device_launch_parameters.h&quot;"/>
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include/crt/device_functions.h&quot;"/>
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include&quot;"/>
								</option>
								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.221302756" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
							</tool>
							<tool id="cdt.managedbuild.tool.gnu.archiver.cygwin.base.1353653670" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.cygwin.base"/>
							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.1401626953" name="Cygwin C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base">
								<option id="gnu.cpp.compiler.option.include.paths.1909687606" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include/device_launch_parameters.h&quot;"/>
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include/device_functions.h&quot;"/>
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include&quot;"/>
								</option>
								<option id="gnu.cpp.compiler.option.preprocessor.def.1893619952" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
									<listOptionValue builtIn="false" value="__device__"/>
									<listOptionValue builtIn="false" value="__global__"/>
									<listOptionValue builtIn="false" value="__shared__"/>
									<listOptionValue builtIn="false" value="__forceinline__"/>
									<listOptionValue builtIn="false" value="__host__"/>
									<listOptionValue builtIn="false" value="__device_builtin__"/>
									<listOptionValue builtIn="false" value="__device_builtin_texture_type__"/>
									<listOptionValue builtIn="false" value="TEST_ARCH=200"/>
									<listOptionValue builtIn="false" value="__launch_bounds__(...)"/>
									<listOptionValue builtIn="false" value="__align__(...)"/>
									<listOptionValue builtIn="false" value="__CUDA_ARCH__=350"/>
									<listOptionValue builtIn="false" value="__CUDACC__=1"/>
								</option>
								<option id="gnu.cpp.compiler.option.dialect.std.49639338" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.cpp.compiler.dialect.default" valueType="enumerated"/>
								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1708330939" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin"/>
							</tool>
							<tool id="cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1940954787" name="Cygwin C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.cygwin.base">
								<option id="gnu.c.compiler.option.include.paths.1945618846" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include/device_launch_parameters.h&quot;"/>
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include/crt/device_functions.h&quot;"/>
									<listOptionValue builtIn="false" value="&quot;${CUDA_PATH}/include&quot;"/>
								</option>
								<option id="gnu.c.compiler.option.preprocessor.def.symbols.1005509663" name="Defined symbols (-D)" superClass="gnu.c.compiler.option.preprocessor.def.symbols" useByScannerDiscovery="false" valueType="definedSymbols">
									<listOptionValue builtIn="false" value="__device__"/>
									<listOptionValue builtIn="false" value="__global__"/>
									<listOptionValue builtIn="false" value="__shared__"/>
									<listOptionValue builtIn="false" value="__forceinline__"/>
									<listOptionValue builtIn="false" value="__host__"/>
									<listOptionValue builtIn="false" value="__device_builtin__"/>
									<listOptionValue builtIn="false" value="__device_builtin_texture_type__"/>
									<listOptionValue builtIn="false" value="TEST_ARCH=200"/>
									<listOptionValue builtIn="false" value="__launch_bounds__(...)"/>
									<listOptionValue builtIn="false" value="__align__(...)"/>
									<listOptionValue builtIn="false" value="__CUDA_ARCH__=350"/>
									<listOptionValue builtIn="false" value="__CUDACC__=1"/>
								</option>
								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.469104331" superClass="cdt.managedbuild.tool.gnu.c.compiler.input.cygwin"/>
							</tool>
							<tool id="cdt.managedbuild.tool.gnu.c.linker.cygwin.base.1600375047" name="Cygwin C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.cygwin.base"/>
							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.cygwin.base.1176124124" name="Cygwin C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.cygwin.base">
								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.958378367" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
								</inputType>
							</tool>
						</toolChain>
					</folderInfo>
				</configuration>
			</storageModule>
			<storageModule moduleId="org.eclipse.cdt.core.pathentry"/>
			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
			<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/>
			<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
		</cconfiguration>
	</storageModule>
	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
		<project id="B40CTrunk.null.1404415602" name="B40CTrunk"/>
	</storageModule>
	<storageModule moduleId="refreshScope" versionNumber="2">
		<configuration configurationName="Default">
			<resource resourceType="PROJECT" workspacePath="/GIT_CUB"/>
		</configuration>
	</storageModule>
	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
	<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings">
		<doc-comment-owner id="org.eclipse.cdt.ui.doxygen">
			<path value=""/>
		</doc-comment-owner>
	</storageModule>
	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
	<storageModule moduleId="scannerConfiguration">
		<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
		<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
			<buildOutputProvider>
				<openAction enabled="true" filePath=""/>
				<parser enabled="true"/>
			</buildOutputProvider>
			<scannerInfoProvider id="specsFile">
				<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
				<parser enabled="true"/>
			</scannerInfoProvider>
		</profile>
		<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
			<buildOutputProvider>
				<openAction enabled="true" filePath=""/>
				<parser enabled="true"/>
			</buildOutputProvider>
			<scannerInfoProvider id="makefileGenerator">
				<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
				<parser enabled="true"/>
			</scannerInfoProvider>
		</profile>
		<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
			<buildOutputProvider>
				<openAction enabled="true" filePath=""/>
				<parser enabled="true"/>
			</buildOutputProvider>
			<scannerInfoProvider id="specsFile">
				<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
				<parser enabled="true"/>
			</scannerInfoProvider>
		</profile>
		<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
			<buildOutputProvider>
				<openAction enabled="true" filePath=""/>
				<parser enabled="true"/>
			</buildOutputProvider>
			<scannerInfoProvider id="specsFile">
				<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
				<parser enabled="true"/>
			</scannerInfoProvider>
		</profile>
		<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
			<buildOutputProvider>
				<openAction enabled="true" filePath=""/>
				<parser enabled="true"/>
			</buildOutputProvider>
			<scannerInfoProvider id="specsFile">
				<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
				<parser enabled="true"/>
			</scannerInfoProvider>
		</profile>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1940954787;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.469104331">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.1665401269;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.494265807">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.43985841;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.1045483126">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1240277003;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.1264397663">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.459535216;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.2120860882">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1758599759;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.466964704">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.1401626953;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1708330939">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1671954574;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.304556051">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.2110267806;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.903720746">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.1850250798;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1752562149">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1296776241;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.268633283">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.265387950;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.563557831">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.629007265;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.450470600">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.2085396856;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1885998497">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.652522784;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1098348915">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.1149397878;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.1156849140">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.586941236;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1654082299">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.1214991320;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.332043455">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.cpp.compiler.cygwin.base.440957653;cdt.managedbuild.tool.gnu.cpp.compiler.input.cygwin.1117446939">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
		<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311;cdt.managedbuild.toolchain.gnu.cygwin.base.1260156311.1722659113;cdt.managedbuild.tool.gnu.c.compiler.cygwin.base.158380621;cdt.managedbuild.tool.gnu.c.compiler.input.cygwin.1945715073">
			<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"/>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="makefileGenerator">
					<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
			<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
				<buildOutputProvider>
					<openAction enabled="true" filePath=""/>
					<parser enabled="true"/>
				</buildOutputProvider>
				<scannerInfoProvider id="specsFile">
					<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
					<parser enabled="true"/>
				</scannerInfoProvider>
			</profile>
		</scannerConfigBuildInfo>
	</storageModule>
</cproject>


================================================
FILE: external/cub/.project
================================================
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>GIT_CUB</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
			<triggers>clean,full,incremental,</triggers>
			<arguments>
			</arguments>
		</buildCommand>
		<buildCommand>
			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
			<triggers>full,incremental,</triggers>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.eclipse.cdt.core.cnature</nature>
		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
		<nature>org.eclipse.cdt.core.ccnature</nature>
	</natures>
</projectDescription>


================================================
FILE: external/cub/.settings/.gitignore
================================================
/language.settings.xml


================================================
FILE: external/cub/.settings/org.eclipse.cdt.codan.core.prefs
================================================
eclipse.preferences.version=1
org.eclipse.cdt.codan.checkers.errnoreturn=Warning
org.eclipse.cdt.codan.checkers.errnoreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},implicit\=>false}
org.eclipse.cdt.codan.checkers.errreturnvalue=Error
org.eclipse.cdt.codan.checkers.errreturnvalue.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.checkers.nocommentinside=-Error
org.eclipse.cdt.codan.checkers.nocommentinside.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.checkers.nolinecomment=-Error
org.eclipse.cdt.codan.checkers.nolinecomment.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.checkers.noreturn=Error
org.eclipse.cdt.codan.checkers.noreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},implicit\=>false}
org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation=Error
org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem=Error
org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem=Warning
org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem=Error
org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem=Warning
org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},no_break_comment\=>"no break",last_case_param\=>true,empty_case_param\=>false}
org.eclipse.cdt.codan.internal.checkers.CatchByReference=Warning
org.eclipse.cdt.codan.internal.checkers.CatchByReference.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},unknown\=>false,exceptions\=>()}
org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem=Error
org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization=Warning
org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},skip\=>true}
org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.InvalidArguments=Error
org.eclipse.cdt.codan.internal.checkers.InvalidArguments.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem=Error
org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem=Error
org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem=Error
org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker=-Info
org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},pattern\=>"^[a-z]",macro\=>true,exceptions\=>()}
org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem=Warning
org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.OverloadProblem=Error
org.eclipse.cdt.codan.internal.checkers.OverloadProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem=Error
org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem=Error
org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem=-Warning
org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem=-Warning
org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem=Warning
org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true,exceptions\=>()}
org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem=Warning
org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},paramNot\=>false}
org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem=Warning
org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},else\=>false,afterelse\=>false}
org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem=Warning
org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true}
org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem=Warning
org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true}
org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem=Warning
org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true,exceptions\=>("@(\#)","$Id")}
org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
useParentScope=false


================================================
FILE: external/cub/.settings/org.eclipse.cdt.core.prefs
================================================
eclipse.preferences.version=1
indexer/indexAllFiles=true
indexer/indexAllHeaderVersions=false
indexer/indexAllVersionsSpecificHeaders=
indexer/indexOnOpen=false
indexer/indexUnusedHeadersWithAlternateLang=false
indexer/indexUnusedHeadersWithDefaultLang=true
indexer/indexerId=org.eclipse.cdt.core.fastIndexer
indexer/skipFilesLargerThanMB=8
indexer/skipImplicitReferences=false
indexer/skipIncludedFilesLargerThanMB=16
indexer/skipMacroReferences=false
indexer/skipReferences=false
indexer/skipTypeReferences=false
indexer/useHeuristicIncludeResolution=true
org.eclipse.cdt.core.formatter.alignment_for_arguments_in_method_invocation=16
org.eclipse.cdt.core.formatter.alignment_for_assignment=16
org.eclipse.cdt.core.formatter.alignment_for_base_clause_in_type_declaration=48
org.eclipse.cdt.core.formatter.alignment_for_binary_expression=16
org.eclipse.cdt.core.formatter.alignment_for_compact_if=0
org.eclipse.cdt.core.formatter.alignment_for_conditional_expression=48
org.eclipse.cdt.core.formatter.alignment_for_conditional_expression_chain=18
org.eclipse.cdt.core.formatter.alignment_for_constructor_initializer_list=0
org.eclipse.cdt.core.formatter.alignment_for_declarator_list=16
org.eclipse.cdt.core.formatter.alignment_for_enumerator_list=48
org.eclipse.cdt.core.formatter.alignment_for_expression_list=0
org.eclipse.cdt.core.formatter.alignment_for_expressions_in_array_initializer=16
org.eclipse.cdt.core.formatter.alignment_for_member_access=0
org.eclipse.cdt.core.formatter.alignment_for_overloaded_left_shift_chain=16
org.eclipse.cdt.core.formatter.alignment_for_parameters_in_method_declaration=48
org.eclipse.cdt.core.formatter.alignment_for_throws_clause_in_method_declaration=48
org.eclipse.cdt.core.formatter.brace_position_for_array_initializer=next_line
org.eclipse.cdt.core.formatter.brace_position_for_block=next_line
org.eclipse.cdt.core.formatter.brace_position_for_block_in_case=end_of_line
org.eclipse.cdt.core.formatter.brace_position_for_method_declaration=next_line
org.eclipse.cdt.core.formatter.brace_position_for_namespace_declaration=end_of_line
org.eclipse.cdt.core.formatter.brace_position_for_switch=end_of_line
org.eclipse.cdt.core.formatter.brace_position_for_type_declaration=next_line
org.eclipse.cdt.core.formatter.comment.min_distance_between_code_and_line_comment=1
org.eclipse.cdt.core.formatter.comment.never_indent_line_comments_on_first_column=true
org.eclipse.cdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=true
org.eclipse.cdt.core.formatter.compact_else_if=true
org.eclipse.cdt.core.formatter.continuation_indentation=1
org.eclipse.cdt.core.formatter.continuation_indentation_for_array_initializer=1
org.eclipse.cdt.core.formatter.format_guardian_clause_on_one_line=false
org.eclipse.cdt.core.formatter.indent_access_specifier_compare_to_type_header=false
org.eclipse.cdt.core.formatter.indent_access_specifier_extra_spaces=0
org.eclipse.cdt.core.formatter.indent_body_declarations_compare_to_access_specifier=true
org.eclipse.cdt.core.formatter.indent_body_declarations_compare_to_namespace_header=false
org.eclipse.cdt.core.formatter.indent_breaks_compare_to_cases=true
org.eclipse.cdt.core.formatter.indent_declaration_compare_to_template_header=false
org.eclipse.cdt.core.formatter.indent_empty_lines=false
org.eclipse.cdt.core.formatter.indent_statements_compare_to_block=true
org.eclipse.cdt.core.formatter.indent_statements_compare_to_body=true
org.eclipse.cdt.core.formatter.indent_switchstatements_compare_to_cases=true
org.eclipse.cdt.core.formatter.indent_switchstatements_compare_to_switch=false
org.eclipse.cdt.core.formatter.indentation.size=4
org.eclipse.cdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_after_template_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_catch_in_try_statement=insert
org.eclipse.cdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_colon_in_constructor_initializer_list=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_else_in_if_statement=insert
org.eclipse.cdt.core.formatter.insert_new_line_before_identifier_in_function_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_in_empty_block=insert
org.eclipse.cdt.core.formatter.insert_space_after_assignment_operator=insert
org.eclipse.cdt.core.formatter.insert_space_after_binary_operator=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_angle_bracket_in_template_arguments=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_angle_bracket_in_template_parameters=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_brace_in_block=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_base_clause=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_case=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_base_types=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_declarator_list=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_expression_list=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_template_arguments=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_template_parameters=insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_angle_bracket_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_angle_bracket_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_bracket=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_exception_specification=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_postfix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_prefix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_question_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_after_semicolon_in_for=insert
org.eclipse.cdt.core.formatter.insert_space_after_unary_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_assignment_operator=insert
org.eclipse.cdt.core.formatter.insert_space_before_binary_operator=insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_angle_bracket_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_angle_bracket_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_bracket=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_exception_specification=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_base_clause=insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_case=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_default=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_base_types=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_declarator_list=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_expression_list=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_angle_bracket_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_angle_bracket_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_block=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_namespace_declaration=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_bracket=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_exception_specification=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_for=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_if=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_while=insert
org.eclipse.cdt.core.formatter.insert_space_before_postfix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_prefix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_question_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_before_semicolon=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_unary_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_brackets=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_exception_specification=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.join_wrapped_lines=true
org.eclipse.cdt.core.formatter.keep_else_statement_on_same_line=false
org.eclipse.cdt.core.formatter.keep_empty_array_initializer_on_one_line=false
org.eclipse.cdt.core.formatter.keep_imple_if_on_one_line=true
org.eclipse.cdt.core.formatter.keep_then_statement_on_same_line=false
org.eclipse.cdt.core.formatter.lineSplit=80
org.eclipse.cdt.core.formatter.number_of_empty_lines_to_preserve=1
org.eclipse.cdt.core.formatter.put_empty_statement_on_new_line=true
org.eclipse.cdt.core.formatter.tabulation.char=space
org.eclipse.cdt.core.formatter.tabulation.size=4
org.eclipse.cdt.core.formatter.use_tabs_only_for_leading_indentations=false


================================================
FILE: external/cub/.settings/org.eclipse.cdt.ui.prefs
================================================
eclipse.preferences.version=1
formatter_profile=_B40C
formatter_settings_version=1


================================================
FILE: external/cub/.settings/org.eclipse.core.runtime.prefs
================================================
content-types/enabled=true
content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh
content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu
eclipse.preferences.version=1


================================================
FILE: external/cub/CHANGE_LOG.TXT
================================================
1.7.4    09/20/2017
    - Bug fixes: 
        - Issue #114: Can't pair non-trivially-constructible values in radix sort
        - Issue #115: WarpReduce segmented reduction broken in CUDA 9 for logical warp sizes < 32 
          		  
//-----------------------------------------------------------------------------

1.7.3    08/28/2017
    - Bug fixes: 
        - Issue #110: DeviceHistogram null-pointer exception bug for iterator inputs
          		  
//-----------------------------------------------------------------------------

1.7.2    08/26/2017
    - Bug fixes: 
        - Issue #104: Device-wide reduction is now "run-to-run" deterministic for 
          pseudo-associative reduction operators (like floating point addition)
          		  
//-----------------------------------------------------------------------------

1.7.1    08/18/2017
    - Updated Volta radix sorting tuning policies 
    - Bug fixes: 
        - Issue #104 (uint64_t warp-reduce broken for cub 1.7.0 on cuda 8 and older)
        - Issue #103 (Can't mix Thrust 9.0 and CUB)
        - Issue #102 (CUB pulls in windows.h which defines min/max macros that conflict with std::min/std::max)
        - Issue #99 (Radix sorting crashes NVCC on Windows 10 for SM52)
        - Issue #98 (cuda-memcheck: --tool initcheck failed with lineOfSight)
        - Issue #94 (Git clone size)
        - Issue #93 (accept iterators for segment offsets)
        - Issue #87 (CUB uses anonymous unions which is not valid C++)
        - Issue #44 (Check for C++ 11 should be changed that Visual Studio 2013 is also recognized as C++ 11 capable)
          		  
//-----------------------------------------------------------------------------

1.7.0    06/07/2017
    - Compatible with CUDA9 and SM7.x (Volta) independent thread scheduling 
    - API change: remove cub::WarpAll() and cub::WarpAny().  These functions served to 
      emulate __all and __any functionality for SM1.x devices, which did not have those 
      operations.  However, the SM1.x devices are now deprecated in CUDA, and the 
      interfaces of these two functions are now lacking the lane-mask needed 
      for collectives to run on Volta SMs having independent thread scheduling. 
    - Bug fixes: 
        - Issue #86 Incorrect results with ReduceByKey
          		  
//-----------------------------------------------------------------------------

1.6.4    12/06/2016
    - Updated sm_5x, sm_6x tuning policies for radix sorting (3.5B and 3.4B 
      32b keys/s on TitanX and GTX 1080, respectively)
    - Bug fixes: 
        - Restore fence work-around for scan (reduce-by-key, etc.) hangs 
          in CUDA 8.5
        - Issue 65: DeviceSegmentedRadixSort should allow inputs to have 
          pointer-to-const type 
        - Mollify Clang device-side warnings
        - Remove out-dated VC project files
          		  
//-----------------------------------------------------------------------------

1.6.3    11/20/2016
    - API change: BlockLoad and BlockStore are now templated by the local
      data type, instead of the Iterator type.  This allows for output iterators
      having \p void as their \p value_type (e.g., discard iterators).
    - Updated GP100 tuning policies for radix sorting (6.2B 32b keys/s)
    - Bug fixes: 
        - Issue #74: Warpreduce executes reduction operator for out-of-bounds items
        - Issue #72 (cub:InequalityWrapper::operator() should be non-const)
        - Issue #71 (KeyValuePair won't work if Key has non-trivial ctor)
		- Issue #70 1.5.3 breaks BlockScan API.  Retroactively reversioned
		  from v1.5.3 -> v1.6 to appropriately indicate API change.
		- Issue #69 cub::BlockStore::Store doesn't compile if OutputIteratorT::value_type != T  
        - Issue #68 (cub::TilePrefixCallbackOp::WarpReduce doesn't permit ptx 
          arch specialization)
		- Improved support for Win32 platforms (warnings, alignment, etc)
		  
//-----------------------------------------------------------------------------

1.6.2 (was 1.5.5)    10/25/2016
    - Updated Pascal tuning policies for radix sorting
    - Bug fixes: 
        - Fix for arm64 compilation of caching allocator

//-----------------------------------------------------------------------------

1.6.1 (was 1.5.4)    10/14/2016
    - Bug fixes: 
        - Fix for radix sorting bug introduced by scan refactorization

//-----------------------------------------------------------------------------

1.6.0 (was 1.5.3)    10/11/2016
    - API change: Device/block/warp-wide exclusive scans have been revised to now 
      accept an "initial value" (instead of an "identity value") for seeding the 
      computation with an arbitrary prefix.  
    - API change: Device-wide reductions and scans can now have input sequence types that are 
      different from output sequence types (as long as they are coercible)
    - Reduce repository size (move doxygen binary to doc repository)
    - Minor reductions in block-scan instruction count
    - Bug fixes: 
        - Issue #55: warning in cub/device/dispatch/dispatch_reduce_by_key.cuh 
        - Issue #59: cub::DeviceScan::ExclusiveSum can't prefix sum of float into double
        - Issue #58: Infinite loop in cub::CachingDeviceAllocator::NearestPowerOf
        - Issue #47: Caching allocator needs to clean up cuda error upon successful retry 
        - Issue #46: Very high amount of needed memory from the cub::DeviceHistogram::HistogramEven routine
        - Issue #45: Caching Device Allocator fails with debug output enabled
        - Fix for generic-type reduce-by-key warpscan (sm3.x and newer)

//-----------------------------------------------------------------------------

1.5.2    03/21/2016
	- Improved medium-size scan performance for sm5x (Maxwell)
    - Refactored caching allocator for device memory
   		- Spends less time locked
		- Failure to allocate a block from the runtime will retry once after
		  freeing cached allocations
		- Now respects max-bin (issue where blocks in excess of max-bin were
		  still being retained in free cache)
		- Uses C++11 mutex when available
    - Bug fixes: 
        - Fix for generic-type reduce-by-key warpscan (sm3.x and newer)
          
//-----------------------------------------------------------------------------

1.5.1    12/28/2015
    - Bug fixes: 
        - Fix for incorrect DeviceRadixSort output for some small problems on 
          Maxwell SM52 architectures
        - Fix for macro redefinition warnings when compiling with Thrust sort
          
//-----------------------------------------------------------------------------

1.5.0    12/14/2015
    - New Features:
        - Added new segmented device-wide operations for device-wide sort and 
          reduction primitives.
    - Bug fixes: 
        - Fix for Git Issue 36 (Compilation error with GCC 4.8.4 nvcc 7.0.27) and
          Forums thread (ThreadLoad generates compiler errors when loading from 
          pointer-to-const)
        - Fix for Git Issue 29 (DeviceRadixSort::SortKeys<bool> yields compiler 
          errors)
        - Fix for Git Issue 26 (CUDA error: misaligned address after 
          cub::DeviceRadixSort::SortKeys())
        - Fix for incorrect/crash on 0-length problems, e.g., Git Issue 25 (Floating 
          point exception (core dumped) during cub::DeviceRadixSort::SortKeys)
        - Fix for CUDA 7.5 issues on SM 5.2 with SHFL-based warp-scan and warp-reduction 
          on non-primitive data types (e.g., user-defined structs)
        - Fix for small radix sorting problems where 0 temporary bytes were 
          required and users code was invoking malloc(0) on some systems where
          that returns NULL.  (Impl assumed was asking for size again and was not 
          running the sort.)
          
//-----------------------------------------------------------------------------

1.4.1    04/13/2015
    - Bug fixes: 
        - Fixes for CUDA 7.0 issues with SHFL-based warp-scan and warp-reduction 
          on non-primitive data types (e.g., user-defined structs)
        - Fixes for minor CUDA 7.0 performance regressions in cub::DeviceScan,
          DeviceReduceByKey
        - Fixes to allow cub::DeviceRadixSort and cub::BlockRadixSort on bool types
        - Remove requirement for callers to define the CUB_CDP macro 
          when invoking CUB device-wide routines using CUDA dynamic parallelism
        - Fix for headers not being included in the proper order (or missing includes)
          for some block-wide functions
          
//-----------------------------------------------------------------------------

1.4.0    03/18/2015
    - New Features:
		- Support and performance tuning for new Maxwell GPU architectures
        - Updated cub::DeviceHistogram implementation that provides the same 
          "histogram-even" and "histogram-range" functionality as IPP/NPP.
          Provides extremely fast and, perhaps more importantly, very 
          uniform performance response across diverse real-world datasets, 
          including pathological (homogeneous) sample distributions (resilience)
        - New cub::DeviceSpmv methods for multiplying sparse matrices by 
          dense vectors, load-balanced using a merge-based parallel decomposition.
        - New cub::DeviceRadixSort sorting entry-points that always return
          the sorted output into the specified buffer (as opposed to the 
          cub::DoubleBuffer in which it could end up in either buffer)
        - New cub::DeviceRunLengthEncode::NonTrivialRuns for finding the starting 
          offsets and lengths of all non-trivial runs (i.e., length > 1) of keys in 
          a given sequence.  (Useful for top-down partitioning algorithms like 
          MSD sorting of very-large keys.)
          
//-----------------------------------------------------------------------------

1.3.2    07/28/2014
    - Bug fixes: 
        - Fix for cub::DeviceReduce where reductions of small problems 
          (small enough to only dispatch a single thread block) would run in 
          the default stream (stream zero) regardless of whether an alternate
          stream was specified.  
          
//-----------------------------------------------------------------------------

1.3.1    05/23/2014
    - Bug fixes: 
        - Workaround for a benign WAW race warning reported by cuda-memcheck
          in BlockScan specialized for BLOCK_SCAN_WARP_SCANS algorithm.
        - Fix for bug in DeviceRadixSort where the algorithm may sort more 
          key bits than the caller specified (up to the nearest radix digit).
        - Fix for ~3% DeviceRadixSort performance regression on Kepler and 
          Fermi that was introduced in v1.3.0.  

//-----------------------------------------------------------------------------

1.3.0    05/12/2014
    - New features:
        - CUB's collective (block-wide, warp-wide) primitives underwent a minor 
          interface refactoring:
            - To provide the appropriate support for multidimensional thread blocks,
              The interfaces for collective classes are now template-parameterized 
              by X, Y, and Z block dimensions (with BLOCK_DIM_Y and BLOCK_DIM_Z being 
              optional, and BLOCK_DIM_X replacing BLOCK_THREADS).  Furthermore, the 
              constructors that accept remapped linear thread-identifiers have been 
              removed: all primitives now assume a row-major thread-ranking for 
              multidimensional thread blocks.  
            - To allow the host program (compiled by the host-pass) to 
              accurately determine the device-specific storage requirements for 
              a given collective (compiled for each device-pass), the interfaces 
              for collective classes are now (optionally) template-parameterized 
              by the desired PTX compute capability. This is useful when 
              aliasing collective storage to shared memory that has been 
              allocated dynamically by the host at the kernel call site.   
            - Most CUB programs having typical 1D usage should not require any 
              changes to accommodate these updates.
        - Added new "combination" WarpScan methods for efficiently computing 
          both inclusive and exclusive prefix scans (and sums).
    - Bug fixes: 
        - Fixed bug in cub::WarpScan (which affected cub::BlockScan and 
          cub::DeviceScan) where incorrect results (e.g., NAN) would often be 
          returned when parameterized for floating-point types (fp32, fp64).
        - Workaround-fix for ptxas error when compiling with the -G flag on Linux 
          (for debug instrumentation) 
        - Misc. workaround-fixes for certain scan scenarios (using custom 
          scan operators) where code compiled for SM1x is run on newer 
          GPUs of higher compute-capability: the compiler could not tell
          which memory space was being used by collective operations and was 
          mistakenly using global ops instead of shared ops. 

//-----------------------------------------------------------------------------

1.2.3    04/01/2014
    - Bug fixes: 
        - Fixed access violation bug in DeviceReduce::ReduceByKey for non-primitive value types
        - Fixed code-snippet bug in ArgIndexInputIteratorT documentation 

//-----------------------------------------------------------------------------

1.2.2    03/03/2014
    - New features:
        - Added MS VC++ project solutions for device-wide and block-wide examples 
    - Performance:
        - Added a third algorithmic variant of cub::BlockReduce for improved performance
          when using commutative operators (e.g., numeric addition)
    - Bug fixes: 
        - Fixed bug where inclusion of Thrust headers in a certain order prevented CUB device-wide primitives from working properly

//-----------------------------------------------------------------------------

1.2.0    02/25/2014
    - New features:
        - Added device-wide reduce-by-key (DeviceReduce::ReduceByKey, DeviceReduce::RunLengthEncode) 
    - Performance
        - Improved DeviceScan, DeviceSelect, DevicePartition performance
    - Documentation and testing:
        - Compatible with CUDA 6.0
        - Added performance-portability plots for many device-wide primitives to doc 
        - Update doc and tests to reflect iterator (in)compatibilities with CUDA 5.0 (and older) and Thrust 1.6 (and older).
    - Bug fixes 
        - Revised the operation of temporary tile status bookkeeping for DeviceScan (and similar) to be safe for current code run on future platforms (now uses proper fences)  
        - Fixed DeviceScan bug where Win32 alignment disagreements between host and device regarding user-defined data types would corrupt tile status
        - Fixed BlockScan bug where certain exclusive scans on custom data types for the BLOCK_SCAN_WARP_SCANS variant would return incorrect results for the first thread in the block
        - Added workaround for TexRefInputIteratorT to work with CUDA 6.0
    
//-----------------------------------------------------------------------------

1.1.1    12/11/2013
    - New features:
        - Added TexObjInputIteratorT, TexRefInputIteratorT, CacheModifiedInputIteratorT, and CacheModifiedOutputIterator types for loading & storing arbitrary types through the cache hierarchy.  Compatible with Thrust API. 
        - Added descending sorting to DeviceRadixSort and BlockRadixSort
        - Added min, max, arg-min, and arg-max to DeviceReduce
        - Added DeviceSelect (select-unique, select-if, and select-flagged)
        - Added DevicePartition (partition-if, partition-flagged)
        - Added generic cub::ShuffleUp(), cub::ShuffleDown(), and cub::ShuffleIndex() for warp-wide communication of arbitrary data types (SM3x+)
        - Added cub::MaxSmOccupancy() for accurately determining SM occupancy for any given kernel function pointer
    - Performance
        - Improved DeviceScan and DeviceRadixSort performance for older architectures (SM10-SM30)
    - Interface changes:
        - Refactored block-wide I/O (BlockLoad and BlockStore), removing cache-modifiers from their interfaces.  The CacheModifiedInputIteratorT and CacheModifiedOutputIterator should now be used with BlockLoad and BlockStore to effect that behavior.
        - Rename device-wide "stream_synchronous" param to "debug_synchronous" to avoid confusion about usage
    - Documentation and testing:
        - Added simple examples of device-wide methods
        - Improved doxygen documentation and example snippets
        - Improved test coverage to include up to 21,000 kernel variants and 851,000 unit tests (per architecture, per platform)
    - Bug fixes 
        - Fixed misc DeviceScan, BlockScan, DeviceReduce, and BlockReduce bugs when operating on non-primitive types for older architectures SM10-SM13
        - Fixed DeviceScan / WarpReduction bug: SHFL-based segmented reduction producing incorrect results for multi-word types (size > 4B) on Linux 
        - Fixed BlockScan bug: For warpscan-based scans, not all threads in the first warp were entering the prefix callback functor
        - Fixed DeviceRadixSort bug: race condition with key-value pairs for pre-SM35 architectures
        - Fixed DeviceRadixSort bug: incorrect bitfield-extract behavior with long keys on 64bit Linux
        - Fixed BlockDiscontinuity bug: compilation error for types other than int32/uint32
        - CDP (device-callable) versions of device-wide methods now report the same temporary storage allocation size requirement as their host-callable counterparts
     

//-----------------------------------------------------------------------------

1.0.2    08/23/2013
    - Corrections to code snippet examples for BlockLoad, BlockStore, and BlockDiscontinuity
    - Cleaned up unnecessary/missing header includes.  You can now safely #include a specific .cuh (instead of cub.cuh)
    - Bug/compilation fixes for BlockHistogram 

//-----------------------------------------------------------------------------

1.0.1    08/08/2013
    - New collective interface idiom (specialize::construct::invoke).
    - Added best-in-class DeviceRadixSort.  Implements short-circuiting for homogenous digit passes.
    - Added best-in-class DeviceScan.  Implements single-pass "adaptive-lookback" strategy.
    - Significantly improved documentation (with example code snippets) 
    - More extensive regression test suite for aggressively testing collective variants
    - Allow non-trivially-constructed types (previously unions had prevented aliasing temporary storage of those types)
    - Improved support for Kepler SHFL (collective ops now use SHFL for types larger than 32b)
    - Better code generation for 64-bit addressing within BlockLoad/BlockStore
    - DeviceHistogram now supports histograms of arbitrary bins
    - Misc. fixes
      - Workarounds for SM10 codegen issues in uncommonly-used WarpScan/Reduce specializations
      - Updates to accommodate CUDA 5.5 dynamic parallelism   


//-----------------------------------------------------------------------------

0.9.4    05/07/2013

    - Fixed compilation errors for SM10-SM13
    - Fixed compilation errors for some WarpScan entrypoints on SM30+
    - Added block-wide histogram (BlockHistogram256)
    - Added device-wide histogram (DeviceHistogram256)
    - Added new BlockScan algorithm variant BLOCK_SCAN_RAKING_MEMOIZE, which 
      trades more register consumption for less shared memory I/O
    - Updates to BlockRadixRank to use BlockScan (which improves performance
      on Kepler due to SHFL instruction)
    - Allow types other than C++ primitives to be used in WarpScan::*Sum methods 
      if they only have operator + overloaded.  (Previously they also required 
      to support assignment from int(0).) 
    - Update BlockReduce's BLOCK_REDUCE_WARP_REDUCTIONS algorithm to work even 
      when block size is not an even multiple of warp size
    - Added work management utility descriptors (GridQueue, GridEvenShare)
    - Refactoring of DeviceAllocator interface and CachingDeviceAllocator 
      implementation 
    - Misc. documentation updates and corrections. 
     
//-----------------------------------------------------------------------------

0.9.2    04/04/2013

    - Added WarpReduce.  WarpReduce uses the SHFL instruction when applicable. 
      BlockReduce now uses this WarpReduce instead of implementing its own.
    - Misc. fixes for 64-bit Linux compilation warnings and errors.
    - Misc. documentation updates and corrections. 

//-----------------------------------------------------------------------------

0.9.1    03/09/2013

    - Fix for ambiguity in BlockScan::Reduce() between generic reduction and 
      summation.  Summation entrypoints are now called ::Sum(), similar to the 
      convention in BlockScan.
    - Small edits to mainpage documentation and download tracking
    
//-----------------------------------------------------------------------------

0.9.0    03/07/2013    

    - Initial "preview" release.    CUB is the first durable, high-performance library 
      of cooperative block-level, warp-level, and thread-level primitives for CUDA 
      kernel programming.  More primitives and examples coming soon!
    

================================================
FILE: external/cub/LICENSE.TXT
================================================
Copyright (c) 2010-2011, Duane Merrill.  All rights reserved.
Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
   *  Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   *  Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   *  Neither the name of the NVIDIA CORPORATION nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: external/cub/README.md
================================================
<hr>
<h3>About CUB</h3>

Current release: v1.7.4 (09/20/2017)

We recommend the [CUB Project Website](http://nvlabs.github.com/cub) and the [cub-users discussion forum](http://groups.google.com/group/cub-users) for further information and examples.

CUB provides state-of-the-art, reusable software components for every layer 
of the CUDA programming model:
- [<b><em>Device-wide primitives</em></b>](https://nvlabs.github.com/cub/group___device_module.html) 
  - Sort, prefix scan, reduction, histogram, etc.  
  - Compatible with CUDA dynamic parallelism
- [<b><em>Block-wide "collective" primitives</em></b>](https://nvlabs.github.com/cub/group___block_module.html)
  - I/O, sort, prefix scan, reduction, histogram, etc.  
  - Compatible with arbitrary thread block sizes and types 
- [<b><em>Warp-wide "collective" primitives</em></b>](https://nvlabs.github.com/cub/group___warp_module.html)
  - Warp-wide prefix scan, reduction, etc.
  - Safe and architecture-specific
- [<b><em>Thread and resource utilities</em></b>](https://nvlabs.github.com/cub/group___thread_module.html)
  - PTX intrinsics, device reflection, texture-caching iterators, caching memory allocators, etc. 

![Orientation of collective primitives within the CUDA software stack](http://nvlabs.github.com/cub/cub_overview.png)

<br><hr>
<h3>A Simple Example</h3>

```C++
#include <cub/cub.cuh>
 
// Block-sorting CUDA kernel
// Each thread block sorts its own tile of 128 * 16 = 2048 consecutive keys
// from d_in and writes the sorted tile to the same offsets in d_out.
__global__ void BlockSortKernel(int *d_in, int *d_out)
{
     using namespace cub;

     // Specialize BlockRadixSort, BlockLoad, and BlockStore for 128 threads 
     // owning 16 integer items each
     typedef BlockRadixSort<int, 128, 16>                     BlockRadixSort;
     typedef BlockLoad<int, 128, 16, BLOCK_LOAD_TRANSPOSE>   BlockLoad;
     typedef BlockStore<int, 128, 16, BLOCK_STORE_TRANSPOSE> BlockStore;
 
     // Allocate shared memory
     // (a union: the three collectives run one after another, separated by
     // __syncthreads(), so they can reuse the same storage)
     __shared__ union {
         typename BlockRadixSort::TempStorage  sort;
         typename BlockLoad::TempStorage       load; 
         typename BlockStore::TempStorage      store; 
     } temp_storage; 

     int block_offset = blockIdx.x * (128 * 16);	  // Offset for this block's segment

     // Obtain a segment of 2048 consecutive keys that are blocked across threads
     int thread_keys[16];
     BlockLoad(temp_storage.load).Load(d_in + block_offset, thread_keys);
     __syncthreads();

     // Collectively sort the keys
     BlockRadixSort(temp_storage.sort).Sort(thread_keys);
     __syncthreads();

     // Store the sorted segment 
     BlockStore(temp_storage.store).Store(d_out + block_offset, thread_keys);
}
```

Each thread block uses cub::BlockRadixSort to collectively sort 
its own input segment.  The class is specialized by the 
data type being sorted, by the number of threads per block, by the number of 
keys per thread, and implicitly by the targeted compilation architecture.  

The cub::BlockLoad and cub::BlockStore classes are similarly specialized.    
Furthermore, to provide coalesced accesses to device memory, these primitives are 
configured to access memory using a striped access pattern (where consecutive threads 
simultaneously access consecutive items) and then <em>transpose</em> the keys into 
a [<em>blocked arrangement</em>](index.html#sec4sec3) of elements across threads. 

Once specialized, these classes expose opaque `TempStorage` member types.  
The thread block uses these storage types to statically allocate the union of 
shared memory needed by the thread block.  (Alternatively these storage types 
could be aliased to global memory allocations).

<br><hr>
<h3>Stable Releases</h3>

CUB releases are labeled using version identifiers having three fields: 
*epoch.feature.update*.  The *epoch* field corresponds to support for
a major change in the CUDA programming model.  The *feature* field 
corresponds to a stable set of features, functionality, and interface.  The
*update* field corresponds to a bug-fix or performance update for that
feature set.  At the moment, we do not publicly provide non-stable releases 
such as development snapshots, beta releases or rolling releases.  (Feel free
to contact us if you would like such things.)  See the 
[CUB Project Website](http://nvlabs.github.com/cub) for more information.

<br><hr>
<h3>Contributors</h3>

CUB is developed as an open-source project by [NVIDIA Research](http://research.nvidia.com).  The primary contributor is [Duane Merrill](http://github.com/dumerrill).

<br><hr>
<h3>Open Source License</h3>

CUB is available under the "New BSD" open-source license:

```
Copyright (c) 2010-2011, Duane Merrill.  All rights reserved.
Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
   *  Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   *  Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   *  Neither the name of the NVIDIA CORPORATION nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```


================================================
FILE: external/cub/common.mk
================================================
#/******************************************************************************
# * Copyright (c) 2011, Duane Merrill.  All rights reserved.
# * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
# * 
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions are met:
# *	 * Redistributions of source code must retain the above copyright
# *	   notice, this list of conditions and the following disclaimer.
# *	 * Redistributions in binary form must reproduce the above copyright
# *	   notice, this list of conditions and the following disclaimer in the
# *	   documentation and/or other materials provided with the distribution.
# *	 * Neither the name of the NVIDIA CORPORATION nor the
# *	   names of its contributors may be used to endorse or promote products
# *	   derived from this software without specific prior written permission.
# * 
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *
#******************************************************************************/


#-------------------------------------------------------------------------------
# Commandline Options
#-------------------------------------------------------------------------------

# [sm=<XXX,...>] Compute-capability to compile for, e.g., "sm=200,300,350" (SM20 by default).
#
# The comma-separated list is rewritten with '-' separators into SM_ARCH (which
# is also embedded in BIN_SUFFIX).  Each supported architecture is then detected
# with $(findstring ...) and contributes one -gencode entry to SM_TARGETS and one
# -DSM<arch> define to SM_DEF.
#
# TEST_ARCH is plainly reassigned by every matching stanza.  Because the stanzas
# are ordered from the newest architecture (700) down to the oldest (100), it
# ends up holding the lowest architecture that was requested.
  
COMMA = ,
ifdef sm
	SM_ARCH = $(subst $(COMMA),-,$(sm))
else 
    SM_ARCH = 200
endif

ifeq (700, $(findstring 700, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_70,code=\"sm_70,compute_70\" 
    SM_DEF 		+= -DSM700
    TEST_ARCH 	= 700
endif
ifeq (620, $(findstring 620, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_62,code=\"sm_62,compute_62\" 
    SM_DEF 		+= -DSM620
    TEST_ARCH 	= 620
endif
ifeq (610, $(findstring 610, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_61,code=\"sm_61,compute_61\" 
    SM_DEF 		+= -DSM610
    TEST_ARCH 	= 610
endif
ifeq (600, $(findstring 600, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_60,code=\"sm_60,compute_60\" 
    SM_DEF 		+= -DSM600
    TEST_ARCH 	= 600
endif
ifeq (520, $(findstring 520, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_52,code=\"sm_52,compute_52\" 
    SM_DEF 		+= -DSM520
    TEST_ARCH 	= 520
endif
ifeq (370, $(findstring 370, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_37,code=\"sm_37,compute_37\" 
    SM_DEF 		+= -DSM370
    TEST_ARCH 	= 370
endif
ifeq (350, $(findstring 350, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_35,code=\"sm_35,compute_35\" 
    SM_DEF 		+= -DSM350
    TEST_ARCH 	= 350
endif
ifeq (300, $(findstring 300, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_30,code=\"sm_30,compute_30\"
    SM_DEF 		+= -DSM300
    TEST_ARCH 	= 300
endif
# Note: sm=210 emits sm_21 machine code but compute_20 PTX.
ifeq (210, $(findstring 210, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_20,code=\"sm_21,compute_20\"
    SM_DEF 		+= -DSM210
    TEST_ARCH 	= 210
endif
ifeq (200, $(findstring 200, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_20,code=\"sm_20,compute_20\"
    SM_DEF 		+= -DSM200
    TEST_ARCH 	= 200
endif
ifeq (130, $(findstring 130, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_13,code=\"sm_13,compute_13\" 
    SM_DEF 		+= -DSM130
    TEST_ARCH 	= 130
endif
ifeq (120, $(findstring 120, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_12,code=\"sm_12,compute_12\" 
    SM_DEF 		+= -DSM120
    TEST_ARCH 	= 120
endif
ifeq (110, $(findstring 110, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_11,code=\"sm_11,compute_11\" 
    SM_DEF 		+= -DSM110
    TEST_ARCH 	= 110
endif
ifeq (100, $(findstring 100, $(SM_ARCH)))
    SM_TARGETS 	+= -gencode=arch=compute_10,code=\"sm_10,compute_10\" 
    SM_DEF 		+= -DSM100
    TEST_ARCH 	= 100
endif


# [cdp=<0|1>] CDP enable option (default: no)
# Enabling it defines CUB_CDP and adds relocatable device code plus the
# device runtime library to the nvcc flags.  CDP_SUFFIX feeds BIN_SUFFIX.
ifeq ($(cdp), 1)
	DEFINES += -DCUB_CDP
	CDP_SUFFIX = cdp
    NVCCFLAGS += -rdc=true -lcudadevrt
else
	CDP_SUFFIX = nocdp
endif


# [force32=<0|1>] Device addressing mode option (64-bit device pointers by default) 
# Selects the host word size flag (CPU_ARCH) and the matching BIN_SUFFIX
# component.  NPPI is only defined for the 64-bit configuration.
ifeq ($(force32), 1)
	CPU_ARCH = -m32
	CPU_ARCH_SUFFIX = i386
else
	CPU_ARCH = -m64
	CPU_ARCH_SUFFIX = x86_64
    NPPI = -lnppist
endif


# [abi=<0|1>] CUDA ABI option (enabled by default) 
# Only an explicit abi=0 disables the ABI; any other value (or none) keeps it.
ifneq ($(abi), 0)
	ABI_SUFFIX = abi
else 
	NVCCFLAGS += -Xptxas -abi=no
	ABI_SUFFIX = noabi
endif


# [open64=<0|1>] Middle-end compiler option (nvvm by default)
ifeq ($(open64), 1)
	NVCCFLAGS += -open64
	PTX_SUFFIX = open64
else 
	PTX_SUFFIX = nvvm
endif


# [verbose=<0|1>] Verbose toolchain output from nvcc option
ifeq ($(verbose), 1)
	NVCCFLAGS += -v
endif


# [keep=<0|1>] Keep intermediate compilation artifacts option
ifeq ($(keep), 1)
	NVCCFLAGS += -keep
endif

# [debug=<0|1>] Generate debug mode code
# (-G is nvcc's device-debug switch)
ifeq ($(debug), 1)
	NVCCFLAGS += -G
endif


#-------------------------------------------------------------------------------
# Compiler and compilation platform
#-------------------------------------------------------------------------------

# Directory containing this makefile fragment (the most recently included file)
CUB_DIR = $(dir $(lastword $(MAKEFILE_LIST)))

# Quoted path of the nvcc found on PATH; the toolkit version is either taken
# from the command line (nvccver=...) or parsed from `nvcc --version`.
NVCC = "$(shell which nvcc)"
ifdef nvccver
    NVCC_VERSION = $(nvccver)
else
    NVCC_VERSION = $(strip $(shell nvcc --version | grep release | sed 's/.*release //' |  sed 's/,.*//'))
endif

# detect OS
# (character classes are quoted so the shell cannot glob-expand them against
# single-character file names in the current directory)
OSUPPER = $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')

# Default flags: verbose kernel properties (regs, smem, cmem, etc.); runtimes for compilation phases 
NVCCFLAGS += $(SM_DEF) -Xptxas -v -Xcudafe -\# 

ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER)))
    # For MSVC
    # Enable more warnings and treat as errors
    NVCCFLAGS += -Xcompiler /W3 -Xcompiler /WX
    # Disable excess x86 floating point precision that can lead to results being labeled incorrectly
    NVCCFLAGS += -Xcompiler /fp:strict
    # Help the compiler/linker work with huge numbers of kernels on Windows
	NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500
	CC = cl
	
	# Multithreaded runtime
	NVCCFLAGS += -Xcompiler /MT
	
# Pick the CUDA runtime library that matches the host word size:
# force32=1 builds with -m32 and therefore needs the Win32 import library,
# every other configuration builds with -m64 and needs the x64 one.
ifeq ($(force32), 1)
	CUDART_CYG = "$(shell dirname $(NVCC))/../lib/Win32/cudart.lib"
else
	CUDART_CYG = "$(shell dirname $(NVCC))/../lib/x64/cudart.lib"
endif
	CUDART = "$(shell cygpath -w $(CUDART_CYG))"
else
    # For g++
    # Disable excess x86 floating point precision that can lead to results being labeled incorrectly
    NVCCFLAGS += -Xcompiler -ffloat-store
    CC = g++
# Same word-size rule as above: 32-bit builds link against lib/, 64-bit
# builds against lib64/.
ifeq ($(force32), 1)
    CUDART = "$(shell dirname $(NVCC))/../lib/libcudart_static.a"
else
    CUDART = "$(shell dirname $(NVCC))/../lib64/libcudart_static.a"
endif
endif

# Suffix to append to each binary
BIN_SUFFIX = sm$(SM_ARCH)_$(PTX_SUFFIX)_$(NVCC_VERSION)_$(ABI_SUFFIX)_$(CDP_SUFFIX)_$(CPU_ARCH_SUFFIX)


#-------------------------------------------------------------------------------
# Dependency Lists
#-------------------------------------------------------------------------------

# rwildcard(<dir>, <pattern>): recursive wildcard.  For every entry found under
# <dir> it recurses into "<entry>/" and keeps the entry itself when it matches
# <pattern> (whose '*' is translated to make's '%' for $(filter ...)).
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))

# Prerequisites shared by targets that use CUB: every .cuh file below CUB_DIR
# plus this makefile fragment itself.
CUB_DEPS = 	$(call rwildcard, $(CUB_DIR),*.cuh) \
			$(CUB_DIR)common.mk
		


================================================
FILE: external/cub/cub/agent/agent_histogram.cuh
================================================
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram.
 */

#pragma once

#include <iterator>

#include "../util_type.cuh"
#include "../block/block_load.cuh"
#include "../grid/grid_queue.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/******************************************************************************
 * Tuning policy
 ******************************************************************************/

/**
 * Preference for where a thread block keeps its privatized histogram counters.
 */
enum BlockHistogramMemoryPreference
{
    GMEM,       ///< Privatized counters in global memory
    SMEM,       ///< Privatized counters in shared memory
    BLEND       ///< NOTE(review): presumably a mix of GMEM and SMEM; the selection logic is not visible in this part of the file -- confirm
};


/**
 * Parameterizable tuning policy type for AgentHistogram
 *
 * A compile-time-only carrier: it has no data members and simply re-exports
 * its template arguments as nested constants, which AgentHistogram reads as
 * AgentHistogramPolicyT::<NAME>.  The integral/boolean parameters are exposed
 * through an anonymous enum; LOAD_ALGORITHM and LOAD_MODIFIER are exposed as
 * static constants of their own enumeration types.
 */
template <
    int                             _BLOCK_THREADS,                 ///< Threads per thread block
    int                             _PIXELS_PER_THREAD,             ///< Pixels per thread (per tile of input)
    BlockLoadAlgorithm              _LOAD_ALGORITHM,                ///< The BlockLoad algorithm to use
    CacheLoadModifier               _LOAD_MODIFIER,                 ///< Cache load modifier for reading input elements
    bool                            _RLE_COMPRESS,                  ///< Whether to perform localized RLE to compress samples before histogramming
    BlockHistogramMemoryPreference  _MEM_PREFERENCE,                ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins)
    bool                            _WORK_STEALING>                 ///< Whether to dequeue tiles from a global work queue
struct AgentHistogramPolicy
{
    enum
    {
        BLOCK_THREADS           = _BLOCK_THREADS,                   ///< Threads per thread block
        PIXELS_PER_THREAD       = _PIXELS_PER_THREAD,               ///< Pixels per thread (per tile of input)
        IS_RLE_COMPRESS         = _RLE_COMPRESS,                    ///< Whether to perform localized RLE to compress samples before histogramming
        MEM_PREFERENCE          = _MEM_PREFERENCE,                  ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins)
        IS_WORK_STEALING        = _WORK_STEALING,                   ///< Whether to dequeue tiles from a global work queue
    };

    static const BlockLoadAlgorithm     LOAD_ALGORITHM          = _LOAD_ALGORITHM;          ///< The BlockLoad algorithm to use
    static const CacheLoadModifier      LOAD_MODIFIER           = _LOAD_MODIFIER;           ///< Cache load modifier for reading input elements
};


/******************************************************************************
 * Thread block abstractions
 ******************************************************************************/

/**
 * \brief AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram.
 */
template <
    typename    AgentHistogramPolicyT,     ///< Parameterized AgentHistogramPolicy tuning policy type
    int         PRIVATIZED_SMEM_BINS,           ///< Number of privatized shared-memory histogram bins of any channel.  Zero indicates privatized counters to be maintained in device-accessible memory.
    int         NUM_CHANNELS,                   ///< Number of channels interleaved in the input data.  Supports up to four channels.
    int         NUM_ACTIVE_CHANNELS,            ///< Number of channels actively being histogrammed
    typename    SampleIteratorT,                ///< Random-access input iterator type for reading samples
    typename    CounterT,                       ///< Integer type for counting sample occurrences per histogram bin
    typename    PrivatizedDecodeOpT,            ///< The transform operator type for determining privatized counter indices from samples, one for each channel
    typename    OutputDecodeOpT,                ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel
    typename    OffsetT,                        ///< Signed integer type for global offsets
    int         PTX_ARCH = CUB_PTX_ARCH>        ///< PTX compute capability
struct AgentHistogram
{
    //---------------------------------------------------------------------
    // Types and constants
    //---------------------------------------------------------------------

    /// The sample type of the input iterator
    typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;

    /// The pixel type of SampleT
    typedef typename CubVector<SampleT, NUM_CHANNELS>::Type PixelT;

    /// The quad type of SampleT
    typedef typename CubVector<SampleT, 4>::Type QuadT;

    /// Constants
    enum
    {
        BLOCK_THREADS           = AgentHistogramPolicyT::BLOCK_THREADS,

        PIXELS_PER_THREAD       = AgentHistogramPolicyT::PIXELS_PER_THREAD,
        SAMPLES_PER_THREAD      = PIXELS_PER_THREAD * NUM_CHANNELS,
        QUADS_PER_THREAD        = SAMPLES_PER_THREAD / 4,

        TILE_PIXELS             = PIXELS_PER_THREAD * BLOCK_THREADS,
        TILE_SAMPLES            = SAMPLES_PER_THREAD * BLOCK_THREADS,

        IS_RLE_COMPRESS            = AgentHistogramPolicyT::IS_RLE_COMPRESS,

        MEM_PREFERENCE          = (PRIVATIZED_SMEM_BINS > 0) ?
                                        AgentHistogramPolicyT::MEM_PREFERENCE :
                                        GMEM,

        IS_WORK_STEALING           = AgentHistogramPolicyT::IS_WORK_STEALING,
    };

    /// Cache load modifier for reading input elements
    static const CacheLoadModifier LOAD_MODIFIER = AgentHistogramPolicyT::LOAD_MODIFIER;


    /// Input iterator wrapper type (for applying cache modifier)
    typedef typename If<IsPointer<SampleIteratorT>::VALUE,
            CacheModifiedInputIterator<LOAD_MODIFIER, SampleT, OffsetT>,     // Wrap the native input pointer with CacheModifiedInputIterator
            SampleIteratorT>::Type                                           // Directly use the supplied input iterator type
        WrappedSampleIteratorT;

    /// Pixel input iterator type (for applying cache modifier)
    typedef CacheModifiedInputIterator<LOAD_MODIFIER, PixelT, OffsetT>
        WrappedPixelIteratorT;

    /// Quad input iterator type (for applying cache modifier)
    typedef CacheModifiedInputIterator<LOAD_MODIFIER, QuadT, OffsetT>
        WrappedQuadIteratorT;

    /// Parameterized BlockLoad type for samples
    typedef BlockLoad<
            SampleT,
            BLOCK_THREADS,
            SAMPLES_PER_THREAD,
            AgentHistogramPolicyT::LOAD_ALGORITHM>
        BlockLoadSampleT;

    /// Parameterized BlockLoad type for pixels
    typedef BlockLoad<
            PixelT,
            BLOCK_THREADS,
            PIXELS_PER_THREAD,
            AgentHistogramPolicyT::LOAD_ALGORITHM>
        BlockLoadPixelT;

    /// Parameterized BlockLoad type for quads
    typedef BlockLoad<
            QuadT,
            BLOCK_THREADS,
            QUADS_PER_THREAD,
            AgentHistogramPolicyT::LOAD_ALGORITHM>
        BlockLoadQuadT;

    /// Shared memory type required by this thread block
    struct _TempStorage
    {
        CounterT histograms[NUM_ACTIVE_CHANNELS][PRIVATIZED_SMEM_BINS + 1];     // Smem needed for block-privatized smem histogram (with 1 word of padding)

        int tile_idx;

        // Aliasable storage layout
        union Aliasable
        {
            typename BlockLoadSampleT::TempStorage sample_load;     // Smem needed for loading a tile of samples
            typename BlockLoadPixelT::TempStorage pixel_load;       // Smem needed for loading a tile of pixels
            typename BlockLoadQuadT::TempStorage quad_load;         // Smem needed for loading a tile of quads

        } aliasable;
    };


    /// Temporary storage type (unionable)
    struct TempStorage : Uninitialized<_TempStorage> {};


    //---------------------------------------------------------------------
    // Per-thread fields
    //---------------------------------------------------------------------

    /// Reference to temp_storage
    _TempStorage &temp_storage;

    /// Sample input iterator (with cache modifier applied, if possible)
    WrappedSampleIteratorT d_wrapped_samples;

    /// Native pointer for input samples (possibly NULL if unavailable)
    SampleT* d_native_samples;

    /// The number of output bins for each channel
    int (&num_output_bins)[NUM_ACTIVE_CHANNELS];

    /// The number of privatized bins for each channel
    int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS];

    /// Reference to gmem privatized histograms for each channel
    CounterT* d_privatized_histograms[NUM_ACTIVE_CHANNELS];

    /// Reference to final output histograms (gmem)
    CounterT* (&d_output_histograms)[NUM_ACTIVE_CHANNELS];

    /// The transform operator for determining output bin-ids from privatized counter indices, one for each channel
    OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS];

    /// The transform operator for determining privatized counter indices from samples, one for each channel
    PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS];

    /// Whether to prefer privatized smem counters vs privatized global counters
    bool prefer_smem;


    //---------------------------------------------------------------------
    // Initialize privatized bin counters
    //---------------------------------------------------------------------

    /**
     * Zero the privatized bin counters of every active channel.
     *
     * Each thread starts at bin threadIdx.x and advances in steps of
     * BLOCK_THREADS until it passes num_privatized_bins[channel].  A barrier
     * follows so that no thread continues before all counters are reset.
     *
     * \param privatized_histograms  Per-channel pointers to the counters to clear
     */
    __device__ __forceinline__ void InitBinCounters(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS])
    {
        #pragma unroll
        for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
        {
            CounterT* const channel_counters = privatized_histograms[channel];

            // Strided sweep over this channel's bins
            for (int bin = threadIdx.x; bin < num_privatized_bins[channel]; bin += BLOCK_THREADS)
                channel_counters[bin] = 0;
        }

        // Wait until every thread has finished zeroing its share
        CTA_SYNC();
    }


    /**
     * Zero the privatized counters held in temp_storage.histograms.
     *
     * Builds a per-channel pointer table over the shared-memory histogram rows
     * and hands it to InitBinCounters().
     */
    __device__ __forceinline__ void InitSmemBinCounters()
    {
        CounterT* smem_counters[NUM_ACTIVE_CHANNELS];

        for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
        {
            smem_counters[channel] = temp_storage.histograms[channel];
        }

        InitBinCounters(smem_counters);
    }


    // Initialize privatized bin counters.  Specialized for privatized global-memory counters.
    // Forwards the agent's d_privatized_histograms pointer table to InitBinCounters(),
    // which zeroes the counters and finishes with a barrier (CTA_SYNC).
    __device__ __forceinline__ void InitGmemBinCounters()
    {
        InitBinCounters(d_privatized_histograms);
    }


    //---------------------------------------------------------------------
    // Update final output histograms
    //---------------------------------------------------------------------

    /**
     * Fold the privatized bin counts into the final output histograms.
     *
     * After a barrier, threads stride over each channel's privatized bins
     * (start threadIdx.x, step BLOCK_THREADS).  For every bin the channel's
     * output decode operator is asked for the output bin; the privatized bin
     * index (cast to SampleT) is the "sample" and "count > 0" is the validity
     * flag.  The output bin is pre-set to -1 and the count is added atomically
     * only when the operator leaves it non-negative.
     *
     * \param privatized_histograms  Per-channel pointers to the privatized counters to read
     */
    __device__ __forceinline__ void StoreOutput(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS])
    {
        // All threads must be done updating the privatized counters first
        CTA_SYNC();

        #pragma unroll
        for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
        {
            const int num_bins = num_privatized_bins[channel];

            for (int src_bin = threadIdx.x; src_bin < num_bins; src_bin += BLOCK_THREADS)
            {
                CounterT    bin_count   = privatized_histograms[channel][src_bin];
                bool        has_samples = bin_count > 0;
                int         dst_bin     = -1;

                output_decode_op[channel].template BinSelect<LOAD_MODIFIER>((SampleT) src_bin, dst_bin, has_samples);

                // A negative bin means the operator selected no output bin
                if (dst_bin < 0)
                    continue;

                atomicAdd(&d_output_histograms[channel][dst_bin], bin_count);
            }
        }
    }


    /**
     * Fold the privatized counters held in temp_storage.histograms into the
     * final output histograms.
     *
     * Builds a per-channel pointer table over the shared-memory histogram rows
     * and hands it to StoreOutput().
     */
    __device__ __forceinline__ void StoreSmemOutput()
    {
        CounterT* smem_counters[NUM_ACTIVE_CHANNELS];

        for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
        {
            smem_counters[channel] = temp_storage.histograms[channel];
        }

        StoreOutput(smem_counters);
    }


    // Update final output histograms from privatized histograms.  Specialized for privatized global-memory counters.
    // Forwards the agent's d_privatized_histograms pointer table to StoreOutput(),
    // which begins with a barrier (CTA_SYNC) before reading the counters.
    __device__ __forceinline__ void StoreGmemOutput()
    {
        StoreOutput(d_privatized_histograms);
    }


    //---------------------------------------------------------------------
    // Tile accumulation
    //---------------------------------------------------------------------

    /**
     * Accumulate a thread's pixels into the privatized histograms, collapsing
     * runs of consecutive pixels that fall into the same bin.
     *
     * For each active channel the bin of every pixel is computed first (bins
     * start at -1 and are filled in by the channel's privatized decode
     * operator).  The bins are then scanned left to right; one atomicAdd
     * carrying the run length is issued when the bin changes, and one more for
     * the final run.  Runs whose bin is negative are not counted.
     *
     * \param samples                This thread's samples, indexed [pixel][channel]
     * \param is_valid               Per-pixel validity flags handed to the decode operator
     * \param privatized_histograms  Per-channel pointers to the counters to update
     * \param is_rle_compress        Tag selecting this overload (value unused)
     */
    __device__ __forceinline__ void AccumulatePixels(
        SampleT             samples[PIXELS_PER_THREAD][NUM_CHANNELS],
        bool                is_valid[PIXELS_PER_THREAD],
        CounterT*           privatized_histograms[NUM_ACTIVE_CHANNELS],
        Int2Type<true>      is_rle_compress)
    {
        #pragma unroll
        for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
        {
            // Step 1: decode the bin of every pixel in this channel
            int pixel_bins[PIXELS_PER_THREAD];

            #pragma unroll
            for (int pixel = 0; pixel < PIXELS_PER_THREAD; ++pixel)
            {
                pixel_bins[pixel] = -1;
                privatized_decode_op[channel].template BinSelect<LOAD_MODIFIER>(samples[pixel][channel], pixel_bins[pixel], is_valid[pixel]);
            }

            // Step 2: flush a run whenever the next pixel lands in a different bin
            CounterT run_length = 1;

            #pragma unroll
            for (int pixel = 0; pixel < PIXELS_PER_THREAD - 1; ++pixel)
            {
                if (pixel_bins[pixel] == pixel_bins[pixel + 1])
                {
                    // Same bin as the successor: the run keeps growing
                    run_length++;
                }
                else
                {
                    if (pixel_bins[pixel] >= 0)
                        atomicAdd(privatized_histograms[channel] + pixel_bins[pixel], run_length);

                    // The successor starts a new run of length one
                    run_length = 1;
                }
            }

            // Step 3: flush the run that ends at the last pixel
            if (pixel_bins[PIXELS_PER_THREAD - 1] >= 0)
                atomicAdd(privatized_histograms[channel] + pixel_bins[PIXELS_PER_THREAD - 1], run_length);
        }
    }


    /**
     * Accumulate a thread's pixels into the privatized histograms one sample
     * at a time (no run-length compression).
     *
     * For every pixel and active channel the bin starts at -1 and is filled in
     * by the channel's privatized decode operator; a non-negative bin receives
     * an atomic increment of one.
     *
     * \param samples                This thread's samples, indexed [pixel][channel]
     * \param is_valid               Per-pixel validity flags handed to the decode operator
     * \param privatized_histograms  Per-channel pointers to the counters to update
     * \param is_rle_compress        Tag selecting this overload (value unused)
     */
    __device__ __forceinline__ void AccumulatePixels(
        SampleT             samples[PIXELS_PER_THREAD][NUM_CHANNELS],
        bool                is_valid[PIXELS_PER_THREAD],
        CounterT*           privatized_histograms[NUM_ACTIVE_CHANNELS],
        Int2Type<false>     is_rle_compress)
    {
        #pragma unroll
        for (int pixel = 0; pixel < PIXELS_PER_THREAD; ++pixel)
        {
            #pragma unroll
            for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
            {
                int selected_bin = -1;
                privatized_decode_op[channel].template BinSelect<LOAD_MODIFIER>(samples[pixel][channel], selected_bin, is_valid[pixel]);

                // A negative bin means the operator selected no bin
                if (selected_bin < 0)
                    continue;

                atomicAdd(privatized_histograms[channel] + selected_bin, 1);
            }
        }
    }


    /**
     * Accumulate a thread's pixels into the privatized counters held in
     * temp_storage.histograms.
     *
     * Builds a per-channel pointer table over the shared-memory histogram rows
     * and dispatches to the AccumulatePixels() overload selected by
     * IS_RLE_COMPRESS.
     */
    __device__ __forceinline__ void AccumulateSmemPixels(
        SampleT             samples[PIXELS_PER_THREAD][NUM_CHANNELS],
        bool                is_valid[PIXELS_PER_THREAD])
    {
        CounterT* smem_counters[NUM_ACTIVE_CHANNELS];

        for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
        {
            smem_counters[channel] = temp_storage.histograms[channel];
        }

        Int2Type<IS_RLE_COMPRESS> rle_tag;
        AccumulatePixels(samples, is_valid, smem_counters, rle_tag);
    }


    /**
     * Accumulate pixel, specialized for gmem privatized histogram
     *
     * Forwards the agent's d_privatized_histograms pointer table to the
     * AccumulatePixels() overload selected by IS_RLE_COMPRESS.
     */
    __device__ __forceinline__ void AccumulateGmemPixels(
        SampleT             samples[PIXELS_PER_THREAD][NUM_CHANNELS],
        bool                is_valid[PIXELS_PER_THREAD])
    {
        AccumulatePixels(samples, is_valid, d_privatized_histograms, Int2Type<IS_RLE_COMPRESS>());
    }



    //---------------------------------------------------------------------
    // Tile loading
    //---------------------------------------------------------------------

    /**
     * Load a full, aligned tile through the pixel iterator (multi-channel case).
     *
     * The native sample pointer advanced by block_offset is reinterpreted as a
     * PixelT pointer and wrapped in the cache-modified pixel iterator.  The
     * destination array is viewed as PIXELS_PER_THREAD pixels for the block load.
     * NOTE(review): alignment of the address for PixelT is not checked here;
     * presumably the caller has established it -- confirm.
     *
     * \param block_offset          Offset, in samples, of the tile within the input
     * \param valid_samples         Unused by this overload
     * \param samples               Destination for this thread's samples
     * \param num_active_channels   Tag selecting this overload (value unused)
     */
    template <int _NUM_ACTIVE_CHANNELS>
    __device__ __forceinline__ void LoadFullAlignedTile(
        OffsetT                         block_offset,
        int                             valid_samples,
        SampleT                         (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS],
        Int2Type<_NUM_ACTIVE_CHANNELS>  num_active_channels)
    {
        typedef PixelT PixelArrayT[PIXELS_PER_THREAD];

        // View the tile's samples as whole pixels
        PixelT* tile_pixels = (PixelT*) (d_native_samples + block_offset);
        WrappedPixelIteratorT pixel_itr(tile_pixels);

        PixelArrayT& thread_pixels = reinterpret_cast<PixelArrayT&>(samples);

        BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load(pixel_itr, thread_pixels);
    }

    /**
     * Load a full, aligned tile through the quad iterator (single-channel case).
     *
     * The native sample pointer advanced by block_offset is reinterpreted as a
     * QuadT pointer and wrapped in the cache-modified quad iterator.  The
     * destination array is viewed as QUADS_PER_THREAD quads for the block load.
     * NOTE(review): alignment of the address for QuadT is not checked here;
     * presumably the caller has established it -- confirm.
     *
     * \param block_offset          Offset, in samples, of the tile within the input
     * \param valid_samples         Unused by this overload
     * \param samples               Destination for this thread's samples
     * \param num_active_channels   Tag selecting this overload (value unused)
     */
    __device__ __forceinline__ void LoadFullAlignedTile(
        OffsetT                         block_offset,
        int                             valid_samples,
        SampleT                         (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS],
        Int2Type<1>                     num_active_channels)
    {
        typedef QuadT QuadArrayT[QUADS_PER_THREAD];

        // View the tile's samples as groups of four
        QuadT* tile_quads = (QuadT*) (d_native_samples + block_offset);
        WrappedQuadIteratorT quad_itr(tile_quads);

        QuadArrayT& thread_quads = reinterpret_cast<QuadArrayT&>(samples);

        BlockLoadQuadT(temp_storage.aliasable.quad_load).Load(quad_itr, thread_quads);
    }

    // Load full, aligned tile
    __device__ __forceinline__ void LoadTile(
        OffsetT         block_offset,       ///< Sample offset of the tile
        int             valid_samples,      ///< Number of valid samples (forwarded unchanged)
        SampleT         (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS],    ///< [out] Destination for the loaded samples
        Int2Type<true>  is_full_tile,       ///< Overload-selection tag: tile is full
        Int2Type<true>  is_aligned)         ///< Overload-selection tag: tile is aligned
    {
        // Forward to the LoadFullAlignedTile overload chosen by the number of active channels
        LoadFullAlignedTile(
            block_offset,
            valid_samples,
            samples,
            Int2Type<NUM_ACTIVE_CHANNELS>());
    }

    // Load full, mis-aligned tile using sample iterator
    __device__ __forceinline__ void LoadTile(
        OffsetT         block_offset,       ///< Sample offset of the tile, relative to d_wrapped_samples
        int             valid_samples,      ///< Not used by this overload
        SampleT         (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS],    ///< [out] Destination for the loaded samples
        Int2Type<true>  is_full_tile,       ///< Overload-selection tag: tile is full
        Int2Type<false> is_aligned)         ///< Overload-selection tag: tile is not aligned
    {
        // Reinterpret the [pixel][channel] matrix as a flat per-thread sample array
        typedef SampleT FlatSamples[SAMPLES_PER_THREAD];
        FlatSamples &flat_samples = reinterpret_cast<FlatSamples&>(samples);

        // Sample-by-sample block load through the wrapped sample iterator
        BlockLoadSampleT sample_loader(temp_storage.aliasable.sample_load);
        sample_loader.Load(d_wrapped_samples + block_offset, flat_samples);
    }

    // Load partially-full, aligned tile using the pixel iterator
    __device__ __forceinline__ void LoadTile(
        OffsetT         block_offset,       ///< Sample offset of the tile, relative to d_native_samples
        int             valid_samples,      ///< Number of valid samples in the tile
        SampleT         (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS],    ///< [out] Destination for the loaded samples
        Int2Type<false> is_full_tile,       ///< Overload-selection tag: tile is partially full
        Int2Type<true>  is_aligned)         ///< Overload-selection tag: tile is aligned
    {
        // Reinterpret the [pixel][channel] sample matrix as a flat array of whole pixels
        typedef PixelT PixelArray[PIXELS_PER_THREAD];
        PixelArray &pixels = reinterpret_cast<PixelArray&>(samples);

        // Pixel-typed view of the native sample pointer at this tile's offset
        PixelT *tile_begin = (PixelT*) (d_native_samples + block_offset);
        WrappedPixelIteratorT pixel_itr(tile_begin);

        // The guarded load is bounded in whole pixels, not samples
        const int valid_pixels = valid_samples / NUM_CHANNELS;

        BlockLoadPixelT pixel_loader(temp_storage.aliasable.pixel_load);
        pixel_loader.Load(pixel_itr, pixels, valid_pixels);
    }

    // Load partially-full, mis-aligned tile using sample iterator
    __device__ __forceinline__ void LoadTile(
        OffsetT         block_offset,       ///< Sample offset of the tile, relative to d_wrapped_samples
        int             valid_samples,      ///< Number of valid samples in the tile
        SampleT         (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS],    ///< [out] Destination for the loaded samples
        Int2Type<false> is_full_tile,       ///< Overload-selection tag: tile is partially full
        Int2Type<false> is_aligned)         ///< Overload-selection tag: tile is not aligned
    {
        // Reinterpret the [pixel][channel] matrix as a flat per-thread sample array
        typedef SampleT FlatSamples[SAMPLES_PER_THREAD];
        FlatSamples &flat_samples = reinterpret_cast<FlatSamples&>(samples);

        // Guarded sample-by-sample block load, bounded by valid_samples
        BlockLoadSampleT sample_loader(temp_storage.aliasable.sample_load);
        sample_loader.Load(d_wrapped_samples + block_offset, flat_samples, valid_samples);
    }


    //---------------------------------------------------------------------
    // Tile processing
    //---------------------------------------------------------------------

    // Consume a tile of data samples
    template <
        bool IS_ALIGNED,        // Whether the tile offset is aligned (quad-aligned for single-channel, pixel-aligned for multi-channel)
        bool IS_FULL_TILE>      // Whether the tile is full
    __device__ __forceinline__ void ConsumeTile(OffsetT block_offset, int valid_samples)
    {
        SampleT     tile_samples[PIXELS_PER_THREAD][NUM_CHANNELS];
        bool        pixel_ok[PIXELS_PER_THREAD];

        // Fetch the tile; the two tags pick the matching LoadTile overload
        LoadTile(
            block_offset,
            valid_samples,
            tile_samples,
            Int2Type<IS_FULL_TILE>(),
            Int2Type<IS_ALIGNED>());

        // Index of the first pixel slot belonging to this thread
        const unsigned int first_pixel = threadIdx.x * PIXELS_PER_THREAD;

        // A pixel is valid when the tile is full, or when its first sample lies below valid_samples
        #pragma unroll
        for (int p = 0; p < PIXELS_PER_THREAD; ++p)
        {
            if (IS_FULL_TILE)
                pixel_ok[p] = true;
            else
                pixel_ok[p] = (((first_pixel + p) * NUM_CHANNELS) < valid_samples);
        }

        // Accumulate into whichever privatized histogram this block uses
#if CUB_PTX_ARCH >= 120
        if (!prefer_smem)
            AccumulateGmemPixels(tile_samples, pixel_ok);
        else
            AccumulateSmemPixels(tile_samples, pixel_ok);
#else
        AccumulateGmemPixels(tile_samples, pixel_ok);
#endif

    }


    // Consume row tiles.  Specialized for work-stealing from queue
    template <bool IS_ALIGNED>
    __device__ __forceinline__ void ConsumeTiles(
        OffsetT             num_row_pixels,             ///< The number of multi-channel pixels per row in the region of interest
        OffsetT             num_rows,                   ///< The number of rows in the region of interest
        OffsetT             row_stride_samples,         ///< The number of samples between starts of consecutive rows in the region of interest
        int                 tiles_per_row,              ///< Number of image tiles per row
        GridQueue<int>      tile_queue,                 ///< Queue descriptor from which further tile indices are drained
        Int2Type<true>      is_work_stealing)           ///< Overload-selection tag: work-stealing variant
    {

        // NOTE(review): tile counts and indices are kept in int because the queue hands out int indices
        int         num_tiles                   = num_rows * tiles_per_row;
        int         tile_idx                    = (blockIdx.y  * gridDim.x) + blockIdx.x;
        OffsetT     num_even_share_tiles        = gridDim.x * gridDim.y;

        // The first tile of each block is its linear block index; later tiles come from the queue
        while (tile_idx < num_tiles)
        {
            int     row             = tile_idx / tiles_per_row;
            int     col             = tile_idx - (row * tiles_per_row);
            OffsetT row_offset      = row * row_stride_samples;

            // Widen col before multiplying: (col * TILE_SAMPLES) was previously evaluated
            // in int and could overflow before being converted to OffsetT
            OffsetT col_offset      = static_cast<OffsetT>(col) * TILE_SAMPLES;
            OffsetT tile_offset     = row_offset + col_offset;

            if (col == tiles_per_row - 1)
            {
                // The last tile of a row always takes the guarded (partial) path
                OffsetT num_remaining = (num_row_pixels * NUM_CHANNELS) - col_offset;
                ConsumeTile<IS_ALIGNED, false>(tile_offset, num_remaining);
            }
            else
            {
                // Consume full tile
                ConsumeTile<IS_ALIGNED, true>(tile_offset, TILE_SAMPLES);
            }

            // Barrier before thread 0 overwrites temp_storage.tile_idx
            CTA_SYNC();

            // Get next tile: drained indices are offset past the initially assigned tiles
            if (threadIdx.x == 0)
                temp_storage.tile_idx = tile_queue.Drain(1) + num_even_share_tiles;

            // Barrier so that every thread observes the newly published tile index
            CTA_SYNC();

            tile_idx = temp_storage.tile_idx;
        }
    }


    // Consume row tiles.  Specialized for even-share (striped across thread blocks)
    template <bool IS_ALIGNED>
    __device__ __forceinline__ void ConsumeTiles(
        OffsetT             num_row_pixels,             ///< The number of multi-channel pixels per row in the region of interest
        OffsetT             num_rows,                   ///< The number of rows in the region of interest
        OffsetT             row_stride_samples,         ///< The number of samples between starts of consecutive rows in the region of interest
        int                 tiles_per_row,              ///< Number of image tiles per row (not used by this overload)
        GridQueue<int>      tile_queue,                 ///< Queue descriptor (not used by this overload)
        Int2Type<false>     is_work_stealing)           ///< Overload-selection tag: even-share variant
    {
        // Number of samples of interest in every row
        const OffsetT samples_per_row = num_row_pixels * NUM_CHANNELS;

        // Rows are strided across blockIdx.y, tiles within a row across blockIdx.x
        for (int row = blockIdx.y; row < num_rows; row += gridDim.y)
        {
            const OffsetT row_first = row * row_stride_samples;
            const OffsetT row_limit = row_first + samples_per_row;

            for (OffsetT offset = row_first + (blockIdx.x * TILE_SAMPLES);
                 offset < row_limit;
                 offset += gridDim.x * TILE_SAMPLES)
            {
                const OffsetT samples_left = row_limit - offset;

                if (samples_left >= TILE_SAMPLES)
                {
                    // Whole tile available
                    ConsumeTile<IS_ALIGNED, true>(offset, TILE_SAMPLES);
                    continue;
                }

                // Trailing partial tile: nothing further to do in this row
                ConsumeTile<IS_ALIGNED, false>(offset, samples_left);
                break;
            }
        }
    }


    //---------------------------------------------------------------------
    // Parameter extraction
    //---------------------------------------------------------------------

    // Return a native pixel pointer (specialized for CacheModifiedInputIterator types)
    template <
        CacheLoadModifier   _MODIFIER,
        typename            _ValueT,
        typename            _OffsetT>
    __device__ __forceinline__ SampleT* NativePointer(CacheModifiedInputIterator<_MODIFIER, _ValueT, _OffsetT> itr)
    {
        // Expose the raw pointer held by the cache-modified iterator
        SampleT *native_ptr = itr.ptr;
        return native_ptr;
    }

    // Return a native pixel pointer (specialized for other types)
    template <typename IteratorT>
    __device__ __forceinline__ SampleT* NativePointer(IteratorT itr)
    {
        // Any other iterator type yields NULL; callers test the result against NULL
        SampleT *no_native_ptr = NULL;
        return no_native_ptr;
    }



    //---------------------------------------------------------------------
    // Interface
    //---------------------------------------------------------------------


    /**
     * Constructor
     */
    __device__ __forceinline__ AgentHistogram(
        TempStorage         &temp_storage,                                      ///< Reference to temp_storage
        SampleIteratorT     d_samples,                                          ///< Input samples (wrapped into d_wrapped_samples)
        int                 (&num_output_bins)[NUM_ACTIVE_CHANNELS],            ///< The number of bins per final output histogram
        int                 (&num_privatized_bins)[NUM_ACTIVE_CHANNELS],        ///< The number of bins per privatized histogram
        CounterT*           (&d_output_histograms)[NUM_ACTIVE_CHANNELS],        ///< Reference to final output histograms
        CounterT*           (&d_privatized_histograms)[NUM_ACTIVE_CHANNELS],    ///< Reference to privatized histograms (base pointers; offset per block below)
        OutputDecodeOpT     (&output_decode_op)[NUM_ACTIVE_CHANNELS],           ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel
        PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS])       ///< The transform operator for determining privatized counter indices from samples, one for each channel
    :
        temp_storage(temp_storage.Alias()),
        d_wrapped_samples(d_samples),
        num_output_bins(num_output_bins),
        num_privatized_bins(num_privatized_bins),
        d_output_histograms(d_output_histograms),
        privatized_decode_op(privatized_decode_op),
        output_decode_op(output_decode_op),
        // Raw sample pointer, or NULL when the iterator has no NativePointer specialization
        d_native_samples(NativePointer(d_wrapped_samples)),
        prefer_smem((MEM_PREFERENCE == SMEM) ?
            true :                              // prefer smem privatized histograms
            (MEM_PREFERENCE == GMEM) ?
                false :                         // prefer gmem privatized histograms
                blockIdx.x & 1)                 // prefer blended privatized histograms (odd blockIdx.x -> smem)
    {
        // Linear index of this block within the 2D grid
        int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;

        // Initialize the locations of this block's privatized histograms:
        // each block's slice starts blockId * num_privatized_bins[CHANNEL] counters past the base pointer
        for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL)
            this->d_privatized_histograms[CHANNEL] = d_privatized_histograms[CHANNEL] + (blockId * num_privatized_bins[CHANNEL]);
    }


    /**
     * Consume image
     */
    __device__ __forceinline__ void ConsumeTiles(
        OffsetT             num_row_pixels,             ///< The number of multi-channel pixels per row in the region of interest
        OffsetT             num_rows,                   ///< The number of rows in the region of interest
        OffsetT             row_stride_samples,         ///< The number of samples between starts of consecutive rows in the region of interest
        int                 tiles_per_row,              ///< Number of image tiles per row
        GridQueue<int>      tile_queue)                 ///< Queue descriptor for assigning tiles of work to thread blocks
    {
        // Alignment masks for the vector types, plus the byte quantities they are tested against
        const int       quad_align_mask     = AlignBytes<QuadT>::ALIGN_BYTES - 1;
        const int       pixel_align_mask    = AlignBytes<PixelT>::ALIGN_BYTES - 1;
        const size_t    stride_bytes        = sizeof(SampleT) * row_stride_samples;
        const size_t    base_address        = size_t(d_native_samples);

        // Vectorized loads need a native pointer and suitably aligned row starts
        bool vectorizable = false;

        if (d_native_samples != NULL)
        {
            if (NUM_CHANNELS == 1)
            {
                // Single channel: quad-aligned base; the row stride only matters with more than one row
                vectorizable =
                    (SAMPLES_PER_THREAD % 4 == 0) &&
                    ((base_address & quad_align_mask) == 0) &&
                    ((num_rows == 1) || ((stride_bytes & quad_align_mask) == 0));
            }
            else if (NUM_CHANNELS > 1)
            {
                // Multi channel: pixel-aligned base and pixel-aligned row stride
                vectorizable =
                    ((base_address & pixel_align_mask) == 0) &&
                    ((stride_bytes & pixel_align_mask) == 0);
            }
        }

        // Dispatch on alignment; the tag selects the work-stealing or even-share overload
        if (vectorizable)
        {
            ConsumeTiles<true>(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type<IS_WORK_STEALING>());
        }
        else
        {
            ConsumeTiles<false>(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type<IS_WORK_STEALING>());
        }
    }


    /**
     * Initialize privatized bin counters, dispatching to the shared-memory or global-memory variant according to prefer_smem
     */
    __device__ __forceinline__ void InitBinCounters()
    {
        // Global-memory privatized counters
        if (!prefer_smem)
        {
            InitGmemBinCounters();
            return;
        }

        // Shared-memory privatized counters
        InitSmemBinCounters();
    }


    /**
     * Store privatized histogram to device-accessible memory, dispatching to the shared-memory or global-memory variant according to prefer_smem
     */
    __device__ __forceinline__ void StoreOutput()
    {
        // Global-memory privatized counters
        if (!prefer_smem)
        {
            StoreGmemOutput();
            return;
        }

        // Shared-memory privatized counters
        StoreSmemOutput();
    }


};




}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)



================================================
FILE: external/cub/cub/agent/agent_radix_sort_downsweep.cuh
================================================
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep.
 */


#pragma once

#include <stdint.h>

#include "../thread/thread_load.cuh"
#include "../block/block_load.cuh"
#include "../block/block_store.cuh"
#include "../block/block_radix_rank.cuh"
#include "../block/block_exchange.cuh"
#include "../util_type.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/******************************************************************************
 * Tuning policy types
 ******************************************************************************/

/**
 * Radix ranking algorithm
 */
enum RadixRankAlgorithm
{
    RADIX_RANK_BASIC    = 0,    ///< Basic ranking
    RADIX_RANK_MEMOIZE  = 1,    ///< Memoizing ranking
    RADIX_RANK_MATCH    = 2     ///< Match-based ranking
};

/**
 * Parameterizable tuning policy type for AgentRadixSortDownsweep
 */
template <
    int                         _BLOCK_THREADS,         ///< Threads per thread block
    int                         _ITEMS_PER_THREAD,      ///< Items per thread (per tile of input)
    BlockLoadAlgorithm          _LOAD_ALGORITHM,        ///< The BlockLoad algorithm to use
    CacheLoadModifier           _LOAD_MODIFIER,         ///< Cache load modifier for reading keys (and values)
    RadixRankAlgorithm          _RANK_ALGORITHM,        ///< The radix ranking algorithm to use
    BlockScanAlgorithm          _SCAN_ALGORITHM,        ///< The block scan algorithm to use
    int                         _RADIX_BITS>            ///< The number of radix bits, i.e., log2(bins)
struct AgentRadixSortDownsweepPolicy
{
    // Integral tuning parameters, re-exported as compile-time constants
    enum
    {
        BLOCK_THREADS       = _BLOCK_THREADS,       ///< Threads per thread block
        ITEMS_PER_THREAD    = _ITEMS_PER_THREAD,    ///< Items per thread (per tile of input)
        RADIX_BITS          = _RADIX_BITS           ///< The number of radix bits, i.e., log2(bins)
    };

    // Algorithm selections, re-exported with their enumeration types
    static const BlockLoadAlgorithm  LOAD_ALGORITHM = _LOAD_ALGORITHM;  ///< The BlockLoad algorithm to use
    static const CacheLoadModifier   LOAD_MODIFIER  = _LOAD_MODIFIER;   ///< Cache load modifier for reading keys (and values)
    static const RadixRankAlgorithm  RANK_ALGORITHM = _RANK_ALGORITHM;  ///< The radix ranking algorithm to use
    static const BlockScanAlgorithm  SCAN_ALGORITHM = _SCAN_ALGORITHM;  ///< The BlockScan algorithm to use
};


/******************************************************************************
 * Thread block abstractions
 ******************************************************************************/





/**
 * \brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep.
 */
template <
    typename AgentRadixSortDownsweepPolicy,     ///< Parameterized AgentRadixSortDownsweepPolicy tuning policy type
    bool     IS_DESCENDING,                     ///< Whether or not the sorted-order is high-to-low
    typename KeyT,                              ///< KeyT type
    typename ValueT,                            ///< ValueT type
    typename OffsetT>                           ///< Signed integer type for global offsets
struct AgentRadixSortDownsweep
{
    //---------------------------------------------------------------------
    // Type definitions and constants
    //---------------------------------------------------------------------

    // Appropriate unsigned-bits representation of KeyT
    typedef typename Traits<KeyT>::UnsignedBits UnsignedBits;

    // Extreme key bit patterns; ProcessTile uses one of them as the fill value for
    // out-of-bounds items (LOWEST_KEY when IS_DESCENDING, MAX_KEY otherwise)
    static const UnsignedBits           LOWEST_KEY  = Traits<KeyT>::LOWEST_KEY;
    static const UnsignedBits           MAX_KEY     = Traits<KeyT>::MAX_KEY;

    // Algorithm selections pulled from the tuning policy
    static const BlockLoadAlgorithm     LOAD_ALGORITHM  = AgentRadixSortDownsweepPolicy::LOAD_ALGORITHM;
    static const CacheLoadModifier      LOAD_MODIFIER   = AgentRadixSortDownsweepPolicy::LOAD_MODIFIER;
    static const RadixRankAlgorithm     RANK_ALGORITHM  = AgentRadixSortDownsweepPolicy::RANK_ALGORITHM;
    static const BlockScanAlgorithm     SCAN_ALGORITHM  = AgentRadixSortDownsweepPolicy::SCAN_ALGORITHM;

    // Integral constants pulled from, or derived from, the tuning policy
    enum
    {
        BLOCK_THREADS           = AgentRadixSortDownsweepPolicy::BLOCK_THREADS,
        ITEMS_PER_THREAD        = AgentRadixSortDownsweepPolicy::ITEMS_PER_THREAD,
        RADIX_BITS              = AgentRadixSortDownsweepPolicy::RADIX_BITS,
        TILE_ITEMS              = BLOCK_THREADS * ITEMS_PER_THREAD,

        RADIX_DIGITS            = 1 << RADIX_BITS,
        KEYS_ONLY               = Equals<ValueT, NullType>::VALUE,
    };

    // Input iterator wrapper types (for applying the cache modifier)
    typedef CacheModifiedInputIterator<LOAD_MODIFIER, UnsignedBits, OffsetT>    KeysItr;
    typedef CacheModifiedInputIterator<LOAD_MODIFIER, ValueT, OffsetT>          ValuesItr;

    // Radix ranking type to use:
    //   RADIX_RANK_BASIC   -> BlockRadixRank with its fourth template argument false
    //   RADIX_RANK_MEMOIZE -> BlockRadixRank with its fourth template argument true
    //   anything else      -> BlockRadixRankMatch
    typedef typename If<(RANK_ALGORITHM == RADIX_RANK_BASIC),
            BlockRadixRank<BLOCK_THREADS, RADIX_BITS, IS_DESCENDING, false, SCAN_ALGORITHM>,
            typename If<(RANK_ALGORITHM == RADIX_RANK_MEMOIZE),
                BlockRadixRank<BLOCK_THREADS, RADIX_BITS, IS_DESCENDING, true, SCAN_ALGORITHM>,
                BlockRadixRankMatch<BLOCK_THREADS, RADIX_BITS, IS_DESCENDING, SCAN_ALGORITHM>
            >::Type
        >::Type BlockRadixRankT;

    enum
    {
        /// Number of bin-starting offsets tracked per thread
        BINS_TRACKED_PER_THREAD = BlockRadixRankT::BINS_TRACKED_PER_THREAD
    };

    // BlockLoad type (keys)
    typedef BlockLoad<
        UnsignedBits,
        BLOCK_THREADS,
        ITEMS_PER_THREAD,
        LOAD_ALGORITHM> BlockLoadKeysT;

    // BlockLoad type (values)
    typedef BlockLoad<
        ValueT,
        BLOCK_THREADS,
        ITEMS_PER_THREAD,
        LOAD_ALGORITHM> BlockLoadValuesT;

    // Value exchange array type (one slot per item in a tile)
    typedef ValueT ValueExchangeT[TILE_ITEMS];

    /**
     * Shared memory storage layout
     */
    union __align__(16) _TempStorage
    {
        // All members overlay the same storage (this is a union), so the code
        // separates uses of different members with CTA_SYNC() barriers
        typename BlockLoadKeysT::TempStorage    load_keys;          // Scratch for BlockLoadKeysT
        typename BlockLoadValuesT::TempStorage  load_values;        // Scratch for BlockLoadValuesT
        typename BlockRadixRankT::TempStorage   radix_rank;         // Scratch for BlockRadixRankT

        // Key exchange buffer together with the per-digit relative offsets (both read by ScatterKeys)
        struct
        {
            UnsignedBits                        exchange_keys[TILE_ITEMS];
            OffsetT                             relative_bin_offsets[RADIX_DIGITS];
        };

        // Value exchange buffer (used by ScatterValues)
        Uninitialized<ValueExchangeT>           exchange_values;

        // Per-digit exclusive prefixes shared between threads (written in ProcessTile)
        OffsetT                                 exclusive_digit_prefix[RADIX_DIGITS];
    };


    /// Alias wrapper allowing storage to be unioned
    struct TempStorage : Uninitialized<_TempStorage> {};


    //---------------------------------------------------------------------
    // Thread fields
    //---------------------------------------------------------------------

    // Shared storage for this CTA
    _TempStorage    &temp_storage;

    // Input and output device pointers (inputs are wrapped in cache-modified iterators)
    KeysItr         d_keys_in;
    ValuesItr       d_values_in;
    UnsignedBits    *d_keys_out;
    ValueT          *d_values_out;

    // The global scatter base offset for each digit (valid in the first RADIX_DIGITS threads)
    OffsetT         bin_offset[BINS_TRACKED_PER_THREAD];

    // The least-significant bit position of the current digit to extract
    int             current_bit;

    // Number of bits in current digit
    int             num_bits;

    // Whether to short-circuit
    // NOTE(review): how this flag is set and consumed is not visible in this part of the file
    int             short_circuit;

    //---------------------------------------------------------------------
    // Utility methods
    //---------------------------------------------------------------------


    /**
     * Scatter ranked keys through shared memory, then to device-accessible memory
     */
    template <bool FULL_TILE>
    __device__ __forceinline__ void ScatterKeys(
        UnsignedBits    (&twiddled_keys)[ITEMS_PER_THREAD],             ///< Keys in twiddled form
        OffsetT         (&relative_bin_offsets)[ITEMS_PER_THREAD],      ///< [out] Per-item offset looked up for the item's digit
        int             (&ranks)[ITEMS_PER_THREAD],                     ///< Position of each key within the exchange buffer
        OffsetT         valid_items)                                    ///< Number of valid items (only consulted when !FULL_TILE)
    {
        // Phase 1: drop every key into the exchange buffer at its rank
        #pragma unroll
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            temp_storage.exchange_keys[ranks[i]] = twiddled_keys[i];

        CTA_SYNC();

        // Phase 2: read keys back with a BLOCK_THREADS stride and write them out
        #pragma unroll
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
        {
            // Slot of the exchange buffer handled by this thread in this round
            const unsigned int slot = threadIdx.x + (i * BLOCK_THREADS);

            UnsignedBits ranked_key     = temp_storage.exchange_keys[slot];
            UnsignedBits key_digit      = BFE(ranked_key, current_bit, num_bits);

            // Remember the digit's offset so the caller can reuse it afterwards
            relative_bin_offsets[i]     = temp_storage.relative_bin_offsets[key_digit];

            // Un-twiddle
            const UnsignedBits out_key  = Traits<KeyT>::TwiddleOut(ranked_key);

            const bool in_range = FULL_TILE || (static_cast<OffsetT>(slot) < valid_items);
            if (in_range)
            {
                d_keys_out[relative_bin_offsets[i] + slot] = out_key;
            }
        }
    }


    /**
     * Scatter ranked values through shared memory, then to device-accessible memory
     */
    template <bool FULL_TILE>
    __device__ __forceinline__ void ScatterValues(
        ValueT      (&values)[ITEMS_PER_THREAD],                    ///< Values belonging to this thread's keys
        OffsetT     (&relative_bin_offsets)[ITEMS_PER_THREAD],      ///< Per-item offsets added to the slot index when writing out
        int         (&ranks)[ITEMS_PER_THREAD],                     ///< Position of each value within the exchange buffer
        OffsetT     valid_items)                                    ///< Number of valid items (only consulted when !FULL_TILE)
    {
        // Barrier before the exchange storage is overwritten
        CTA_SYNC();

        ValueExchangeT &value_buffer = temp_storage.exchange_values.Alias();

        // Phase 1: drop every value into the exchange buffer at its rank
        #pragma unroll
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            value_buffer[ranks[i]] = values[i];

        CTA_SYNC();

        // Phase 2: read values back with a BLOCK_THREADS stride and write them out
        #pragma unroll
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
        {
            // Slot of the exchange buffer handled by this thread in this round
            const unsigned int slot = threadIdx.x + (i * BLOCK_THREADS);

            ValueT ranked_value = value_buffer[slot];

            const bool in_range = FULL_TILE || (static_cast<OffsetT>(slot) < valid_items);
            if (in_range)
            {
                d_values_out[relative_bin_offsets[i] + slot] = ranked_value;
            }
        }
    }

    /**
     * Load a tile of keys (specialized for full tile, any ranking algorithm)
     */
    template <int _RANK_ALGORITHM>
    __device__ __forceinline__ void LoadKeys(
        UnsignedBits                (&keys)[ITEMS_PER_THREAD],  ///< [out] Loaded keys
        OffsetT                     block_offset,               ///< Offset of the tile, relative to d_keys_in
        OffsetT                     valid_items,                ///< Not used by this overload
        UnsignedBits                oob_item,                   ///< Not used by this overload
        Int2Type<true>              is_full_tile,               ///< Overload-selection tag: tile is full
        Int2Type<_RANK_ALGORITHM>   rank_algorithm)             ///< Overload-selection tag: any ranking algorithm
    {
        // Unguarded block load of the whole tile
        BlockLoadKeysT key_loader(temp_storage.load_keys);
        key_loader.Load(d_keys_in + block_offset, keys);

        // Barrier after the load, before the unioned temp storage is reused
        CTA_SYNC();
    }


    /**
     * Load a tile of keys (specialized for partial tile, any ranking algorithm)
     */
    template <int _RANK_ALGORITHM>
    __device__ __forceinline__ void LoadKeys(
        UnsignedBits                (&keys)[ITEMS_PER_THREAD],  ///< [out] Loaded keys
        OffsetT                     block_offset,               ///< Offset of the tile, relative to d_keys_in
        OffsetT                     valid_items,                ///< Number of valid items in the tile
        UnsignedBits                oob_item,                   ///< Fill value passed to the guarded load
        Int2Type<false>             is_full_tile,               ///< Overload-selection tag: tile is partially full
        Int2Type<_RANK_ALGORITHM>   rank_algorithm)             ///< Overload-selection tag: any ranking algorithm
    {
        // Guarded block load, bounded by valid_items
        BlockLoadKeysT key_loader(temp_storage.load_keys);
        key_loader.Load(d_keys_in + block_offset, keys, valid_items, oob_item);

        // Barrier after the load, before the unioned temp storage is reused
        CTA_SYNC();
    }


    /**
     * Load a tile of keys (specialized for full tile, match ranking algorithm)
     */
    __device__ __forceinline__ void LoadKeys(
        UnsignedBits                (&keys)[ITEMS_PER_THREAD],  ///< [out] Loaded keys
        OffsetT                     block_offset,               ///< Offset of the tile, relative to d_keys_in
        OffsetT                     valid_items,                ///< Not used by this overload
        UnsignedBits                oob_item,                   ///< Not used by this overload
        Int2Type<true>              is_full_tile,               ///< Overload-selection tag: tile is full
        Int2Type<RADIX_RANK_MATCH>  rank_algorithm)             ///< Overload-selection tag: match ranking algorithm
    {
        // Direct warp-striped load; no temp storage and no barrier involved
        KeysItr tile_keys = d_keys_in + block_offset;
        LoadDirectWarpStriped(threadIdx.x, tile_keys, keys);
    }


    /**
     * Load a tile of keys (specialized for partial tile, match ranking algorithm)
     */
    __device__ __forceinline__ void LoadKeys(
        UnsignedBits                (&keys)[ITEMS_PER_THREAD],  ///< [out] Loaded keys
        OffsetT                     block_offset,               ///< Offset of the tile, relative to d_keys_in
        OffsetT                     valid_items,                ///< Number of valid items in the tile
        UnsignedBits                oob_item,                   ///< Fill value passed to the guarded load
        Int2Type<false>             is_full_tile,               ///< Overload-selection tag: tile is partially full
        Int2Type<RADIX_RANK_MATCH>  rank_algorithm)             ///< Overload-selection tag: match ranking algorithm
    {
        // Guarded direct warp-striped load; no temp storage and no barrier involved
        KeysItr tile_keys = d_keys_in + block_offset;
        LoadDirectWarpStriped(threadIdx.x, tile_keys, keys, valid_items, oob_item);
    }


    /**
     * Load a tile of values (specialized for full tile, any ranking algorithm)
     */
    template <int _RANK_ALGORITHM>
    __device__ __forceinline__ void LoadValues(
        ValueT                      (&values)[ITEMS_PER_THREAD],    ///< [out] Loaded values
        OffsetT                     block_offset,                   ///< Offset of the tile, relative to d_values_in
        OffsetT                     valid_items,                    ///< Not used by this overload
        Int2Type<true>              is_full_tile,                   ///< Overload-selection tag: tile is full
        Int2Type<_RANK_ALGORITHM>   rank_algorithm)                 ///< Overload-selection tag: any ranking algorithm
    {
        // Unguarded block load of the whole tile
        BlockLoadValuesT value_loader(temp_storage.load_values);
        value_loader.Load(d_values_in + block_offset, values);

        // Barrier after the load, before the unioned temp storage is reused
        CTA_SYNC();
    }


    /**
     * Load a tile of values (specialized for partial tile, any ranking algorithm)
     */
    template <int _RANK_ALGORITHM>
    __device__ __forceinline__ void LoadValues(
        ValueT                      (&values)[ITEMS_PER_THREAD],    ///< [out] Loaded values
        OffsetT                     block_offset,                   ///< Offset of the tile, relative to d_values_in
        OffsetT                     valid_items,                    ///< Number of valid items in the tile
        Int2Type<false>             is_full_tile,                   ///< Overload-selection tag: tile is partially full
        Int2Type<_RANK_ALGORITHM>   rank_algorithm)                 ///< Overload-selection tag: any ranking algorithm
    {
        // Guarded block load, bounded by valid_items
        BlockLoadValuesT value_loader(temp_storage.load_values);
        value_loader.Load(d_values_in + block_offset, values, valid_items);

        // Barrier after the load, before the unioned temp storage is reused
        CTA_SYNC();
    }


    /**
     * Load a tile of items (specialized for full tile, match ranking algorithm)
     */
    __device__ __forceinline__ void LoadValues(
        ValueT                      (&values)[ITEMS_PER_THREAD],
        OffsetT                     block_offset,
        volatile OffsetT                     valid_items,
        Int2Type<true>              is_full_tile,
        Int2Type<RADIX_RANK_MATCH>  rank_algorithm)
    {
        // Full tile with the match ranking algorithm: direct warp-striped load
        // from d_values_in at block_offset; valid_items is not consulted here
        ValuesItr tile_values = d_values_in + block_offset;
        LoadDirectWarpStriped(threadIdx.x, tile_values, values);
    }


    /**
     * Load a tile of items (specialized for partial tile, match ranking algorithm)
     */
    __device__ __forceinline__ void LoadValues(
        ValueT                      (&values)[ITEMS_PER_THREAD],
        OffsetT                     block_offset,
        volatile OffsetT                     valid_items,
        Int2Type<false>             is_full_tile,
        Int2Type<RADIX_RANK_MATCH>  rank_algorithm)
    {
        // Partial tile with the match ranking algorithm: guarded direct
        // warp-striped load from d_values_in at block_offset, bounded by valid_items
        ValuesItr tile_values = d_values_in + block_offset;
        LoadDirectWarpStriped(threadIdx.x, tile_values, values, valid_items);
    }


    /**
     * Truck along associated values
     */
    template <bool FULL_TILE>
    __device__ __forceinline__ void GatherScatterValues(
        OffsetT         (&relative_bin_offsets)[ITEMS_PER_THREAD],  ///< Per-item offsets forwarded to ScatterValues
        int             (&ranks)[ITEMS_PER_THREAD],                 ///< Per-item ranks forwarded to ScatterValues
        OffsetT         block_offset,                               ///< Offset of the tile, relative to d_values_in
        OffsetT         valid_items,                                ///< Number of valid items in the tile
        Int2Type<false> /*is_keys_only*/)                           ///< Overload-selection tag: values are present
    {
        // Barrier before the unioned temp storage is reused for the value load
        CTA_SYNC();

        ValueT tile_values[ITEMS_PER_THREAD];

        // Gather: the tags pick the LoadValues overload for this tile kind and ranking algorithm
        LoadValues(tile_values, block_offset, valid_items, Int2Type<FULL_TILE>(), Int2Type<RANK_ALGORITHM>());

        // Scatter: route the values to the same destinations as their keys
        ScatterValues<FULL_TILE>(tile_values, relative_bin_offsets, ranks, valid_items);
    }


    /**
     * Truck along associated values (specialized for key-only sorting)
     */
    template <bool FULL_TILE>
    __device__ __forceinline__ void GatherScatterValues(
        OffsetT         (&/*relative_bin_offsets*/)[ITEMS_PER_THREAD],
        int             (&/*ranks*/)[ITEMS_PER_THREAD],
        OffsetT         /*block_offset*/,
        OffsetT         /*valid_items*/,
        Int2Type<true>  /*is_keys_only*/)
    // Intentionally empty: this overload is selected by the keys-only tag and
    // ignores every argument, so no values are loaded or written
    {}


    /**
     * Process tile
     */
    template <bool FULL_TILE>
    __device__ __forceinline__ void ProcessTile(
        OffsetT block_offset,
        const OffsetT &valid_items = TILE_ITEMS)
    {
        // Ranks and scatters one tile of keys (plus any associated values) that
        // starts at block_offset.
        //
        //   FULL_TILE    - compile-time flag forwarded (as Int2Type<FULL_TILE>) to the
        //                  load/scatter helpers to select their full- or partial-tile
        //                  variants.
        //   block_offset - offset of the tile, forwarded to LoadKeys and
        //                  GatherScatterValues.
        //   valid_items  - item count forwarded to the helpers; defaults to TILE_ITEMS.
        //
        // The body contains unconditional CTA_SYNC() calls, so every thread of the
        // block has to execute this function together.

        UnsignedBits    keys[ITEMS_PER_THREAD];
        int             ranks[ITEMS_PER_THREAD];

        // Not written in this function: handed by reference to ScatterKeys and then
        // to GatherScatterValues (presumably ScatterKeys fills it in -- confirm).
        OffsetT         relative_bin_offsets[ITEMS_PER_THREAD];

        // Assign default (min/max) value to all keys
        UnsignedBits default_key = (IS_DESCENDING) ? LOWEST_KEY : MAX_KEY;

        // Load tile of keys
        LoadKeys(
            keys,
            block_offset,
            valid_items, 
            default_key,
            Int2Type<FULL_TILE>(),
            Int2Type<RANK_ALGORITHM>());

        // Twiddle key bits if necessary
        #pragma unroll
        for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)
        {
            keys[KEY] = Traits<KeyT>::TwiddleIn(keys[KEY]);
        }

        // Rank the twiddled keys
        // (ranks[] and exclusive_digit_prefix[] are outputs of RankKeys; the digit is
        // taken from num_bits bits starting at current_bit, per the arguments passed)
        int exclusive_digit_prefix[BINS_TRACKED_PER_THREAD];
        BlockRadixRankT(temp_storage.radix_rank).RankKeys(
            keys,
            ranks,
            current_bit,
            num_bits,
            exclusive_digit_prefix);

        // NOTE(review): presumably required because temp_storage.radix_rank shares
        // storage with the fields written below -- confirm against TempStorage.
        CTA_SYNC();

        // Share exclusive digit prefix
        // Each thread owns BINS_TRACKED_PER_THREAD consecutive bins starting at
        // threadIdx.x * BINS_TRACKED_PER_THREAD; bins at or beyond RADIX_DIGITS are
        // skipped unless BLOCK_THREADS == RADIX_DIGITS makes the check unnecessary.
        #pragma unroll
        for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track)
        {
            int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track;
            if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS))
            {
                // Store exclusive prefix
                temp_storage.exclusive_digit_prefix[bin_idx] =
                    exclusive_digit_prefix[track];
            }
        }

        // Make the prefixes written above visible before neighbouring bins read them
        CTA_SYNC();

        // Get inclusive digit prefix
        // Only entries for in-range bins are assigned; the same range check guards
        // every later use of this array.
        int inclusive_digit_prefix[BINS_TRACKED_PER_THREAD];

        #pragma unroll
        for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track)
        {
            int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track;
            if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS))
            {
                // A bin's inclusive prefix is the exclusive prefix of the bin that
                // follows it in output order; the last bin in that order uses the tile
                // capacity BLOCK_THREADS * ITEMS_PER_THREAD instead.
                // NOTE(review): the capacity (not valid_items) is used even for a
                // partial tile -- presumably the unused slots hold default_key and
                // are counted by RankKeys; confirm against LoadKeys.
                if (IS_DESCENDING)
                {
                    // Get inclusive digit prefix from exclusive prefix (higher bins come first)
                    inclusive_digit_prefix[track] = (bin_idx == 0) ?
                        (BLOCK_THREADS * ITEMS_PER_THREAD) :
                        temp_storage.exclusive_digit_prefix[bin_idx - 1];
                }
                else
                {
                    // Get inclusive digit prefix from exclusive prefix (lower bins come first)
                    inclusive_digit_prefix[track] = (bin_idx == RADIX_DIGITS - 1) ?
                        (BLOCK_THREADS * ITEMS_PER_THREAD) :
                        temp_storage.exclusive_digit_prefix[bin_idx + 1];
                }
            }
        }

        CTA_SYNC();

        // Update global scatter base offsets for each digit
        #pragma unroll
        for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track)
        {
            int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track;
            if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS))
            {
                // Published value = running offset minus this tile's exclusive prefix.
                // Adding the inclusive prefix afterwards leaves the running offset
                // advanced by (inclusive - exclusive) for the next tile.
                bin_offset[track] -= exclusive_digit_prefix[track];
                temp_storage.relative_bin_offsets[bin_idx] = bin_offset[track];
                bin_offset[track] += inclusive_digit_prefix[track];
            }
        }

        // Make temp_storage.relative_bin_offsets visible to the whole block before
        // the scatter step
        CTA_SYNC();

        // Scatter keys
        ScatterKeys<FULL_TILE>(keys, relative_bin_offsets, ranks, valid_items);

        // Gather/scatter values
        // (the Int2Type<KEYS_ONLY> tag selects the empty overload when there are no values)
        GatherScatterValues<FULL_TILE>(relative_bin_offsets , ranks, block_offset, valid_items, Int2Type<KEYS_ONLY>());
    }

    //---------------------------------------------------------------------
    // Copy shortcut
    //---------------------------------------------------------------------

    /**
     * Copy tiles within the range of input
     */
    template <
        typename InputIteratorT,
        typename T>
    __device__ __forceinline__ void Copy(
        InputIteratorT  d_in,
        T               *d_out,
        OffsetT         block_offset,
        OffsetT         block_end)
    {
        // Whole tiles: unguarded striped load, barrier, unguarded striped store
        for (; block_offset + TILE_ITEMS <= block_end; block_offset += TILE_ITEMS)
        {
            T tile[ITEMS_PER_THREAD];

            LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_in + block_offset, tile);
            CTA_SYNC();
            StoreDirectStriped<BLOCK_THREADS>(threadIdx.x, d_out + block_offset, tile);
        }

        // The range ended exactly on a tile boundary (or was empty): nothing left
        if (block_offset >= block_end)
            return;

        // Trailing partial tile: same sequence, with the remaining item count
        // passed to the load and store calls
        OffsetT remaining = block_end - block_offset;

        T tile[ITEMS_PER_THREAD];

        LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_in + block_offset, tile, remaining);
        CTA_SYNC();
        StoreDirectStriped<BLOCK_THREADS>(threadIdx.x, d_out + block_offset, tile, remaining);
    }


    /**
     * Copy tiles within the range of input (specialized for NullType)
     */
    template <typename InputIteratorT>
    __device__ __forceinline__ void Copy(
        InputIteratorT  /*d_in*/,
        NullType        * /*d_out*/,
        OffsetT         /*block_offset*/,
        OffsetT         /*block_end*/)
    // Intentionally empty: selected when the output pointer is NullType*, in which
    // case there is nothing to copy. All parameters are unnamed because none is used.
    {}


    //---------------------------------------------------------------------
    // Interface
    //---------------------------------------------------------------------

    /**
     * Constructor
     */
    __device__ __forceinline__ AgentRadixSortDownsweep(
        TempStorage     &temp_storage,
        OffsetT         (&bin_offset)[BINS_TRACKED_PER_THREAD],
        OffsetT         num_items,
        const KeyT      *d_keys_in,
        KeyT            *d_keys_out,
        const ValueT    *d_values_in,
        ValueT          *d_values_out,
        int             current_bit,
        int             num_bits)
    :
        temp_storage(temp_storage.Alias()),
        d_keys_in(reinterpret_cast<const UnsignedBits*>(d_keys_in)),
        d_values_in(d_values_in),
        d_keys_out(reinterpret_cast<UnsignedBits*>(d_keys_out)),
        d_values_out(d_values_out),
        current_bit(current_bit),
        num_bits(num_bits),
        short_circuit(1)
    {
        // This overload receives the per-thread starting bin offsets directly from
        // the caller. short_circuit starts out set and is cleared as soon as any
        // in-range bin has an offset other than 0 or num_items.
        #pragma unroll
        for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track)
        {
            // Every tracked slot is copied, whether or not its bin is in range
            const OffsetT initial_offset = bin_offset[track];
            this->bin_offset[track] = initial_offset;

            const int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track;
            const bool bin_in_range =
                (BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS);
            const bool offset_is_trivial =
                (initial_offset == 0) || (initial_offset == num_items);

            // Only in-range bins may veto the short circuit
            if (bin_in_range && !offset_is_trivial)
            {
                short_circuit = 0;
            }
        }

        // Combine the per-thread flags across the block: the short circuit is
        // kept only if every thread still has it set
        short_circuit = CTA_SYNC_AND(short_circuit);
    }


    /**
     * Constructor
     */
    __device__ __forceinline__ AgentRadixSortDownsweep(
        TempStorage     &temp_storage,
        OffsetT         num_items,
        OffsetT         *d_spine,
        const KeyT      *d_keys_in,
        KeyT            *d_keys_out,
        const ValueT    *d_values_in,
        ValueT          *d_values_out,
        int             current_bit,
        int             num_bits)
    :
        temp_storage(temp_storage.Alias()),
        d_keys_in(reinterpret_cast<const UnsignedBits*>(d_keys_in)),
        d_values_in(d_values_in),
        d_keys_out(reinterpret_cast<UnsignedBits*>(d_keys_out)),
        d_values_out(d_values_out),
        current_bit(current_bit),
        num_bits(num_bits),
        short_circuit(1)
    {
        // This overload reads the starting bin offsets from d_spine, which is
        // indexed as (gridDim.x * digit) + block index.
        #pragma unroll
        for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track)
        {
            const int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track;

            // Only bins below RADIX_DIGITS have a spine entry to load
            if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS))
            {
                // Descending sorts look up the mirrored digit
                const int digit = (IS_DESCENDING) ? (RADIX_DIGITS - bin_idx - 1) : bin_idx;

                // Index of block 0's entry for this digit
                const unsigned int spine_row = gridDim.x * digit;

                // The short circuit survives only while block 0's offset for every
                // digit is either zero or the whole problem size
                const OffsetT block0_offset = d_spine[spine_row];
                if ((block0_offset != 0) && (block0_offset != num_items))
                {
                    short_circuit = 0;
                }

                // This block's own starting offset for the digit
                bin_offset[track] = d_spine[spine_row + blockIdx.x];
            }
        }

        // Combine the per-thread flags across the block: the short circuit is
        // kept only if every thread still has it set
        short_circuit = CTA_SYNC_AND(short_circuit);
    }


    /**
     * Distribute keys from a segment of input tiles.
     */
    __device__ __forceinline__ void ProcessRegion(
        OffsetT   block_offset,
        OffsetT   block_end)
    {
        // Short-circuit path: pass keys, then values, straight through to the
        // output without ranking or scattering
        if (short_circuit)
        {
            Copy(d_keys_in, d_keys_out, block_offset, block_end);
            Copy(d_values_in, d_values_out, block_offset, block_end);
            return;
        }

        // Full tiles, with a barrier after each one
        for (; block_offset + TILE_ITEMS <= block_end; block_offset += TILE_ITEMS)
        {
            ProcessTile<true>(block_offset);
            CTA_SYNC();
        }

        // Whatever is left forms a single partial tile handled with guarded I/O
        if (block_offset < block_end)
        {
            const OffsetT remaining_items = block_end - block_offset;
            ProcessTile<false>(block_offset, remaining_items);
        }
    }

};



}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)



================================================
FILE: external/cub/cub/agent/agent_radix_sort_upsweep.cuh
================================================
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep.
 */

#pragma once

#include "../thread/thread_reduce.cuh"
#include "../thread/thread_load.cuh"
#include "../warp/warp_reduce.cuh"
#include "../block/block_load.cuh"
#include "../util_type.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/******************************************************************************
 * Tuning policy types
 ******************************************************************************/

/**
 * Parameterizable tuning policy typ
Download .txt
gitextract_eoi4a78r/

├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── external/
│   └── cub/
│       ├── .cproject
│       ├── .project
│       ├── .settings/
│       │   ├── .gitignore
│       │   ├── org.eclipse.cdt.codan.core.prefs
│       │   ├── org.eclipse.cdt.core.prefs
│       │   ├── org.eclipse.cdt.ui.prefs
│       │   └── org.eclipse.core.runtime.prefs
│       ├── CHANGE_LOG.TXT
│       ├── LICENSE.TXT
│       ├── README.md
│       ├── common.mk
│       ├── cub/
│       │   ├── agent/
│       │   │   ├── agent_histogram.cuh
│       │   │   ├── agent_radix_sort_downsweep.cuh
│       │   │   ├── agent_radix_sort_upsweep.cuh
│       │   │   ├── agent_reduce.cuh
│       │   │   ├── agent_reduce_by_key.cuh
│       │   │   ├── agent_rle.cuh
│       │   │   ├── agent_scan.cuh
│       │   │   ├── agent_segment_fixup.cuh
│       │   │   ├── agent_select_if.cuh
│       │   │   ├── agent_spmv_orig.cuh
│       │   │   └── single_pass_scan_operators.cuh
│       │   ├── block/
│       │   │   ├── block_adjacent_difference.cuh
│       │   │   ├── block_discontinuity.cuh
│       │   │   ├── block_exchange.cuh
│       │   │   ├── block_histogram.cuh
│       │   │   ├── block_load.cuh
│       │   │   ├── block_radix_rank.cuh
│       │   │   ├── block_radix_sort.cuh
│       │   │   ├── block_raking_layout.cuh
│       │   │   ├── block_reduce.cuh
│       │   │   ├── block_scan.cuh
│       │   │   ├── block_shuffle.cuh
│       │   │   ├── block_store.cuh
│       │   │   └── specializations/
│       │   │       ├── block_histogram_atomic.cuh
│       │   │       ├── block_histogram_sort.cuh
│       │   │       ├── block_reduce_raking.cuh
│       │   │       ├── block_reduce_raking_commutative_only.cuh
│       │   │       ├── block_reduce_warp_reductions.cuh
│       │   │       ├── block_scan_raking.cuh
│       │   │       ├── block_scan_warp_scans.cuh
│       │   │       ├── block_scan_warp_scans2.cuh
│       │   │       └── block_scan_warp_scans3.cuh
│       │   ├── cub.cuh
│       │   ├── device/
│       │   │   ├── device_histogram.cuh
│       │   │   ├── device_partition.cuh
│       │   │   ├── device_radix_sort.cuh
│       │   │   ├── device_reduce.cuh
│       │   │   ├── device_run_length_encode.cuh
│       │   │   ├── device_scan.cuh
│       │   │   ├── device_segmented_radix_sort.cuh
│       │   │   ├── device_segmented_reduce.cuh
│       │   │   ├── device_select.cuh
│       │   │   ├── device_spmv.cuh
│       │   │   └── dispatch/
│       │   │       ├── dispatch_histogram.cuh
│       │   │       ├── dispatch_radix_sort.cuh
│       │   │       ├── dispatch_reduce.cuh
│       │   │       ├── dispatch_reduce_by_key.cuh
│       │   │       ├── dispatch_rle.cuh
│       │   │       ├── dispatch_scan.cuh
│       │   │       ├── dispatch_select_if.cuh
│       │   │       └── dispatch_spmv_orig.cuh
│       │   ├── grid/
│       │   │   ├── grid_barrier.cuh
│       │   │   ├── grid_even_share.cuh
│       │   │   ├── grid_mapping.cuh
│       │   │   └── grid_queue.cuh
│       │   ├── host/
│       │   │   └── mutex.cuh
│       │   ├── iterator/
│       │   │   ├── arg_index_input_iterator.cuh
│       │   │   ├── cache_modified_input_iterator.cuh
│       │   │   ├── cache_modified_output_iterator.cuh
│       │   │   ├── constant_input_iterator.cuh
│       │   │   ├── counting_input_iterator.cuh
│       │   │   ├── discard_output_iterator.cuh
│       │   │   ├── tex_obj_input_iterator.cuh
│       │   │   ├── tex_ref_input_iterator.cuh
│       │   │   └── transform_input_iterator.cuh
│       │   ├── thread/
│       │   │   ├── thread_load.cuh
│       │   │   ├── thread_operators.cuh
│       │   │   ├── thread_reduce.cuh
│       │   │   ├── thread_scan.cuh
│       │   │   ├── thread_search.cuh
│       │   │   └── thread_store.cuh
│       │   ├── util_allocator.cuh
│       │   ├── util_arch.cuh
│       │   ├── util_debug.cuh
│       │   ├── util_device.cuh
│       │   ├── util_macro.cuh
│       │   ├── util_namespace.cuh
│       │   ├── util_ptx.cuh
│       │   ├── util_type.cuh
│       │   └── warp/
│       │       ├── specializations/
│       │       │   ├── warp_reduce_shfl.cuh
│       │       │   ├── warp_reduce_smem.cuh
│       │       │   ├── warp_scan_shfl.cuh
│       │       │   └── warp_scan_smem.cuh
│       │       ├── warp_reduce.cuh
│       │       └── warp_scan.cuh
│       ├── eclipse code style profile.xml
│       ├── examples/
│       │   ├── block/
│       │   │   ├── .gitignore
│       │   │   ├── Makefile
│       │   │   ├── example_block_radix_sort.cu
│       │   │   ├── example_block_reduce.cu
│       │   │   ├── example_block_scan.cu
│       │   │   └── reduce_by_key.cu
│       │   └── device/
│       │       ├── .gitignore
│       │       ├── Makefile
│       │       ├── example_device_partition_flagged.cu
│       │       ├── example_device_partition_if.cu
│       │       ├── example_device_radix_sort.cu
│       │       ├── example_device_reduce.cu
│       │       ├── example_device_scan.cu
│       │       ├── example_device_select_flagged.cu
│       │       ├── example_device_select_if.cu
│       │       ├── example_device_select_unique.cu
│       │       └── example_device_sort_find_non_trivial_runs.cu
│       ├── experimental/
│       │   ├── .gitignore
│       │   ├── Makefile
│       │   ├── defunct/
│       │   │   ├── example_coo_spmv.cu
│       │   │   └── test_device_seg_reduce.cu
│       │   ├── histogram/
│       │   │   ├── histogram_cub.h
│       │   │   ├── histogram_gmem_atomics.h
│       │   │   └── histogram_smem_atomics.h
│       │   ├── histogram_compare.cu
│       │   ├── sparse_matrix.h
│       │   ├── spmv_compare.cu
│       │   └── spmv_script.sh
│       ├── test/
│       │   ├── .gitignore
│       │   ├── Makefile
│       │   ├── link_a.cu
│       │   ├── link_b.cu
│       │   ├── link_main.cpp
│       │   ├── mersenne.h
│       │   ├── test_allocator.cu
│       │   ├── test_block_histogram.cu
│       │   ├── test_block_load_store.cu
│       │   ├── test_block_radix_sort.cu
│       │   ├── test_block_reduce.cu
│       │   ├── test_block_scan.cu
│       │   ├── test_device_histogram.cu
│       │   ├── test_device_radix_sort.cu
│       │   ├── test_device_reduce.cu
│       │   ├── test_device_reduce_by_key.cu
│       │   ├── test_device_run_length_encode.cu
│       │   ├── test_device_scan.cu
│       │   ├── test_device_select_if.cu
│       │   ├── test_device_select_unique.cu
│       │   ├── test_grid_barrier.cu
│       │   ├── test_iterator.cu
│       │   ├── test_util.h
│       │   ├── test_warp_reduce.cu
│       │   └── test_warp_scan.cu
│       └── tune/
│           ├── .gitignore
│           ├── Makefile
│           └── tune_device_reduce.cu
├── src/
│   ├── bitonicTopK.cuh
│   ├── radixSelectTopK.cuh
│   ├── sharedmem.cuh
│   ├── sortTopK.cuh
│   └── sortingNetwork.cuh
└── test/
    ├── compareTopKAlgorithms.cu
    ├── generateProblems.cuh
    └── printFunctions.cuh
Download .txt
SYMBOL INDEX (37 symbols across 7 files)

FILE: external/cub/experimental/histogram/histogram_cub.h
  type typename (line 48) | typedef typename If<is_float, float, unsigned char>::Type    SampleT;
  type typename (line 49) | typedef typename If<is_float, float, unsigned int>::Type     LevelT;

FILE: external/cub/experimental/histogram/histogram_gmem_atomics.h
  function namespace (line 30) | namespace histogram_gmem_atomics
  function run_gmem_atomics (line 138) | double run_gmem_atomics(

FILE: external/cub/experimental/histogram/histogram_smem_atomics.h
  function namespace (line 30) | namespace histogram_smem_atomics
  function run_smem_atomics (line 148) | double run_smem_atomics(

FILE: external/cub/experimental/sparse_matrix.h
  type CooTuple (line 141) | struct CooTuple
  function coo_tuples (line 181) | coo_tuples(NULL) {}
  function Clear (line 187) | void Clear()
  function Display (line 202) | void Display()
  function InitMetis (line 285) | void InitMetis(const string &metis_filename)
  function Clear (line 775) | void Clear()
  function GraphStats (line 810) | GraphStats Stats()
  function FromCoo (line 947) | void FromCoo(const CooMatrix<ValueT, OffsetT> &coo_matrix)
  function DisplayHistogram (line 1009) | void DisplayHistogram()
  function Display (line 1049) | void Display()
  function row_degrees (line 1078) | row_degrees(row_degrees) {}
  function row_degrees (line 1096) | row_degrees(row_degrees) {}
  type std (line 1133) | typedef std::set<OffsetT, OrderByLow<OffsetT> > UnlabeledSet;

FILE: external/cub/test/link_main.cpp
  function main (line 6) | int main()

FILE: external/cub/test/mersenne.h
  function namespace (line 46) | namespace mersenne {

FILE: external/cub/test/test_util.h
  type CommandLineArgs (line 75) | struct CommandLineArgs
  function CheckCmdLineFlag (line 128) | bool CheckCmdLineFlag(const char* arg_name)
  function ParsedArgc (line 234) | int ParsedArgc()
  function CoutCast (line 483) | int CoutCast(char val) { return val; }
  function CoutCast (line 485) | int CoutCast(unsigned char val) { return val; }
  function CoutCast (line 487) | int CoutCast(signed char val) { return val; }
  type GenMode (line 498) | enum GenMode
  type TestFoo (line 1004) | struct TestFoo
  function namespace (line 1088) | namespace cub {
  type TestBar (line 1125) | struct TestBar
  function namespace (line 1204) | namespace cub {
  function DisplayResults (line 1424) | void DisplayResults(
  type CpuTimer (line 1509) | struct CpuTimer
  function Start (line 1583) | void Start()
  function Stop (line 1588) | void Stop()
  function ElapsedMillis (line 1593) | float ElapsedMillis()
Condensed preview — 165 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,282K chars).
[
  {
    "path": ".gitignore",
    "chars": 26,
    "preview": "*.o\ncompareTopKAlgorithms\n"
  },
  {
    "path": "LICENSE",
    "chars": 1070,
    "preview": "MIT License\n\nCopyright (c) 2016 Anil Shanbhag\n\nPermission is hereby granted, free of charge, to any person obtaining a c"
  },
  {
    "path": "Makefile",
    "chars": 1146,
    "preview": "OS_SIZE = $(shell uname -m | sed -e \"s/i.86/32/\" -e \"s/x86_64/64/\")\n\nCUDA_PATH       ?= /usr/local/cuda-10.0\nCUDA_INC_PA"
  },
  {
    "path": "README.md",
    "chars": 3571,
    "preview": "GPU-TopK\n========\n\nGPU-TopK implements efficient top-k runtimes for GPUs. The specific problem solved is given a array o"
  },
  {
    "path": "external/cub/.cproject",
    "chars": 65123,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n<?fileVersion 4.0.0?><cproject storage_type_id=\"org.eclipse.cdt."
  },
  {
    "path": "external/cub/.project",
    "chars": 835,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<projectDescription>\r\n\t<name>GIT_CUB</name>\r\n\t<comment></comment>\r\n\t<projects>\r\n"
  },
  {
    "path": "external/cub/.settings/.gitignore",
    "chars": 23,
    "preview": "/language.settings.xml\n"
  },
  {
    "path": "external/cub/.settings/org.eclipse.cdt.codan.core.prefs",
    "chars": 10954,
    "preview": "eclipse.preferences.version=1\r\norg.eclipse.cdt.codan.checkers.errnoreturn=Warning\r\norg.eclipse.cdt.codan.checkers.errnor"
  },
  {
    "path": "external/cub/.settings/org.eclipse.cdt.core.prefs",
    "chars": 14273,
    "preview": "eclipse.preferences.version=1\r\nindexer/indexAllFiles=true\r\nindexer/indexAllHeaderVersions=false\r\nindexer/indexAllVersion"
  },
  {
    "path": "external/cub/.settings/org.eclipse.cdt.ui.prefs",
    "chars": 86,
    "preview": "eclipse.preferences.version=1\r\nformatter_profile=_B40C\r\nformatter_settings_version=1\r\n"
  },
  {
    "path": "external/cub/.settings/org.eclipse.core.runtime.prefs",
    "chars": 190,
    "preview": "content-types/enabled=true\r\ncontent-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh\r\ncontent-types/org.eclipse."
  },
  {
    "path": "external/cub/CHANGE_LOG.TXT",
    "chars": 21397,
    "preview": "1.7.4    09/20/2017\n    - Bug fixes: \n        - Issue #114: Can't pair non-trivially-constructible values in radix sort\n"
  },
  {
    "path": "external/cub/LICENSE.TXT",
    "chars": 1600,
    "preview": "Copyright (c) 2010-2011, Duane Merrill.  All rights reserved.\r\nCopyright (c) 2011-2017, NVIDIA CORPORATION.  All rights "
  },
  {
    "path": "external/cub/README.md",
    "chars": 6130,
    "preview": "<hr>\n<h3>About CUB</h3>\n\nCurrent release: v1.7.4 (09/20/2017)\n\nWe recommend the [CUB Project Website](http://nvlabs.gith"
  },
  {
    "path": "external/cub/common.mk",
    "chars": 7688,
    "preview": "#/******************************************************************************\n# * Copyright (c) 2011, Duane Merrill. "
  },
  {
    "path": "external/cub/cub/agent/agent_histogram.cuh",
    "chars": 33331,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_radix_sort_downsweep.cuh",
    "chars": 27531,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_radix_sort_upsweep.cuh",
    "chars": 17917,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_reduce.cuh",
    "chars": 16916,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_reduce_by_key.cuh",
    "chars": 24915,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_rle.cuh",
    "chars": 35663,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_scan.cuh",
    "chars": 18729,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_segment_fixup.cuh",
    "chars": 16655,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_select_if.cuh",
    "chars": 29594,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/agent_spmv_orig.cuh",
    "chars": 36363,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/agent/single_pass_scan_operators.cuh",
    "chars": 27444,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_adjacent_difference.cuh",
    "chars": 25011,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_discontinuity.cuh",
    "chars": 54550,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_exchange.cuh",
    "chars": 52305,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_histogram.cuh",
    "chars": 16291,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_load.cuh",
    "chars": 55321,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_radix_rank.cuh",
    "chars": 25246,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_radix_sort.cuh",
    "chars": 38279,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_raking_layout.cuh",
    "chars": 6181,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_reduce.cuh",
    "chars": 25300,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_scan.cuh",
    "chars": 102978,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_shuffle.cuh",
    "chars": 11964,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/block_store.cuh",
    "chars": 41449,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_histogram_atomic.cuh",
    "chars": 3285,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_histogram_sort.cuh",
    "chars": 8198,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_reduce_raking.cuh",
    "chars": 9583,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_reduce_raking_commutative_only.cuh",
    "chars": 8369,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_reduce_warp_reductions.cuh",
    "chars": 9942,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_scan_raking.cuh",
    "chars": 28460,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_scan_warp_scans.cuh",
    "chars": 19117,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_scan_warp_scans2.cuh",
    "chars": 20949,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/block/specializations/block_scan_warp_scans3.cuh",
    "chars": 19463,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/cub.cuh",
    "chars": 3626,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/device/device_histogram.cuh",
    "chars": 54347,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_partition.cuh",
    "chars": 13891,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_radix_sort.cuh",
    "chars": 42322,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_reduce.cuh",
    "chars": 38904,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_run_length_encode.cuh",
    "chars": 14817,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_scan.cuh",
    "chars": 21786,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_segmented_radix_sort.cuh",
    "chars": 54627,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_segmented_reduce.cuh",
    "chars": 36523,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_select.cuh",
    "chars": 18838,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/device_spmv.cuh",
    "chars": 8523,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_histogram.cuh",
    "chars": 57226,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_radix_sort.cuh",
    "chars": 82797,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_reduce.cuh",
    "chars": 42677,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh",
    "chars": 25478,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_rle.cuh",
    "chars": 23601,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_scan.cuh",
    "chars": 22736,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_select_if.cuh",
    "chars": 24454,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/device/dispatch/dispatch_spmv_orig.cuh",
    "chars": 35265,
    "preview": "\n/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  "
  },
  {
    "path": "external/cub/cub/grid/grid_barrier.cuh",
    "chars": 5861,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/grid/grid_even_share.cuh",
    "chars": 8198,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/grid/grid_mapping.cuh",
    "chars": 4827,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/grid/grid_queue.cuh",
    "chars": 7475,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/host/mutex.cuh",
    "chars": 4581,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/arg_index_input_iterator.cuh",
    "chars": 8781,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/cache_modified_input_iterator.cuh",
    "chars": 8104,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/cache_modified_output_iterator.cuh",
    "chars": 8322,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/constant_input_iterator.cuh",
    "chars": 7650,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/counting_input_iterator.cuh",
    "chars": 7369,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/discard_output_iterator.cuh",
    "chars": 6817,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/tex_obj_input_iterator.cuh",
    "chars": 10546,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/tex_ref_input_iterator.cuh",
    "chars": 12430,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/iterator/transform_input_iterator.cuh",
    "chars": 8611,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/thread/thread_load.cuh",
    "chars": 18869,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/thread/thread_operators.cuh",
    "chars": 9228,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/thread/thread_reduce.cuh",
    "chars": 6037,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/thread/thread_scan.cuh",
    "chars": 10566,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/thread/thread_search.cuh",
    "chars": 4797,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/thread/thread_store.cuh",
    "chars": 17932,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_allocator.cuh",
    "chars": 28716,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_arch.cuh",
    "chars": 6768,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_debug.cuh",
    "chars": 5061,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_device.cuh",
    "chars": 10658,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_macro.cuh",
    "chars": 3710,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_namespace.cuh",
    "chars": 2100,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_ptx.cuh",
    "chars": 21062,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/util_type.cuh",
    "chars": 39450,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/warp/specializations/warp_reduce_shfl.cuh",
    "chars": 21484,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/warp/specializations/warp_reduce_smem.cuh",
    "chars": 14687,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/warp/specializations/warp_scan_shfl.cuh",
    "chars": 26574,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/warp/specializations/warp_scan_smem.cuh",
    "chars": 16153,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/warp/warp_reduce.cuh",
    "chars": 25046,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/cub/warp/warp_scan.cuh",
    "chars": 38915,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/eclipse code style profile.xml",
    "chars": 16543,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n<profiles version=\"1\">\r\n<profile kind=\"CodeFormatterProfile\" nam"
  },
  {
    "path": "external/cub/examples/block/.gitignore",
    "chars": 69,
    "preview": "/bin\n/Debug\n/Release\n/cuda55.sdf\n/cuda55.suo\n/cuda60.sdf\n/cuda60.suo\n"
  },
  {
    "path": "external/cub/examples/block/Makefile",
    "chars": 5255,
    "preview": "#/******************************************************************************\n# * Copyright (c) 2011, Duane Merrill. "
  },
  {
    "path": "external/cub/examples/block/example_block_radix_sort.cu",
    "chars": 10724,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/block/example_block_reduce.cu",
    "chars": 9644,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/block/example_block_scan.cu",
    "chars": 11329,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/block/reduce_by_key.cu",
    "chars": 1554,
    "preview": "\n\n#include <cub/cub.cuh>\n\n\ntemplate <\n    int         BLOCK_THREADS,          ///< Number of CTA threads\n    typename   "
  },
  {
    "path": "external/cub/examples/device/.gitignore",
    "chars": 75,
    "preview": "/bin\n/Debug\n/ipch\n/Release\n/cuda55.sdf\n/cuda55.suo\n/cuda60.sdf\n/cuda60.suo\n"
  },
  {
    "path": "external/cub/examples/device/Makefile",
    "chars": 8830,
    "preview": "#/******************************************************************************\n# * Copyright (c) 2011, Duane Merrill. "
  },
  {
    "path": "external/cub/examples/device/example_device_partition_flagged.cu",
    "chars": 8173,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_partition_if.cu",
    "chars": 8236,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_radix_sort.cu",
    "chars": 8303,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_reduce.cu",
    "chars": 5825,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_scan.cu",
    "chars": 6007,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_select_flagged.cu",
    "chars": 8156,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_select_if.cu",
    "chars": 8210,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_select_unique.cu",
    "chars": 7493,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/examples/device/example_device_sort_find_non_trivial_runs.cu",
    "chars": 13277,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/experimental/.gitignore",
    "chars": 5,
    "preview": "/bin\n"
  },
  {
    "path": "external/cub/experimental/Makefile",
    "chars": 4949,
    "preview": "#/******************************************************************************\n# * Copyright (c) 2011, Duane Merrill. "
  },
  {
    "path": "external/cub/experimental/defunct/example_coo_spmv.cu",
    "chars": 37185,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/experimental/defunct/test_device_seg_reduce.cu",
    "chars": 95317,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/experimental/histogram/histogram_cub.h",
    "chars": 4374,
    "preview": "/******************************************************************************\n * Copyright (c) 2011-2017, NVIDIA CORPO"
  },
  {
    "path": "external/cub/experimental/histogram/histogram_gmem_atomics.h",
    "chars": 6368,
    "preview": "/******************************************************************************\n * Copyright (c) 2011-2017, NVIDIA CORPO"
  },
  {
    "path": "external/cub/experimental/histogram/histogram_smem_atomics.h",
    "chars": 6812,
    "preview": "/******************************************************************************\n * Copyright (c) 2011-2017, NVIDIA CORPO"
  },
  {
    "path": "external/cub/experimental/histogram_compare.cu",
    "chars": 21778,
    "preview": "/******************************************************************************\n * Copyright (c) 2011-2017, NVIDIA CORPO"
  },
  {
    "path": "external/cub/experimental/sparse_matrix.h",
    "chars": 39231,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/experimental/spmv_compare.cu",
    "chars": 30656,
    "preview": "/******************************************************************************\n * Copyright (c) 2011-2017, NVIDIA CORPO"
  },
  {
    "path": "external/cub/experimental/spmv_script.sh",
    "chars": 749,
    "preview": "#!/bin/bash\n\nfor i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 20"
  },
  {
    "path": "external/cub/test/.gitignore",
    "chars": 28,
    "preview": "/bin\n/link_main.obj\n/dummy/\n"
  },
  {
    "path": "external/cub/test/Makefile",
    "chars": 18158,
    "preview": "#/******************************************************************************\n# * Copyright (c) 2011, Duane Merrill. "
  },
  {
    "path": "external/cub/test/link_a.cu",
    "chars": 315,
    "preview": "#include <cub/cub.cuh>\n\nvoid a()\n{\n    printf(\"a() called\\n\");\n\n    cub::DoubleBuffer<unsigned int>     d_keys;\n    cub:"
  },
  {
    "path": "external/cub/test/link_b.cu",
    "chars": 315,
    "preview": "#include <cub/cub.cuh>\n\nvoid b()\n{\n    printf(\"b() called\\n\");\n\n    cub::DoubleBuffer<unsigned int>     d_keys;\n    cub:"
  },
  {
    "path": "external/cub/test/link_main.cpp",
    "chars": 113,
    "preview": "#include <stdio.h>\n\nextern void a();\nextern void b();\n\nint main()\n{\n    printf(\"hello world\\n\");\n    return 0;\n}\n"
  },
  {
    "path": "external/cub/test/mersenne.h",
    "chars": 5148,
    "preview": "/*\n A C-program for MT19937, with initialization improved 2002/1/26.\n Coded by Takuji Nishimura and Makoto Matsumoto.\n\n "
  },
  {
    "path": "external/cub/test/test_allocator.cu",
    "chars": 16564,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_block_histogram.cu",
    "chars": 9623,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_block_load_store.cu",
    "chars": 19496,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_block_radix_sort.cu",
    "chars": 25392,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_block_reduce.cu",
    "chars": 26628,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_block_scan.cu",
    "chars": 34810,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_histogram.cu",
    "chars": 70313,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_radix_sort.cu",
    "chars": 45522,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_reduce.cu",
    "chars": 49221,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_reduce_by_key.cu",
    "chars": 30363,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_run_length_encode.cu",
    "chars": 31192,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_scan.cu",
    "chars": 33600,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_select_if.cu",
    "chars": 37640,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_device_select_unique.cu",
    "chars": 21518,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_grid_barrier.cu",
    "chars": 5132,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_iterator.cu",
    "chars": 25549,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_util.h",
    "chars": 54220,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_warp_reduce.cu",
    "chars": 27186,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/test/test_warp_scan.cu",
    "chars": 19581,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "external/cub/tune/.gitignore",
    "chars": 5,
    "preview": "/bin\n"
  },
  {
    "path": "external/cub/tune/Makefile",
    "chars": 6388,
    "preview": "#/******************************************************************************\n# * Copyright (c) 2011, Duane Merrill. "
  },
  {
    "path": "external/cub/tune/tune_device_reduce.cu",
    "chars": 27663,
    "preview": "/******************************************************************************\n * Copyright (c) 2011, Duane Merrill.  A"
  },
  {
    "path": "src/bitonicTopK.cuh",
    "chars": 15843,
    "preview": "#pragma once\n\n#include <cuda.h>\n#include <cub/util_allocator.cuh>\n#include <algorithm>\n\n#include \"sharedmem.cuh\"\n\nusing "
  },
  {
    "path": "src/radixSelectTopK.cuh",
    "chars": 17292,
    "preview": "#include <cuda.h>\n#include <cub/device/device_radix_sort.cuh>\n#include <cub/util_allocator.cuh>\n\nusing namespace cub;\nus"
  },
  {
    "path": "src/sharedmem.cuh",
    "chars": 4048,
    "preview": "/*\n * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.\n *\n * Please refer to the NVIDIA end user license ag"
  },
  {
    "path": "src/sortTopK.cuh",
    "chars": 1299,
    "preview": "#include <cuda.h>\n#include <cub/device/device_radix_sort.cuh>\n#include <cub/util_allocator.cuh>\n\nusing namespace std;\nus"
  },
  {
    "path": "src/sortingNetwork.cuh",
    "chars": 26142,
    "preview": "template<typename KeyT>\nstruct SortingNetwork\n{\n  enum { NETWORK_MAX_NUM_ITEMS = 18 };\n\nprivate:\n  static __device__ __f"
  },
  {
    "path": "test/compareTopKAlgorithms.cu",
    "chars": 10001,
    "preview": "#include <cuda.h>\n#include <curand.h>\n#include <cuda_runtime_api.h>\n#include <cub/util_allocator.cuh>\n\n#include <time.h>"
  },
  {
    "path": "test/generateProblems.cuh",
    "chars": 6236,
    "preview": "#pragma once\n\n#include <cstdlib>\n#include <typeinfo>\n#include <cuda.h>\n#include <curand.h>\n#include <cub/device/device_r"
  },
  {
    "path": "test/printFunctions.cuh",
    "chars": 1378,
    "preview": "/* Copyright 2011 Russel Steinbach, Jeffrey Blanchard, Bradley Gordon,\n *   and Toluwaloju Alabi\n *   Licensed under the"
  }
]

About this extraction

This page contains the full source code of the anilshanbhag/gpu-topk GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 165 files (3.0 MB), approximately 801.3k tokens, and a symbol index with 37 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!